author    Johann <johannkoenig@google.com>  2016-07-21 12:09:52 -0700
committer Johann <johannkoenig@google.com>  2016-07-21 12:09:52 -0700
commit    68e1c830ade592be74773e249bf94e2bbfb50de7 (patch)
tree      08299f7deb6079690f0a3d2118ef3882fa77bdc6 /libvpx
parent    96ebd06cb9832f583f7c181ec886eade209524df (diff)
download  libvpx-68e1c830ade592be74773e249bf94e2bbfb50de7.tar.gz
Update external/libvpx to v1.6.0
Change-Id: I9425a3d3c3524d43823bc89f9f03556420c3dd42
Diffstat (limited to 'libvpx')
-rw-r--r--  libvpx/.mailmap | 11
-rw-r--r--  libvpx/AUTHORS | 8
-rw-r--r--  libvpx/CHANGELOG | 30
-rw-r--r--  libvpx/README | 3
-rw-r--r--  libvpx/build/make/Android.mk | 3
-rw-r--r--  libvpx/build/make/Makefile | 32
-rwxr-xr-x  libvpx/build/make/configure.sh | 181
-rwxr-xr-x  libvpx/build/make/gen_msvs_proj.sh | 2
-rwxr-xr-x  libvpx/build/make/gen_msvs_vcxproj.sh | 2
-rw-r--r--  libvpx/build/make/ios-Info.plist | 37
-rwxr-xr-x  libvpx/build/make/iosbuild.sh | 127
-rwxr-xr-x  libvpx/build/make/msvs_common.sh | 7
-rwxr-xr-x  libvpx/build/make/version.sh | 3
-rwxr-xr-x  libvpx/configure | 90
-rw-r--r--  libvpx/examples.mk | 37
-rw-r--r--  libvpx/examples/simple_encoder.c | 22
-rw-r--r--  libvpx/examples/twopass_encoder.c | 24
-rw-r--r--  libvpx/examples/vp8_multi_resolution_encoder.c | 25
-rw-r--r--  libvpx/examples/vp9_spatial_svc_encoder.c | 40
-rw-r--r--  libvpx/examples/vpx_temporal_svc_encoder.c | 33
-rw-r--r--  libvpx/ivfdec.c | 2
-rw-r--r--  libvpx/libs.mk | 63
-rw-r--r--  libvpx/md5_utils.c | 15
-rw-r--r--  libvpx/test/acm_random.h | 6
-rw-r--r--  libvpx/test/active_map_test.cc | 2
-rw-r--r--  libvpx/test/add_noise_test.cc | 197
-rw-r--r--  libvpx/test/altref_test.cc | 92
-rw-r--r--  libvpx/test/avg_test.cc (renamed from libvpx/test/vp9_avg_test.cc) | 169
-rw-r--r--  libvpx/test/borders_test.cc | 5
-rw-r--r--  libvpx/test/byte_alignment_test.cc | 4
-rw-r--r--  libvpx/test/codec_factory.h | 97
-rw-r--r--  libvpx/test/convolve_test.cc | 243
-rw-r--r--  libvpx/test/cpu_speed_test.cc | 33
-rw-r--r--  libvpx/test/datarate_test.cc | 248
-rw-r--r--  libvpx/test/dct16x16_test.cc | 90
-rw-r--r--  libvpx/test/dct32x32_test.cc | 90
-rw-r--r--  libvpx/test/decode_api_test.cc | 37
-rw-r--r--  libvpx/test/encode_api_test.cc | 65
-rw-r--r--  libvpx/test/encode_test_driver.cc | 9
-rw-r--r--  libvpx/test/encode_test_driver.h | 4
-rw-r--r--  libvpx/test/error_resilience_test.cc | 5
-rw-r--r--  libvpx/test/external_frame_buffer_test.cc | 7
-rw-r--r--  libvpx/test/fdct4x4_test.cc | 24
-rw-r--r--  libvpx/test/fdct8x8_test.cc | 24
-rw-r--r--  libvpx/test/hadamard_test.cc | 220
-rw-r--r--  libvpx/test/level_test.cc | 119
-rw-r--r--  libvpx/test/lpf_8_test.cc | 300
-rw-r--r--  libvpx/test/minmax_test.cc | 132
-rw-r--r--  libvpx/test/realtime_test.cc | 64
-rw-r--r--  libvpx/test/register_state_check.h | 6
-rw-r--r--  libvpx/test/resize_test.cc | 263
-rw-r--r--  libvpx/test/sad_test.cc | 983
-rwxr-xr-x  libvpx/test/simple_encoder.sh | 11
-rw-r--r--  libvpx/test/sixtap_predict_test.cc | 66
-rw-r--r--  libvpx/test/superframe_test.cc | 11
-rw-r--r--  libvpx/test/test-data.mk | 92
-rw-r--r--  libvpx/test/test-data.sha1 | 90
-rw-r--r--  libvpx/test/test.mk | 45
-rw-r--r--  libvpx/test/test_intra_pred_speed.cc | 63
-rw-r--r--  libvpx/test/test_vector_test.cc | 21
-rw-r--r--  libvpx/test/test_vectors.cc | 55
-rw-r--r--  libvpx/test/test_vectors.h | 2
-rw-r--r--  libvpx/test/tile_independence_test.cc | 2
-rwxr-xr-x  libvpx/test/twopass_encoder.sh | 12
-rw-r--r--  libvpx/test/variance_test.cc | 1653
-rw-r--r--  libvpx/test/vp10_dct_test.cc | 111
-rw-r--r--  libvpx/test/vp10_inv_txfm_test.cc | 321
-rw-r--r--  libvpx/test/vp9_arf_freq_test.cc | 20
-rw-r--r--  libvpx/test/vp9_denoiser_sse2_test.cc | 7
-rw-r--r--  libvpx/test/vp9_encoder_parms_get_to_decoder.cc | 8
-rw-r--r--  libvpx/test/vp9_end_to_end_test.cc | 20
-rw-r--r--  libvpx/test/vp9_error_block_test.cc | 6
-rw-r--r--  libvpx/test/vp9_ethread_test.cc | 43
-rw-r--r--  libvpx/test/vp9_intrapred_test.cc | 96
-rw-r--r--  libvpx/test/vp9_lossless_test.cc | 4
-rwxr-xr-x  libvpx/test/vp9_spatial_svc_encoder.sh | 2
-rw-r--r--  libvpx/test/webm_video_source.h | 4
-rw-r--r--  libvpx/third_party/googletest/README.libvpx | 6
-rw-r--r--  libvpx/third_party/googletest/src/include/gtest/gtest.h | 4
-rw-r--r--  libvpx/third_party/libwebm/Android.mk | 17
-rw-r--r--  libvpx/third_party/libwebm/README.libvpx | 2
-rw-r--r--  libvpx/third_party/libwebm/RELEASE.TXT | 34
-rw-r--r--  libvpx/third_party/libwebm/common/file_util.cc | 67
-rw-r--r--  libvpx/third_party/libwebm/common/file_util.h | 41
-rw-r--r--  libvpx/third_party/libwebm/common/hdr_util.cc | 182
-rw-r--r--  libvpx/third_party/libwebm/common/hdr_util.h | 51
-rw-r--r--  libvpx/third_party/libwebm/common/webmids.h (renamed from libvpx/third_party/libwebm/webmids.hpp) | 39
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc (renamed from libvpx/third_party/libwebm/mkvmuxer.cpp) | 1538
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h (renamed from libvpx/third_party/libwebm/mkvmuxer.hpp) | 715
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h (renamed from libvpx/third_party/libwebm/mkvmuxertypes.hpp) | 22
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc (renamed from libvpx/third_party/libwebm/mkvmuxerutil.cpp) | 297
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h | 95
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc (renamed from libvpx/third_party/libwebm/mkvwriter.cpp) | 4
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h (renamed from libvpx/third_party/libwebm/mkvwriter.hpp) | 12
-rw-r--r--  libvpx/third_party/libwebm/mkvmuxerutil.hpp | 83
-rw-r--r--  libvpx/third_party/libwebm/mkvparser/mkvparser.cc (renamed from libvpx/third_party/libwebm/mkvparser.cpp) | 534
-rw-r--r--  libvpx/third_party/libwebm/mkvparser/mkvparser.h (renamed from libvpx/third_party/libwebm/mkvparser.hpp) | 107
-rw-r--r--  libvpx/third_party/libwebm/mkvparser/mkvreader.cc (renamed from libvpx/third_party/libwebm/mkvreader.cpp) | 5
-rw-r--r--  libvpx/third_party/libwebm/mkvparser/mkvreader.h (renamed from libvpx/third_party/libwebm/mkvreader.hpp) | 12
-rw-r--r--  libvpx/third_party/x86inc/README.libvpx | 8
-rw-r--r--  libvpx/third_party/x86inc/x86inc.asm | 400
-rwxr-xr-x  libvpx/tools/gen_authors.sh | 2
-rw-r--r--  libvpx/tools_common.c | 12
-rw-r--r--  libvpx/tools_common.h | 1
-rw-r--r--  libvpx/vp8/common/arm/neon/bilinearpredict_neon.c | 108
-rw-r--r--  libvpx/vp8/common/arm/neon/sixtappredict_neon.c | 377
-rw-r--r--  libvpx/vp8/common/findnearmv.h | 5
-rw-r--r--  libvpx/vp8/common/generic/systemdependent.c | 2
-rw-r--r--  libvpx/vp8/common/mips/msa/postproc_msa.c | 52
-rw-r--r--  libvpx/vp8/common/postproc.c | 51
-rw-r--r--  libvpx/vp8/common/reconintra4x4.h | 4
-rw-r--r--  libvpx/vp8/common/rtcd_defs.pl | 6
-rw-r--r--  libvpx/vp8/common/threading.h | 60
-rw-r--r--  libvpx/vp8/common/vp8_loopfilter.c | 2
-rw-r--r--  libvpx/vp8/common/x86/postproc_mmx.asm | 62
-rw-r--r--  libvpx/vp8/common/x86/postproc_sse2.asm | 62
-rw-r--r--  libvpx/vp8/decoder/dboolhuff.c | 2
-rw-r--r--  libvpx/vp8/decoder/dboolhuff.h | 2
-rw-r--r--  libvpx/vp8/decoder/decodeframe.c | 15
-rw-r--r--  libvpx/vp8/decoder/error_concealment.c | 15
-rw-r--r--  libvpx/vp8/decoder/error_concealment.h | 8
-rw-r--r--  libvpx/vp8/decoder/onyxd_int.h | 4
-rw-r--r--  libvpx/vp8/decoder/threading.c | 67
-rw-r--r--  libvpx/vp8/encoder/bitstream.c | 2
-rw-r--r--  libvpx/vp8/encoder/boolhuff.h | 2
-rw-r--r--  libvpx/vp8/encoder/denoising.c | 124
-rw-r--r--  libvpx/vp8/encoder/denoising.h | 7
-rw-r--r--  libvpx/vp8/encoder/encodeframe.c | 24
-rw-r--r--  libvpx/vp8/encoder/ethreading.c | 55
-rw-r--r--  libvpx/vp8/encoder/firstpass.c | 8
-rw-r--r--  libvpx/vp8/encoder/lookahead.c | 1
-rw-r--r--  libvpx/vp8/encoder/mcomp.c | 1
-rw-r--r--  libvpx/vp8/encoder/onyx_if.c | 109
-rw-r--r--  libvpx/vp8/encoder/onyx_int.h | 5
-rw-r--r--  libvpx/vp8/encoder/pickinter.c | 85
-rw-r--r--  libvpx/vp8/encoder/rdopt.c | 5
-rw-r--r--  libvpx/vp8/encoder/vp8_quantize.c | 6
-rw-r--r--  libvpx/vp8/vp8_cx_iface.c | 42
-rw-r--r--  libvpx/vp8/vp8_dx_iface.c | 26
-rw-r--r--  libvpx/vp9/common/vp9_alloccommon.c | 25
-rw-r--r--  libvpx/vp9/common/vp9_alloccommon.h | 1
-rw-r--r--  libvpx/vp9/common/vp9_blockd.c | 10
-rw-r--r--  libvpx/vp9/common/vp9_blockd.h | 35
-rw-r--r--  libvpx/vp9/common/vp9_common.h | 1
-rw-r--r--  libvpx/vp9/common/vp9_common_data.c | 15
-rw-r--r--  libvpx/vp9/common/vp9_common_data.h | 3
-rw-r--r--  libvpx/vp9/common/vp9_debugmodes.c | 18
-rw-r--r--  libvpx/vp9/common/vp9_entropy.c | 19
-rw-r--r--  libvpx/vp9/common/vp9_entropy.h | 2
-rw-r--r--  libvpx/vp9/common/vp9_entropymv.c | 8
-rw-r--r--  libvpx/vp9/common/vp9_entropymv.h | 9
-rw-r--r--  libvpx/vp9/common/vp9_idct.c | 3
-rw-r--r--  libvpx/vp9/common/vp9_loopfilter.c | 564
-rw-r--r--  libvpx/vp9/common/vp9_loopfilter.h | 3
-rw-r--r--  libvpx/vp9/common/vp9_mfqe.c | 12
-rw-r--r--  libvpx/vp9/common/vp9_mvref_common.c | 64
-rw-r--r--  libvpx/vp9/common/vp9_mvref_common.h | 23
-rw-r--r--  libvpx/vp9/common/vp9_onyxc_int.h | 21
-rw-r--r--  libvpx/vp9/common/vp9_postproc.c | 30
-rw-r--r--  libvpx/vp9/common/vp9_pred_common.c | 231
-rw-r--r--  libvpx/vp9/common/vp9_pred_common.h | 51
-rw-r--r--  libvpx/vp9/common/vp9_reconinter.c | 40
-rw-r--r--  libvpx/vp9/common/vp9_reconinter.h | 20
-rw-r--r--  libvpx/vp9/common/vp9_reconintra.c | 143
-rw-r--r--  libvpx/vp9/common/vp9_rtcd_defs.pl | 61
-rw-r--r--  libvpx/vp9/common/vp9_scan.c | 6
-rw-r--r--  libvpx/vp9/common/vp9_scan.h | 2
-rw-r--r--  libvpx/vp9/common/vp9_seg_common.c | 1
-rw-r--r--  libvpx/vp9/common/vp9_seg_common.h | 3
-rw-r--r--  libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c | 1
-rw-r--r--  libvpx/vp9/common/x86/vp9_postproc_sse2.asm | 62
-rw-r--r--  libvpx/vp9/decoder/vp9_decodeframe.c | 534
-rw-r--r--  libvpx/vp9/decoder/vp9_decodemv.c | 538
-rw-r--r--  libvpx/vp9/decoder/vp9_decodemv.h | 2
-rw-r--r--  libvpx/vp9/decoder/vp9_decoder.c | 23
-rw-r--r--  libvpx/vp9/decoder/vp9_decoder.h | 13
-rw-r--r--  libvpx/vp9/decoder/vp9_detokenize.c | 205
-rw-r--r--  libvpx/vp9/decoder/vp9_dsubexp.c | 2
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_360.c | 74
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_360.h | 28
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_complexity.c | 6
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c | 191
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h | 11
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_variance.c | 6
-rw-r--r--  libvpx/vp9/encoder/vp9_bitstream.c | 184
-rw-r--r--  libvpx/vp9/encoder/vp9_bitstream.h | 2
-rw-r--r--  libvpx/vp9/encoder/vp9_block.h | 15
-rw-r--r--  libvpx/vp9/encoder/vp9_context_tree.h | 1
-rw-r--r--  libvpx/vp9/encoder/vp9_cost.c | 49
-rw-r--r--  libvpx/vp9/encoder/vp9_cost.h | 10
-rw-r--r--  libvpx/vp9/encoder/vp9_denoiser.c | 386
-rw-r--r--  libvpx/vp9/encoder/vp9_denoiser.h | 46
-rw-r--r--  libvpx/vp9/encoder/vp9_encodeframe.c | 812
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.c | 130
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.h | 3
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemv.c | 99
-rw-r--r--  libvpx/vp9/encoder/vp9_encoder.c | 629
-rw-r--r--  libvpx/vp9/encoder/vp9_encoder.h | 108
-rw-r--r--  libvpx/vp9/encoder/vp9_ethread.c | 4
-rw-r--r--  libvpx/vp9/encoder/vp9_firstpass.c | 440
-rw-r--r--  libvpx/vp9/encoder/vp9_firstpass.h | 11
-rw-r--r--  libvpx/vp9/encoder/vp9_lookahead.c | 14
-rw-r--r--  libvpx/vp9/encoder/vp9_lookahead.h | 8
-rw-r--r--  libvpx/vp9/encoder/vp9_mbgraph.c | 33
-rw-r--r--  libvpx/vp9/encoder/vp9_mcomp.c | 537
-rw-r--r--  libvpx/vp9/encoder/vp9_mcomp.h | 17
-rw-r--r--  libvpx/vp9/encoder/vp9_noise_estimate.c | 263
-rw-r--r--  libvpx/vp9/encoder/vp9_noise_estimate.h | 58
-rw-r--r--  libvpx/vp9/encoder/vp9_picklpf.c | 3
-rw-r--r--  libvpx/vp9/encoder/vp9_pickmode.c | 913
-rw-r--r--  libvpx/vp9/encoder/vp9_quantize.c | 13
-rw-r--r--  libvpx/vp9/encoder/vp9_ratectrl.c | 388
-rw-r--r--  libvpx/vp9/encoder/vp9_ratectrl.h | 3
-rw-r--r--  libvpx/vp9/encoder/vp9_rd.c | 69
-rw-r--r--  libvpx/vp9/encoder/vp9_rd.h | 18
-rw-r--r--  libvpx/vp9/encoder/vp9_rdopt.c | 642
-rw-r--r--  libvpx/vp9/encoder/vp9_rdopt.h | 9
-rw-r--r--  libvpx/vp9/encoder/vp9_resize.c | 59
-rw-r--r--  libvpx/vp9/encoder/vp9_segmentation.c | 10
-rw-r--r--  libvpx/vp9/encoder/vp9_skin_detection.c | 132
-rw-r--r--  libvpx/vp9/encoder/vp9_skin_detection.h | 7
-rw-r--r--  libvpx/vp9/encoder/vp9_speed_features.c | 118
-rw-r--r--  libvpx/vp9/encoder/vp9_speed_features.h | 37
-rw-r--r--  libvpx/vp9/encoder/vp9_subexp.c | 89
-rw-r--r--  libvpx/vp9/encoder/vp9_subexp.h | 2
-rw-r--r--  libvpx/vp9/encoder/vp9_svc_layercontext.c | 170
-rw-r--r--  libvpx/vp9/encoder/vp9_svc_layercontext.h | 9
-rw-r--r--  libvpx/vp9/encoder/vp9_temporal_filter.c | 158
-rw-r--r--  libvpx/vp9/encoder/vp9_tokenize.c | 595
-rw-r--r--  libvpx/vp9/encoder/vp9_tokenize.h | 39
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c (renamed from libvpx/vp9/encoder/x86/vp9_dct_sse2.c) | 0
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_dct_mmx.asm | 104
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_dct_sse2.asm | 86
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c | 130
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c | 314
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c | 211
-rw-r--r--  libvpx/vp9/vp9_cx_iface.c | 93
-rw-r--r--  libvpx/vp9/vp9_dx_iface.c | 19
-rw-r--r--  libvpx/vp9/vp9cx.mk | 17
-rw-r--r--  libvpx/vpx/exports_enc | 6
-rw-r--r--  libvpx/vpx/exports_spatial_svc | 6
-rw-r--r--  libvpx/vpx/src/svc_encodeframe.c | 51
-rw-r--r--  libvpx/vpx/vp8cx.h | 30
-rw-r--r--  libvpx/vpx/vp8dx.h | 9
-rw-r--r--  libvpx/vpx/vpx_image.h | 2
-rw-r--r--  libvpx/vpx_dsp/add_noise.c | 40
-rw-r--r--  libvpx/vpx_dsp/arm/avg_neon.c (renamed from libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c) | 106
-rw-r--r--  libvpx/vpx_dsp/arm/hadamard_neon.c | 201
-rw-r--r--  libvpx/vpx_dsp/arm/loopfilter_4_neon.asm | 32
-rw-r--r--  libvpx/vpx_dsp/arm/loopfilter_4_neon.c | 16
-rw-r--r--  libvpx/vpx_dsp/arm/loopfilter_8_neon.asm | 30
-rw-r--r--  libvpx/vpx_dsp/arm/loopfilter_8_neon.c | 16
-rw-r--r--  libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm | 47
-rw-r--r--  libvpx/vpx_dsp/arm/loopfilter_neon.c | 12
-rw-r--r--  libvpx/vpx_dsp/avg.c (renamed from libvpx/vp9/encoder/vp9_avg.c) | 37
-rw-r--r--  libvpx/vpx_dsp/bitreader.c | 2
-rw-r--r--  libvpx/vpx_dsp/bitreader.h | 2
-rw-r--r--  libvpx/vpx_dsp/bitreader_buffer.c | 2
-rw-r--r--  libvpx/vpx_dsp/bitwriter.h | 2
-rw-r--r--  libvpx/vpx_dsp/fwd_txfm.c | 13
-rw-r--r--  libvpx/vpx_dsp/intrapred.c | 44
-rw-r--r--  libvpx/vpx_dsp/inv_txfm.c | 1752
-rw-r--r--  libvpx/vpx_dsp/inv_txfm.h | 44
-rw-r--r--  libvpx/vpx_dsp/loopfilter.c | 108
-rw-r--r--  libvpx/vpx_dsp/mips/add_noise_msa.c | 59
-rw-r--r--  libvpx/vpx_dsp/mips/avg_msa.c (renamed from libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c) | 6
-rw-r--r--  libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c | 36
-rw-r--r--  libvpx/vpx_dsp/mips/fwd_txfm_msa.c | 12
-rw-r--r--  libvpx/vpx_dsp/mips/loopfilter_16_msa.c | 24
-rw-r--r--  libvpx/vpx_dsp/mips/loopfilter_4_msa.c | 10
-rw-r--r--  libvpx/vpx_dsp/mips/loopfilter_8_msa.c | 10
-rw-r--r--  libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c | 23
-rw-r--r--  libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c | 6
-rw-r--r--  libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c | 26
-rw-r--r--  libvpx/vpx_dsp/psnrhvs.c | 2
-rw-r--r--  libvpx/vpx_dsp/quantize.c | 5
-rw-r--r--  libvpx/vpx_dsp/sad.c | 45
-rw-r--r--  libvpx/vpx_dsp/variance.c | 16
-rw-r--r--  libvpx/vpx_dsp/variance.h | 4
-rw-r--r--  libvpx/vpx_dsp/vpx_dsp.mk | 35
-rw-r--r--  libvpx/vpx_dsp/vpx_dsp_common.h | 7
-rw-r--r--  libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl | 239
-rw-r--r--  libvpx/vpx_dsp/x86/add_noise_sse2.asm | 83
-rw-r--r--  libvpx/vpx_dsp/x86/avg_intrin_sse2.c (renamed from libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c) | 59
-rw-r--r--  libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm (renamed from libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm) | 4
-rw-r--r--  libvpx/vpx_dsp/x86/convolve.h | 32
-rw-r--r--  libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h | 1
-rw-r--r--  libvpx/vpx_dsp/x86/fwd_txfm_sse2.c | 43
-rw-r--r--  libvpx/vpx_dsp/x86/halfpix_variance_sse2.c | 14
-rw-r--r--  libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm | 271
-rw-r--r--  libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c | 75
-rw-r--r--  libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 30
-rw-r--r--  libvpx/vpx_dsp/x86/highbd_variance_sse2.c | 103
-rw-r--r--  libvpx/vpx_dsp/x86/intrapred_sse2.asm | 443
-rw-r--r--  libvpx/vpx_dsp/x86/intrapred_ssse3.asm | 165
-rw-r--r--  libvpx/vpx_dsp/x86/inv_txfm_sse2.c | 38
-rw-r--r--  libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm | 1496
-rw-r--r--  libvpx/vpx_dsp/x86/inv_wht_sse2.asm | 8
-rw-r--r--  libvpx/vpx_dsp/x86/loopfilter_avx2.c | 23
-rw-r--r--  libvpx/vpx_dsp/x86/loopfilter_mmx.asm | 611
-rw-r--r--  libvpx/vpx_dsp/x86/loopfilter_sse2.c | 249
-rw-r--r--  libvpx/vpx_dsp/x86/sad4d_sse2.asm | 56
-rw-r--r--  libvpx/vpx_dsp/x86/sad_mmx.asm | 427
-rw-r--r--  libvpx/vpx_dsp/x86/sad_sse2.asm | 23
-rw-r--r--  libvpx/vpx_dsp/x86/subpel_variance_sse2.asm | 360
-rw-r--r--  libvpx/vpx_dsp/x86/variance_avx2.c | 2
-rw-r--r--  libvpx/vpx_dsp/x86/variance_impl_mmx.asm | 744
-rw-r--r--  libvpx/vpx_dsp/x86/variance_mmx.c | 249
-rw-r--r--  libvpx/vpx_dsp/x86/variance_sse2.c | 86
-rw-r--r--  libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm | 70
-rw-r--r--  libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm | 158
-rw-r--r--  libvpx/vpx_mem/vpx_mem.c | 2
-rw-r--r--  libvpx/vpx_ports/mem_ops.h | 40
-rw-r--r--  libvpx/vpx_ports/mem_ops_aligned.h | 4
-rw-r--r--  libvpx/vpx_ports/vpx_once.h | 110
-rw-r--r--  libvpx/vpx_ports/x86.h | 40
-rw-r--r--  libvpx/vpx_ports/x86_abi_support.asm | 2
-rw-r--r--  libvpx/vpx_scale/generic/yv12config.c | 36
-rw-r--r--  libvpx/vpx_scale/generic/yv12extend.c | 6
-rw-r--r--  libvpx/vpx_scale/vpx_scale_rtcd.pl | 2
-rw-r--r--  libvpx/vpx_util/vpx_thread.h | 146
-rw-r--r--  libvpx/vpxdec.c | 7
-rw-r--r--  libvpx/vpxenc.c | 128
-rw-r--r--  libvpx/vpxstats.c | 16
-rw-r--r--  libvpx/webmdec.cc | 17
-rw-r--r--  libvpx/webmdec.h | 8
-rw-r--r--  libvpx/webmenc.cc | 45
-rw-r--r--  libvpx/webmenc.h | 9
328 files changed, 19210 insertions, 14738 deletions
diff --git a/libvpx/.mailmap b/libvpx/.mailmap
index 42f3617b0..94cb1ecfe 100644
--- a/libvpx/.mailmap
+++ b/libvpx/.mailmap
@@ -1,27 +1,28 @@
Adrian Grange <agrange@google.com>
-Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com>
Aℓex Converse <aconverse@google.com>
Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
Hangyu Kuang <hkuang@google.com>
-Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com>
Hui Su <huisu@google.com>
Jacky Chen <jackychen@google.com>
Jim Bankoski <jimbankoski@google.com>
Johann Koenig <johannkoenig@google.com>
Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
-Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org>
John Koleszar <jkoleszar@google.com>
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
+Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
@@ -29,8 +30,8 @@ Sami Pietilä <samipietila@google.com>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
-Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
+Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
-Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com>
+Yaowu Xu <yaowu@google.com> <Yaowu Xu>
diff --git a/libvpx/AUTHORS b/libvpx/AUTHORS
index f89b6776a..fcd5c534a 100644
--- a/libvpx/AUTHORS
+++ b/libvpx/AUTHORS
@@ -24,6 +24,7 @@ changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
chm <chm@rock-chips.com>
Christian Duvivier <cduvivier@google.com>
+Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
Deb Mukherjee <debargha@google.com>
Dim Temp <dimtemp0@gmail.com>
@@ -56,10 +57,12 @@ James Zern <jzern@google.com>
Jan Gerber <j@mailb.org>
Jan Kratochvil <jan.kratochvil@redhat.com>
Janne Salonen <jsalonen@google.com>
+Jean-Yves Avenard <jyavenard@mozilla.com>
Jeff Faust <jfaust@google.com>
Jeff Muizelaar <jmuizelaar@mozilla.com>
Jeff Petkau <jpet@chromium.org>
Jia Jia <jia.jia@linaro.org>
+Jian Zhou <zhoujian@google.com>
Jim Bankoski <jimbankoski@google.com>
Jingning Han <jingning@google.com>
Joey Parrish <joeyparrish@google.com>
@@ -74,6 +77,7 @@ Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
KO Myung-Hun <komh@chollian.net>
Lawrence Velázquez <larryv@macports.org>
+Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
Makoto Kato <makoto.kt@gmail.com>
@@ -107,9 +111,11 @@ Rob Bradford <rob@linux.intel.com>
Ronald S. Bultje <rsbultje@gmail.com>
Rui Ueyama <ruiu@google.com>
Sami Pietilä <samipietila@google.com>
+Sasi Inguva <isasi@google.com>
Scott Graham <scottmg@chromium.org>
Scott LaVarnway <slavarnway@google.com>
Sean McGovern <gseanmcg@gmail.com>
+Sergey Kolomenkin <kolomenkin@gmail.com>
Sergey Ulanov <sergeyu@chromium.org>
Shimon Doodkin <helpmepro1@gmail.com>
Shunyao Li <shunyaoli@google.com>
@@ -126,8 +132,10 @@ Timothy B. Terriberry <tterribe@xiph.org>
Tom Finegan <tomfinegan@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Yaowu Xu <yaowu@google.com>
+Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>
Yunqing Wang <yunqingwang@google.com>
+Yury Gitman <yuryg@google.com>
Zoe Liu <zoeliu@google.com>
Google Inc.
The Mozilla Foundation
diff --git a/libvpx/CHANGELOG b/libvpx/CHANGELOG
index 7746cc6c4..795d395f9 100644
--- a/libvpx/CHANGELOG
+++ b/libvpx/CHANGELOG
@@ -1,3 +1,33 @@
+2016-07-20 v1.6.0 "Khaki Campbell Duck"
+ This release improves upon the VP9 encoder and speeds up the encoding and
+ decoding processes.
+
+ - Upgrading:
+ This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
+ in vpx_image and some minor changes to the VP8_COMP structure.
+
+ The default key frame interval for VP9 has changed from 128 to 9999.
+
+ - Enhancement:
+ A core focus has been performance for low end Intel processors. SSSE3
+ instructions such as 'pshufb' have been avoided and instructions have been
+ reordered to better accommodate the more constrained pipelines.
+
+ As a result, devices based on Celeron processors have seen substantial
+ decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
+ decoding speed improved between 10 and 30%. Between Javan Whistling Duck
+ and Khaki Campbell Duck, it improved another 10 to 15%.
+
+ While Celeron benefited most, Core-i5 also improved 5% and 10% between the
+ respective releases.
+
+ Realtime performance for WebRTC for both speed and quality has received a
+ lot of attention.
+
+ - Bug Fixes:
+ A number of fuzzing issues, found variously by Mozilla, Chromium and others,
+ have been fixed and we strongly recommend updating.
+
2015-11-09 v1.5.0 "Javan Whistling Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.
diff --git a/libvpx/README b/libvpx/README
index 979440eb7..a8e6aebcd 100644
--- a/libvpx/README
+++ b/libvpx/README
@@ -1,4 +1,4 @@
-README - 23 March 2015
+README - 20 July 2016
Welcome to the WebM VP8/VP9 Codec SDK!
@@ -47,7 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
--help output of the configure script. As of this writing, the list of
available targets is:
- armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
armv6-none-rvct
diff --git a/libvpx/build/make/Android.mk b/libvpx/build/make/Android.mk
index df01dece6..9eb6dd280 100644
--- a/libvpx/build/make/Android.mk
+++ b/libvpx/build/make/Android.mk
@@ -174,9 +174,6 @@ endif
ifeq ($(CONFIG_VP9), yes)
$$(rtcd_dep_template_SRCS): vp9_rtcd.h
endif
-ifeq ($(CONFIG_VP10), yes)
-$$(rtcd_dep_template_SRCS): vp10_rtcd.h
-endif
$$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
$$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h
diff --git a/libvpx/build/make/Makefile b/libvpx/build/make/Makefile
index 3081a9268..3e8c02490 100644
--- a/libvpx/build/make/Makefile
+++ b/libvpx/build/make/Makefile
@@ -119,29 +119,25 @@ utiltest:
test-no-data-check::
exampletest-no-data-check utiltest-no-data-check:
-# Add compiler flags for intrinsic files
+# Force to realign stack always on OS/2
ifeq ($(TOOLCHAIN), x86-os2-gcc)
-STACKREALIGN=-mstackrealign
-else
-STACKREALIGN=
+CFLAGS += -mstackrealign
endif
$(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
$(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
-$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(STACKREALIGN)
-$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(STACKREALIGN)
-$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(STACKREALIGN)
-$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(STACKREALIGN)
-$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(STACKREALIGN)
-$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(STACKREALIGN)
-$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(STACKREALIGN)
-$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(STACKREALIGN)
-$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
-$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
-$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
-$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
-$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
-$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)
+$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
+$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2
+$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3
+$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3
+$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3
+$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
+$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
+$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
+$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
+$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
+$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
+$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
$(BUILD_PFX)%.c.d: %.c
$(if $(quiet),@echo " [DEP] $@")
diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh
index c592b6385..4f0071bb5 100755
--- a/libvpx/build/make/configure.sh
+++ b/libvpx/build/make/configure.sh
@@ -185,6 +185,7 @@ add_extralibs() {
#
# Boolean Manipulation Functions
#
+
enable_feature(){
set_all yes $*
}
@@ -201,6 +202,20 @@ disabled(){
eval test "x\$$1" = "xno"
}
+enable_codec(){
+ enabled "${1}" || echo " enabling ${1}"
+ enable_feature "${1}"
+
+ is_in "${1}" vp8 vp9 && enable_feature "${1}_encoder" "${1}_decoder"
+}
+
+disable_codec(){
+ disabled "${1}" || echo " disabling ${1}"
+ disable_feature "${1}"
+
+ is_in "${1}" vp8 vp9 && disable_feature "${1}_encoder" "${1}_decoder"
+}
+
# Iterates through positional parameters, checks to confirm the parameter has
# not been explicitly (force) disabled, and enables the setting controlled by
# the parameter when the setting is not disabled.
@@ -521,22 +536,20 @@ process_common_cmdline() {
;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
- if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
+ if is_in ${option} ${ARCH_EXT_LIST}; then
[ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} "
elif [ $action = "disable" ] && ! disabled $option ; then
- echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
- die_unknown $opt
+ is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
log_echo " disabling $option"
elif [ $action = "enable" ] && ! enabled $option ; then
- echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
- die_unknown $opt
+ is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
log_echo " enabling $option"
fi
${action}_feature $option
;;
--require-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
- if echo "${ARCH_EXT_LIST}" none | grep "^ *$option\$" >/dev/null; then
+ if is_in ${option} ${ARCH_EXT_LIST}; then
RTCD_OPTIONS="${RTCD_OPTIONS}${opt} "
else
die_unknown $opt
@@ -638,16 +651,39 @@ show_darwin_sdk_major_version() {
xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1
}
+# Print the Xcode version.
+show_xcode_version() {
+ xcodebuild -version | head -n1 | cut -d' ' -f2
+}
+
+# Fails when Xcode version is less than 6.3.
+check_xcode_minimum_version() {
+ xcode_major=$(show_xcode_version | cut -f1 -d.)
+ xcode_minor=$(show_xcode_version | cut -f2 -d.)
+ xcode_min_major=6
+ xcode_min_minor=3
+ if [ ${xcode_major} -lt ${xcode_min_major} ]; then
+ return 1
+ fi
+ if [ ${xcode_major} -eq ${xcode_min_major} ] \
+ && [ ${xcode_minor} -lt ${xcode_min_minor} ]; then
+ return 1
+ fi
+}
+
process_common_toolchain() {
if [ -z "$toolchain" ]; then
gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
# detect tgt_isa
case "$gcctarget" in
+ aarch64*)
+ tgt_isa=arm64
+ ;;
armv6*)
tgt_isa=armv6
;;
- armv7*-hardfloat*)
+ armv7*-hardfloat* | armv7*-gnueabihf | arm-*-gnueabihf)
tgt_isa=armv7
float_abi=hard
;;
@@ -688,6 +724,10 @@ process_common_toolchain() {
tgt_isa=x86_64
tgt_os=darwin14
;;
+ *darwin15*)
+ tgt_isa=x86_64
+ tgt_os=darwin15
+ ;;
x86_64*mingw32*)
tgt_os=win64
;;
@@ -744,7 +784,14 @@ process_common_toolchain() {
enabled shared && soft_enable pic
# Minimum iOS version for all target platforms (darwin and iphonesimulator).
- IOS_VERSION_MIN="6.0"
+ # Shared library framework builds are only possible on iOS 8 and later.
+ if enabled shared; then
+ IOS_VERSION_OPTIONS="--enable-shared"
+ IOS_VERSION_MIN="8.0"
+ else
+ IOS_VERSION_OPTIONS=""
+ IOS_VERSION_MIN="6.0"
+ fi
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
@@ -795,6 +842,10 @@ process_common_toolchain() {
add_cflags "-mmacosx-version-min=10.10"
add_ldflags "-mmacosx-version-min=10.10"
;;
+ *-darwin15-*)
+ add_cflags "-mmacosx-version-min=10.11"
+ add_ldflags "-mmacosx-version-min=10.11"
+ ;;
*-iphonesimulator-*)
add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
@@ -869,7 +920,6 @@ process_common_toolchain() {
case ${tgt_cc} in
gcc)
- CROSS=${CROSS:-arm-none-linux-gnueabi-}
link_with_cc=gcc
setup_gnu_toolchain
arch_int=${tgt_isa##armv}
@@ -891,6 +941,9 @@ EOF
check_add_cflags -mfpu=neon #-ftree-vectorize
check_add_asflags -mfpu=neon
fi
+ elif [ ${tgt_isa} = "arm64" ] || [ ${tgt_isa} = "armv8" ]; then
+ check_add_cflags -march=armv8-a
+ check_add_asflags -march=armv8-a
else
check_add_cflags -march=${tgt_isa}
check_add_asflags -march=${tgt_isa}
@@ -958,6 +1011,10 @@ EOF
;;
android*)
+ if [ -z "${sdk_path}" ]; then
+ die "Must specify --sdk-path for Android builds."
+ fi
+
SDK_PATH=${sdk_path}
COMPILER_LOCATION=`find "${SDK_PATH}" \
-name "arm-linux-androideabi-gcc*" -print -quit`
@@ -979,8 +1036,10 @@ EOF
awk '{ print $1 }' | tail -1`
fi
- add_cflags "--sysroot=${alt_libc}"
- add_ldflags "--sysroot=${alt_libc}"
+ if [ -d "${alt_libc}" ]; then
+ add_cflags "--sysroot=${alt_libc}"
+ add_ldflags "--sysroot=${alt_libc}"
+ fi
# linker flag that routes around a CPU bug in some
# Cortex-A8 implementations (NDK Dev Guide)
@@ -1006,18 +1065,7 @@ EOF
NM="$(${XCRUN_FIND} nm)"
RANLIB="$(${XCRUN_FIND} ranlib)"
AS_SFX=.s
-
- # Special handling of ld for armv6 because libclang_rt.ios.a does
- # not contain armv6 support in Apple's clang package:
- # Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn).
- # TODO(tomfinegan): Remove this. Our minimum iOS version (6.0)
- # renders support for armv6 unnecessary because the 3GS and up
- # support neon.
- if [ "${tgt_isa}" = "armv6" ]; then
- LD="$(${XCRUN_FIND} ld)"
- else
- LD="${CXX:-$(${XCRUN_FIND} ld)}"
- fi
+ LD="${CXX:-$(${XCRUN_FIND} ld)}"
# ASFLAGS is written here instead of using check_add_asflags
# because we need to overwrite all of ASFLAGS and purge the
@@ -1043,6 +1091,19 @@ EOF
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
done
+ case ${tgt_isa} in
+ armv7|armv7s|armv8|arm64)
+ if enabled neon && ! check_xcode_minimum_version; then
+ soft_disable neon
+ log_echo " neon disabled: upgrade Xcode (need v6.3+)."
+ if enabled neon_asm; then
+ soft_disable neon_asm
+ log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)."
+ fi
+ fi
+ ;;
+ esac
+
asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
@@ -1057,7 +1118,7 @@ EOF
if enabled rvct; then
# Check if we have CodeSourcery GCC in PATH. Needed for
# libraries
- hash arm-none-linux-gnueabi-gcc 2>&- || \
+ which arm-none-linux-gnueabi-gcc 2>&- || \
die "Couldn't find CodeSourcery GCC from PATH"
# Use armcc as a linker to enable translation of
@@ -1098,7 +1159,7 @@ EOF
check_add_ldflags -mfp64
;;
i6400)
- check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
+ check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
check_add_cflags -mload-store-pairs -mhard-float -mfp64
check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64
check_add_ldflags -mips64r6 -mabi=64 -mfp64
@@ -1125,7 +1186,7 @@ EOF
CC=${CC:-${CROSS}gcc}
CXX=${CXX:-${CROSS}g++}
LD=${LD:-${CROSS}gcc}
- CROSS=${CROSS:-g}
+ CROSS=${CROSS-g}
;;
os2)
disable_feature pic
@@ -1178,6 +1239,12 @@ EOF
soft_disable avx2
;;
esac
+ case $vc_version in
+ 7|8|9)
+ echo "${tgt_cc} omits stdint.h, disabling webm-io..."
+ soft_disable webm_io
+ ;;
+ esac
;;
esac
@@ -1198,33 +1265,43 @@ EOF
soft_enable runtime_cpu_detect
# We can't use 'check_cflags' until the compiler is configured and CC is
# populated.
- check_gcc_machine_option mmx
- check_gcc_machine_option sse
- check_gcc_machine_option sse2
- check_gcc_machine_option sse3
- check_gcc_machine_option ssse3
- check_gcc_machine_option sse4 sse4_1
- check_gcc_machine_option avx
- check_gcc_machine_option avx2
-
- case "${AS}" in
- auto|"")
- which nasm >/dev/null 2>&1 && AS=nasm
- which yasm >/dev/null 2>&1 && AS=yasm
- if [ "${AS}" = nasm ] ; then
- # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit
- # this check if they start shipping a compatible version.
- apple=`nasm -v | grep "Apple"`
- [ -n "${apple}" ] \
- && echo "Unsupported version of nasm: ${apple}" \
- && AS=""
+ for ext in ${ARCH_EXT_LIST_X86}; do
+ # disable higher order extensions to simplify asm dependencies
+ if [ "$disable_exts" = "yes" ]; then
+ if ! disabled $ext; then
+ RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${ext} "
+ disable_feature $ext
fi
- [ "${AS}" = auto ] || [ -z "${AS}" ] \
- && die "Neither yasm nor nasm have been found." \
- "See the prerequisites section in the README for more info."
- ;;
- esac
- log_echo " using $AS"
+ elif disabled $ext; then
+ disable_exts="yes"
+ else
+ # use the shortened version for the flag: sse4_1 -> sse4
+ check_gcc_machine_option ${ext%_*} $ext
+ fi
+ done
+
+ if enabled external_build; then
+ log_echo " skipping assembler detection"
+ else
+ case "${AS}" in
+ auto|"")
+ which nasm >/dev/null 2>&1 && AS=nasm
+ which yasm >/dev/null 2>&1 && AS=yasm
+ if [ "${AS}" = nasm ] ; then
+ # Apple ships version 0.98 of nasm through at least Xcode 6. Revisit
+ # this check if they start shipping a compatible version.
+ apple=`nasm -v | grep "Apple"`
+ [ -n "${apple}" ] \
+ && echo "Unsupported version of nasm: ${apple}" \
+ && AS=""
+ fi
+ [ "${AS}" = auto ] || [ -z "${AS}" ] \
+ && die "Neither yasm nor nasm have been found." \
+ "See the prerequisites section in the README for more info."
+ ;;
+ esac
+ log_echo " using $AS"
+ fi
[ "${AS##*/}" = nasm ] && add_asflags -Ox
AS_SFX=.asm
case ${tgt_os} in
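
[The x86 loop added in the configure.sh hunk above relies on an ordering invariant: ARCH_EXT_LIST_X86 lists the extensions from oldest (mmx) to newest (avx2), and once any one of them is disabled, every later one is disabled too, because assembly written for a newer extension may call into an older one. A minimal standalone sketch of that cascade, with an illustrative first_disabled variable standing in for a user-passed --disable flag:

    #!/bin/sh
    # Extensions ordered oldest to newest; disabling one disables the rest.
    ARCH_EXT_LIST_X86="mmx sse sse2 sse3 ssse3 sse4_1 avx avx2"
    first_disabled="sse4_1"  # stand-in for --disable-sse4_1 on the command line

    disable_exts="no"
    for ext in ${ARCH_EXT_LIST_X86}; do
      [ "${ext}" = "${first_disabled}" ] && disable_exts="yes"
      if [ "${disable_exts}" = "yes" ]; then
        echo "disabled: ${ext}"  # configure also appends --disable-${ext} to RTCD_OPTIONS
      else
        echo "enabled:  ${ext}"
      fi
    done
]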
diff --git a/libvpx/build/make/gen_msvs_proj.sh b/libvpx/build/make/gen_msvs_proj.sh
index 0cf335b3d..2b91fbfbc 100755
--- a/libvpx/build/make/gen_msvs_proj.sh
+++ b/libvpx/build/make/gen_msvs_proj.sh
@@ -193,7 +193,7 @@ for opt in "$@"; do
done
# Make one call to fix_path for file_list to improve performance.
-fix_file_list
+fix_file_list file_list
outfile=${outfile:-/dev/stdout}
guid=${guid:-`generate_uuid`}
diff --git a/libvpx/build/make/gen_msvs_vcxproj.sh b/libvpx/build/make/gen_msvs_vcxproj.sh
index 182ea28fa..e98611d10 100755
--- a/libvpx/build/make/gen_msvs_vcxproj.sh
+++ b/libvpx/build/make/gen_msvs_vcxproj.sh
@@ -211,7 +211,7 @@ for opt in "$@"; do
done
# Make one call to fix_path for file_list to improve performance.
-fix_file_list
+fix_file_list file_list
outfile=${outfile:-/dev/stdout}
guid=${guid:-`generate_uuid`}
diff --git a/libvpx/build/make/ios-Info.plist b/libvpx/build/make/ios-Info.plist
new file mode 100644
index 000000000..d157b11a0
--- /dev/null
+++ b/libvpx/build/make/ios-Info.plist
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>en</string>
+ <key>CFBundleExecutable</key>
+ <string>VPX</string>
+ <key>CFBundleIdentifier</key>
+ <string>org.webmproject.VPX</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundleName</key>
+ <string>VPX</string>
+ <key>CFBundlePackageType</key>
+ <string>FMWK</string>
+ <key>CFBundleShortVersionString</key>
+ <string>${VERSION}</string>
+ <key>CFBundleSignature</key>
+ <string>????</string>
+ <key>CFBundleSupportedPlatforms</key>
+ <array>
+ <string>iPhoneOS</string>
+ </array>
+ <key>CFBundleVersion</key>
+ <string>${VERSION}</string>
+ <key>MinimumOSVersion</key>
+ <string>${IOS_VERSION_MIN}</string>
+ <key>UIDeviceFamily</key>
+ <array>
+ <integer>1</integer>
+ <integer>2</integer>
+ </array>
+ <key>VPXFullVersion</key>
+ <string>${FULLVERSION}</string>
+</dict>
+</plist>
diff --git a/libvpx/build/make/iosbuild.sh b/libvpx/build/make/iosbuild.sh
index 6f7180d08..c703f22b0 100755
--- a/libvpx/build/make/iosbuild.sh
+++ b/libvpx/build/make/iosbuild.sh
@@ -24,16 +24,20 @@ CONFIGURE_ARGS="--disable-docs
--disable-unit-tests"
DIST_DIR="_dist"
FRAMEWORK_DIR="VPX.framework"
+FRAMEWORK_LIB="VPX.framework/VPX"
HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
SCRIPT_DIR=$(dirname "$0")
LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
ORIG_PWD="$(pwd)"
-TARGETS="arm64-darwin-gcc
- armv7-darwin-gcc
- armv7s-darwin-gcc
- x86-iphonesimulator-gcc
- x86_64-iphonesimulator-gcc"
+ARM_TARGETS="arm64-darwin-gcc
+ armv7-darwin-gcc
+ armv7s-darwin-gcc"
+SIM_TARGETS="x86-iphonesimulator-gcc
+ x86_64-iphonesimulator-gcc"
+OSX_TARGETS="x86-darwin15-gcc
+ x86_64-darwin15-gcc"
+TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"
# Configures for the target specified by $1, and invokes make with the dist
# target using $DIST_DIR as the distribution output directory.
@@ -134,6 +138,44 @@ create_vpx_framework_config_shim() {
printf "#endif // ${include_guard}" >> "${config_file}"
}
+# Verifies that $FRAMEWORK_LIB fat library contains requested builds.
+verify_framework_targets() {
+ local requested_cpus=""
+ local cpu=""
+
+ # Extract CPU from full target name.
+ for target; do
+ cpu="${target%%-*}"
+ if [ "${cpu}" = "x86" ]; then
+ # lipo -info outputs i386 for libvpx x86 targets.
+ cpu="i386"
+ fi
+ requested_cpus="${requested_cpus}${cpu} "
+ done
+
+ # Get target CPUs present in framework library.
+ local targets_built=$(${LIPO} -info ${FRAMEWORK_LIB})
+
+ # $LIPO -info outputs a string like the following:
+ # Architectures in the fat file: $FRAMEWORK_LIB <architectures>
+ # Capture only the architecture strings.
+ targets_built=${targets_built##*: }
+
+ # Sort CPU strings to make the next step a simple string compare.
+ local actual=$(echo ${targets_built} | tr " " "\n" | sort | tr "\n" " ")
+ local requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ")
+
+ vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}"
+ vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}"
+
+ if [ "${requested}" != "${actual}" ]; then
+ elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list."
+ elog " Requested target CPUs: ${requested}"
+ elog " Actual target CPUs: ${actual}"
+ return 1
+ fi
+}
+
# Configures and builds each target specified by $1, and then builds
# VPX.framework.
build_framework() {
@@ -154,7 +196,12 @@ build_framework() {
for target in ${targets}; do
build_target "${target}"
target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
- lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
+ if [ "${ENABLE_SHARED}" = "yes" ]; then
+ local suffix="dylib"
+ else
+ local suffix="a"
+ fi
+ lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}"
done
cd "${ORIG_PWD}"
@@ -173,13 +220,25 @@ build_framework() {
# Copy in vpx_version.h.
cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"
- vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
+ if [ "${ENABLE_SHARED}" = "yes" ]; then
+ # Adjust the dylib's name so dynamic linking in apps works as expected.
+ install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX
+
+ # Copy in Info.plist.
+ cat "${SCRIPT_DIR}/ios-Info.plist" \
+ | sed "s/\${FULLVERSION}/${FULLVERSION}/g" \
+ | sed "s/\${VERSION}/${VERSION}/g" \
+ | sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \
+ > "${FRAMEWORK_DIR}/Info.plist"
+ fi
+
+ # Confirm VPX.framework/VPX contains the targets requested.
+ verify_framework_targets ${targets}
+
+ vlog "Created fat library ${FRAMEWORK_LIB} containing:"
for lib in ${lib_list}; do
vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')"
done
-
- # TODO(tomfinegan): Verify that expected targets are included within
- # VPX.framework/VPX via lipo -info.
}
# Trap function. Cleans up the subtree used to build all targets contained in
@@ -197,15 +256,28 @@ cleanup() {
fi
}
+print_list() {
+ local indent="$1"
+ shift
+ local list="$@"
+ for entry in ${list}; do
+ echo "${indent}${entry}"
+ done
+}
+
iosbuild_usage() {
cat << EOF
Usage: ${0##*/} [arguments]
--help: Display this message and exit.
+ --enable-shared: Build a dynamic framework for use on iOS 8 or later.
--extra-configure-args <args>: Extra args to pass when configuring libvpx.
+ --macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
+ and x86_64. Allows linking to framework when builds target MacOSX
+ instead of iOS.
--preserve-build-output: Do not delete the build directory.
--show-build-output: Show output from each library build.
--targets <targets>: Override default target list. Defaults:
- ${TARGETS}
+$(print_list " " ${TARGETS})
--test-link: Confirms all targets can be linked. Functionally identical to
passing --enable-examples via --extra-configure-args.
--verbose: Output information about the environment and each stage of the
@@ -236,6 +308,9 @@ while [ -n "$1" ]; do
iosbuild_usage
exit
;;
+ --enable-shared)
+ ENABLE_SHARED=yes
+ ;;
--preserve-build-output)
PRESERVE_BUILD_OUTPUT=yes
;;
@@ -249,6 +324,9 @@ while [ -n "$1" ]; do
TARGETS="$2"
shift
;;
+ --macosx)
+ TARGETS="${ARM_TARGETS} ${OSX_TARGETS}"
+ ;;
--verbose)
VERBOSE=yes
;;
@@ -260,6 +338,21 @@ while [ -n "$1" ]; do
shift
done
+if [ "${ENABLE_SHARED}" = "yes" ]; then
+ CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}"
+fi
+
+FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}")
+VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/')
+
+if [ "$ENABLE_SHARED" = "yes" ]; then
+ IOS_VERSION_OPTIONS="--enable-shared"
+ IOS_VERSION_MIN="8.0"
+else
+ IOS_VERSION_OPTIONS=""
+ IOS_VERSION_MIN="6.0"
+fi
+
if [ "${VERBOSE}" = "yes" ]; then
cat << EOF
BUILD_ROOT=${BUILD_ROOT}
@@ -267,16 +360,24 @@ cat << EOF
CONFIGURE_ARGS=${CONFIGURE_ARGS}
EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
FRAMEWORK_DIR=${FRAMEWORK_DIR}
+ FRAMEWORK_LIB=${FRAMEWORK_LIB}
HEADER_DIR=${HEADER_DIR}
LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
LIPO=${LIPO}
MAKEFLAGS=${MAKEFLAGS}
ORIG_PWD=${ORIG_PWD}
PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
- TARGETS="${TARGETS}"
+ TARGETS="$(print_list "" ${TARGETS})"
+ ENABLE_SHARED=${ENABLE_SHARED}
+ OSX_TARGETS="${OSX_TARGETS}"
+ SIM_TARGETS="${SIM_TARGETS}"
+ SCRIPT_DIR="${SCRIPT_DIR}"
+ FULLVERSION="${FULLVERSION}"
+ VERSION="${VERSION}"
+ IOS_VERSION_MIN="${IOS_VERSION_MIN}"
EOF
fi
build_framework "${TARGETS}"
echo "Successfully built '${FRAMEWORK_DIR}' for:"
-echo " ${TARGETS}"
+print_list "" ${TARGETS}
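
[The new verify_framework_targets function above avoids a brittle ordered comparison by canonicalizing both CPU lists before testing equality: each whitespace-separated list is split into lines, sorted, and rejoined. That normalization step in isolation, as a small sketch with made-up input lists:

    #!/bin/sh
    # Sort a whitespace-separated word list into canonical order.
    canonicalize() {
      echo "$@" | tr " " "\n" | sort | tr "\n" " "
    }

    requested=$(canonicalize "arm64 armv7 i386")
    actual=$(canonicalize "i386 armv7 arm64")  # e.g. parsed from 'lipo -info'
    [ "${requested}" = "${actual}" ] && echo "architectures match"
]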
diff --git a/libvpx/build/make/msvs_common.sh b/libvpx/build/make/msvs_common.sh
index 90c14888c..88f1cf9b5 100755
--- a/libvpx/build/make/msvs_common.sh
+++ b/libvpx/build/make/msvs_common.sh
@@ -39,11 +39,12 @@ fix_path() {
}
# Corrects the paths in file_list in one pass for efficiency.
+# $1 is the name of the array to be modified.
fix_file_list() {
- # TODO(jzern): this could be more generic and take the array as a param.
- files=$(fix_path "${file_list[@]}")
+ declare -n array_ref=$1
+ files=$(fix_path "${array_ref[@]}")
local IFS=$'\n'
- file_list=($files)
+ array_ref=($files)
}
generate_uuid() {
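
[The declare -n in the rewritten fix_file_list above is a bash 4.3+ nameref: array_ref becomes an alias for whatever array name the caller passes, so assignments through it modify the caller's array directly. A self-contained illustration of the same mechanism (upcase_list is a hypothetical helper, not part of libvpx):

    #!/bin/bash
    # Requires bash >= 4.3 for 'declare -n'.
    # Upper-cases every element of the named array in place.
    upcase_list() {
      declare -n ref=$1          # ref aliases the caller's array
      local i
      for i in "${!ref[@]}"; do
        ref[$i]=${ref[$i]^^}     # write back through the alias
      done
    }

    file_list=(foo.c bar.c)
    upcase_list file_list
    echo "${file_list[@]}"       # prints: FOO.C BAR.C
]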
diff --git a/libvpx/build/make/version.sh b/libvpx/build/make/version.sh
index b340142c9..696752777 100755
--- a/libvpx/build/make/version.sh
+++ b/libvpx/build/make/version.sh
@@ -24,8 +24,9 @@ out_file=${2}
id=${3:-VERSION_STRING}
git_version_id=""
-if [ -d "${source_path}/.git" ]; then
+if [ -e "${source_path}/.git" ]; then
# Source Path is a git working copy. Check for local modifications.
+ # Note that git submodules may have a file as .git, not a directory.
export GIT_DIR="${source_path}/.git"
git_version_id=`git describe --match=v[0-9]* 2>/dev/null`
fi
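
[The -d to -e change above matters because a git submodule (or worktree) checkout stores .git as a plain file containing a "gitdir:" pointer rather than a directory; -d misses that case while -e matches both. A quick way to see the difference, sketched:

    #!/bin/sh
    if [ -d .git ]; then
      echo ".git is a directory (ordinary clone)"
    elif [ -f .git ]; then
      # Submodule/worktree checkouts redirect to the real git dir,
      # e.g. "gitdir: ../.git/modules/libvpx"
      echo ".git is a gitlink file: $(cat .git)"
    fi
]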
diff --git a/libvpx/configure b/libvpx/configure
index a40f3abb6..f82ee046b 100755
--- a/libvpx/configure
+++ b/libvpx/configure
@@ -35,9 +35,11 @@ Advanced options:
${toggle_debug_libs} in/exclude debug version of libraries
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
${toggle_vp9_highbitdepth} use VP9 high bit depth (10/12) profiles
+ ${toggle_better_hw_compatibility}
+ enable encoder to produce streams with better
+ hardware decoder compatibility
${toggle_vp8} VP8 codec support
${toggle_vp9} VP9 codec support
- ${toggle_vp10} VP10 codec support
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_postproc} postprocessing
${toggle_vp9_postproc} vp9 specific postprocessing
@@ -95,11 +97,11 @@ EOF
# all_platforms is a list of all supported target platforms. Maintain
# alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} armv6-darwin-gcc"
+all_platforms="${all_platforms} arm64-darwin-gcc"
+all_platforms="${all_platforms} arm64-linux-gcc"
all_platforms="${all_platforms} armv6-linux-rvct"
all_platforms="${all_platforms} armv6-linux-gcc"
all_platforms="${all_platforms} armv6-none-rvct"
-all_platforms="${all_platforms} arm64-darwin-gcc"
all_platforms="${all_platforms} armv7-android-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8
@@ -109,6 +111,7 @@ all_platforms="${all_platforms} armv7-win32-vs11"
all_platforms="${all_platforms} armv7-win32-vs12"
all_platforms="${all_platforms} armv7-win32-vs14"
all_platforms="${all_platforms} armv7s-darwin-gcc"
+all_platforms="${all_platforms} armv8-linux-gcc"
all_platforms="${all_platforms} mips32-linux-gcc"
all_platforms="${all_platforms} mips64-linux-gcc"
all_platforms="${all_platforms} sparc-solaris-gcc"
@@ -122,6 +125,7 @@ all_platforms="${all_platforms} x86-darwin11-gcc"
all_platforms="${all_platforms} x86-darwin12-gcc"
all_platforms="${all_platforms} x86-darwin13-gcc"
all_platforms="${all_platforms} x86-darwin14-gcc"
+all_platforms="${all_platforms} x86-darwin15-gcc"
all_platforms="${all_platforms} x86-iphonesimulator-gcc"
all_platforms="${all_platforms} x86-linux-gcc"
all_platforms="${all_platforms} x86-linux-icc"
@@ -142,6 +146,7 @@ all_platforms="${all_platforms} x86_64-darwin11-gcc"
all_platforms="${all_platforms} x86_64-darwin12-gcc"
all_platforms="${all_platforms} x86_64-darwin13-gcc"
all_platforms="${all_platforms} x86_64-darwin14-gcc"
+all_platforms="${all_platforms} x86_64-darwin15-gcc"
all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
@@ -190,12 +195,8 @@ if [ ${doxy_major:-0} -ge 1 ]; then
fi
# disable codecs when their source directory does not exist
-[ -d "${source_path}/vp8" ] || disable_feature vp8
-[ -d "${source_path}/vp9" ] || disable_feature vp9
-[ -d "${source_path}/vp10" ] || disable_feature vp10
-
-# disable vp10 codec by default
-disable_feature vp10
+[ -d "${source_path}/vp8" ] || disable_codec vp8
+[ -d "${source_path}/vp9" ] || disable_codec vp9
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
@@ -217,13 +218,10 @@ CODECS="
vp8_decoder
vp9_encoder
vp9_decoder
- vp10_encoder
- vp10_decoder
"
CODEC_FAMILIES="
vp8
vp9
- vp10
"
ARCH_LIST="
@@ -232,6 +230,16 @@ ARCH_LIST="
x86
x86_64
"
+ARCH_EXT_LIST_X86="
+ mmx
+ sse
+ sse2
+ sse3
+ ssse3
+ sse4_1
+ avx
+ avx2
+"
ARCH_EXT_LIST="
edsp
media
@@ -243,21 +251,12 @@ ARCH_EXT_LIST="
msa
mips64
- mmx
- sse
- sse2
- sse3
- ssse3
- sse4_1
- avx
- avx2
+ ${ARCH_EXT_LIST_X86}
"
HAVE_LIST="
${ARCH_EXT_LIST}
vpx_ports
- stdint_h
pthread_h
- sys_mman_h
unistd_h
"
EXPERIMENT_LIST="
@@ -317,6 +316,7 @@ CONFIG_LIST="
vp9_temporal_denoising
coefficient_range_checking
vp9_highbitdepth
+ better_hw_compatibility
experimental
size_limit
${EXPERIMENT_LIST}
@@ -375,6 +375,7 @@ CMDLINE_SELECT="
temporal_denoising
vp9_temporal_denoising
coefficient_range_checking
+ better_hw_compatibility
vp9_highbitdepth
experimental
"
@@ -383,15 +384,19 @@ process_cmdline() {
for opt do
optval="${opt#*=}"
case "$opt" in
- --disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
+ --disable-codecs)
+ for c in ${CODEC_FAMILIES}; do disable_codec $c; done
+ ;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
- if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
+ if is_in ${option} ${EXPERIMENT_LIST}; then
if enabled experimental; then
${action}_feature $option
else
log_echo "Ignoring $opt -- not in experimental mode."
fi
+ elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then
+ ${action}_codec ${option}
else
process_common_cmdline $opt
fi
@@ -405,14 +410,6 @@ process_cmdline() {
post_process_cmdline() {
c=""
- # If the codec family is disabled, disable all components of that family.
- # If the codec family is enabled, enable all components of that family.
- log_echo "Configuring selected codecs"
- for c in ${CODECS}; do
- disabled ${c%%_*} && disable_feature ${c}
- enabled ${c%%_*} && enable_feature ${c}
- done
-
# Enable all detected codecs, if they haven't been disabled
for c in ${CODECS}; do soft_enable $c; done
@@ -507,13 +504,18 @@ process_detect() {
# Can only build shared libs on a subset of platforms. Doing this check
# here rather than at option parse time because the target auto-detect
# magic happens after the command line has been parsed.
- if ! enabled linux && ! enabled os2; then
+ case "${tgt_os}" in
+ linux|os2|darwin*|iphonesimulator*)
+ # Supported platforms
+ ;;
+ *)
if enabled gnu; then
echo "--enable-shared is only supported on ELF; assuming this is OK"
else
- die "--enable-shared only supported on ELF and OS/2 for now"
+ die "--enable-shared only supported on ELF, OS/2, and Darwin for now"
fi
- fi
+ ;;
+ esac
fi
if [ -z "$CC" ] || enabled external_build; then
echo "Bypassing toolchain for environment detection."
@@ -540,16 +542,12 @@ process_detect() {
# Specialize windows and POSIX environments.
case $toolchain in
*-win*-*)
- case $header-$toolchain in
- stdint*-gcc) true;;
- *) false;;
- esac && enable_feature $var
- ;;
+ # Don't check for any headers in Windows builds.
+ false
+ ;;
*)
case $header in
- stdint.h) true;;
pthread.h) true;;
- sys/mman.h) true;;
unistd.h) true;;
*) false;;
esac && enable_feature $var
@@ -565,9 +563,7 @@ process_detect() {
int main(void) {return 0;}
EOF
# check system headers
- check_header stdint.h
check_header pthread.h
- check_header sys/mman.h
check_header unistd.h # for sysconf(3) and friends.
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
@@ -598,7 +594,11 @@ process_toolchain() {
;;
*) check_add_cflags -Wunused-but-set-variable ;;
esac
- enabled extra_warnings || check_add_cflags -Wno-unused-function
+ if enabled mips || [ -z "${INLINE}" ]; then
+ enabled extra_warnings || check_add_cflags -Wno-unused-function
+ else
+ check_add_cflags -Wunused-function
+ fi
fi
if enabled icc; then
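
[Several hunks above replace echo | grep membership tests with is_in, which does the lookup in pure shell instead of spawning a pipeline per check. The helper itself is not shown in this diff; a plausible POSIX implementation of such a word-list membership test (an assumption on my part; see build/make/configure.sh for the actual definition) looks like:

    #!/bin/sh
    # is_in VALUE WORD...  succeeds when VALUE equals one of the WORDs.
    is_in() {
      value="$1"
      shift
      for word in "$@"; do
        [ "${word}" = "${value}" ] && return 0
      done
      return 1
    }

    is_in sse4_1 mmx sse sse2 sse4_1 && echo "found"
]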
diff --git a/libvpx/examples.mk b/libvpx/examples.mk
index f10bec68c..c891a5496 100644
--- a/libvpx/examples.mk
+++ b/libvpx/examples.mk
@@ -36,21 +36,30 @@ LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \
third_party/libyuv/source/scale_neon64.cc \
third_party/libyuv/source/scale_win.cc \
-LIBWEBM_COMMON_SRCS += third_party/libwebm/webmids.hpp
+LIBWEBM_COMMON_SRCS += third_party/libwebm/common/hdr_util.cc \
+ third_party/libwebm/common/hdr_util.h \
+ third_party/libwebm/common/webmids.h
-LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
- third_party/libwebm/mkvmuxerutil.cpp \
- third_party/libwebm/mkvwriter.cpp \
- third_party/libwebm/mkvmuxer.hpp \
- third_party/libwebm/mkvmuxertypes.hpp \
- third_party/libwebm/mkvmuxerutil.hpp \
- third_party/libwebm/mkvparser.hpp \
- third_party/libwebm/mkvwriter.hpp
+LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer/mkvmuxer.cc \
+ third_party/libwebm/mkvmuxer/mkvmuxerutil.cc \
+ third_party/libwebm/mkvmuxer/mkvwriter.cc \
+ third_party/libwebm/mkvmuxer/mkvmuxer.h \
+ third_party/libwebm/mkvmuxer/mkvmuxertypes.h \
+ third_party/libwebm/mkvmuxer/mkvmuxerutil.h \
+ third_party/libwebm/mkvparser/mkvparser.h \
+ third_party/libwebm/mkvmuxer/mkvwriter.h
+
+LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser/mkvparser.cc \
+ third_party/libwebm/mkvparser/mkvreader.cc \
+ third_party/libwebm/mkvparser/mkvparser.h \
+ third_party/libwebm/mkvparser/mkvreader.h
+
+# Add compile flags and include path for libwebm sources.
+ifeq ($(CONFIG_WEBM_IO),yes)
+ CXXFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+ INC_PATH-yes += $(SRC_PATH_BARE)/third_party/libwebm
+endif
-LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
- third_party/libwebm/mkvreader.cpp \
- third_party/libwebm/mkvparser.hpp \
- third_party/libwebm/mkvreader.hpp
# List of examples to build. UTILS are tools meant for distribution
# while EXAMPLES demonstrate specific portions of the API.
@@ -70,6 +79,7 @@ ifeq ($(CONFIG_LIBYUV),yes)
endif
ifeq ($(CONFIG_WEBM_IO),yes)
vpxdec.SRCS += $(LIBWEBM_COMMON_SRCS)
+ vpxdec.SRCS += $(LIBWEBM_MUXER_SRCS)
vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS)
vpxdec.SRCS += webmdec.cc webmdec.h
endif
@@ -93,6 +103,7 @@ endif
ifeq ($(CONFIG_WEBM_IO),yes)
vpxenc.SRCS += $(LIBWEBM_COMMON_SRCS)
vpxenc.SRCS += $(LIBWEBM_MUXER_SRCS)
+ vpxenc.SRCS += $(LIBWEBM_PARSER_SRCS)
vpxenc.SRCS += webmenc.cc webmenc.h
endif
vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
diff --git a/libvpx/examples/simple_encoder.c b/libvpx/examples/simple_encoder.c
index a30772973..64f0a0137 100644
--- a/libvpx/examples/simple_encoder.c
+++ b/libvpx/examples/simple_encoder.c
@@ -109,8 +109,8 @@ static const char *exec_name;
void usage_exit(void) {
fprintf(stderr,
"Usage: %s <codec> <width> <height> <infile> <outfile> "
- "<keyframe-interval> [<error-resilient>]\nSee comments in "
- "simple_encoder.c for more information.\n",
+ "<keyframe-interval> <error-resilient> <frames to encode>\n"
+ "See comments in simple_encoder.c for more information.\n",
exec_name);
exit(EXIT_FAILURE);
}
@@ -147,6 +147,7 @@ static int encode_frame(vpx_codec_ctx_t *codec,
return got_pkts;
}
+// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps.
int main(int argc, char **argv) {
FILE *infile = NULL;
vpx_codec_ctx_t codec;
@@ -157,12 +158,11 @@ int main(int argc, char **argv) {
VpxVideoInfo info = {0};
VpxVideoWriter *writer = NULL;
const VpxInterface *encoder = NULL;
- const int fps = 30; // TODO(dkovalev) add command line argument
- const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument
+ const int fps = 30;
+ const int bitrate = 200;
int keyframe_interval = 0;
-
- // TODO(dkovalev): Add some simple command line parsing code to make the
- // command line more flexible.
+ int max_frames = 0;
+ int frames_encoded = 0;
const char *codec_arg = NULL;
const char *width_arg = NULL;
const char *height_arg = NULL;
@@ -172,7 +172,7 @@ int main(int argc, char **argv) {
exec_name = argv[0];
- if (argc < 7)
+ if (argc != 9)
die("Invalid number of arguments");
codec_arg = argv[1];
@@ -181,6 +181,7 @@ int main(int argc, char **argv) {
infile_arg = argv[4];
outfile_arg = argv[5];
keyframe_interval_arg = argv[6];
+ max_frames = strtol(argv[8], NULL, 0);
encoder = get_vpx_encoder_by_name(codec_arg);
if (!encoder)
@@ -219,7 +220,7 @@ int main(int argc, char **argv) {
cfg.g_timebase.num = info.time_base.numerator;
cfg.g_timebase.den = info.time_base.denominator;
cfg.rc_target_bitrate = bitrate;
- cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0;
+ cfg.g_error_resilient = strtol(argv[7], NULL, 0);
writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
if (!writer)
@@ -237,6 +238,9 @@ int main(int argc, char **argv) {
if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
flags |= VPX_EFLAG_FORCE_KF;
encode_frame(&codec, &raw, frame_count++, flags, writer);
+ frames_encoded++;
+ if (max_frames > 0 && frames_encoded >= max_frames)
+ break;
}
// Flush encoder.
diff --git a/libvpx/examples/twopass_encoder.c b/libvpx/examples/twopass_encoder.c
index aecc11d3f..15a6617cd 100644
--- a/libvpx/examples/twopass_encoder.c
+++ b/libvpx/examples/twopass_encoder.c
@@ -59,7 +59,9 @@
static const char *exec_name;
void usage_exit(void) {
- fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
+ fprintf(stderr,
+ "Usage: %s <codec> <width> <height> <infile> <outfile> "
+ "<frame limit>\n",
exec_name);
exit(EXIT_FAILURE);
}
@@ -129,7 +131,8 @@ static int encode_frame(vpx_codec_ctx_t *ctx,
static vpx_fixed_buf_t pass0(vpx_image_t *raw,
FILE *infile,
const VpxInterface *encoder,
- const vpx_codec_enc_cfg_t *cfg) {
+ const vpx_codec_enc_cfg_t *cfg,
+ int max_frames) {
vpx_codec_ctx_t codec;
int frame_count = 0;
vpx_fixed_buf_t stats = {NULL, 0};
@@ -142,6 +145,8 @@ static vpx_fixed_buf_t pass0(vpx_image_t *raw,
++frame_count;
get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY,
&stats);
+ if (max_frames > 0 && frame_count >= max_frames)
+ break;
}
// Flush encoder.
@@ -159,7 +164,8 @@ static void pass1(vpx_image_t *raw,
FILE *infile,
const char *outfile_name,
const VpxInterface *encoder,
- const vpx_codec_enc_cfg_t *cfg) {
+ const vpx_codec_enc_cfg_t *cfg,
+ int max_frames) {
VpxVideoInfo info = {
encoder->fourcc,
cfg->g_w,
@@ -181,6 +187,9 @@ static void pass1(vpx_image_t *raw,
while (vpx_img_read(raw, infile)) {
++frame_count;
encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer);
+
+ if (max_frames > 0 && frame_count >= max_frames)
+ break;
}
// Flush encoder.
@@ -213,11 +222,14 @@ int main(int argc, char **argv) {
const char *const height_arg = argv[3];
const char *const infile_arg = argv[4];
const char *const outfile_arg = argv[5];
+ int max_frames = 0;
exec_name = argv[0];
- if (argc != 6)
+ if (argc != 7)
die("Invalid number of arguments.");
+ max_frames = strtol(argv[6], NULL, 0);
+
encoder = get_vpx_encoder_by_name(codec_arg);
if (!encoder)
die("Unsupported codec.");
@@ -249,13 +261,13 @@ int main(int argc, char **argv) {
// Pass 0
cfg.g_pass = VPX_RC_FIRST_PASS;
- stats = pass0(&raw, infile, encoder, &cfg);
+ stats = pass0(&raw, infile, encoder, &cfg, max_frames);
// Pass 1
rewind(infile);
cfg.g_pass = VPX_RC_LAST_PASS;
cfg.rc_twopass_stats_in = stats;
- pass1(&raw, infile, outfile_arg, encoder, &cfg);
+ pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames);
free(stats.buf);
vpx_img_free(&raw);
diff --git a/libvpx/examples/vp8_multi_resolution_encoder.c b/libvpx/examples/vp8_multi_resolution_encoder.c
index 2b032049c..fc775ef7c 100644
--- a/libvpx/examples/vp8_multi_resolution_encoder.c
+++ b/libvpx/examples/vp8_multi_resolution_encoder.c
@@ -29,13 +29,6 @@
#include <math.h>
#include <assert.h>
#include <sys/time.h>
-#if USE_POSIX_MMAP
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <unistd.h>
-#endif
#include "vpx_ports/vpx_timer.h"
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
@@ -354,8 +347,7 @@ int main(int argc, char **argv)
double psnr_totals[NUM_ENCODERS][4] = {{0,0}};
int psnr_count[NUM_ENCODERS] = {0};
- double cx_time = 0;
- struct timeval tv1, tv2, difftv;
+ int64_t cx_time = 0;
/* Set the required target bitrates for each resolution level.
* If target bitrate for highest-resolution level is set to 0,
@@ -589,6 +581,7 @@ int main(int argc, char **argv)
while(frame_avail || got_data)
{
+ struct vpx_usec_timer timer;
vpx_codec_iter_t iter[NUM_ENCODERS]={NULL};
const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
@@ -643,18 +636,18 @@ int main(int argc, char **argv)
vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
}
- gettimeofday(&tv1, NULL);
/* Encode each frame at multi-levels */
/* Note the flags must be set to 0 in the encode call if they are set
for each frame with the vpx_codec_control(), as done above. */
+ vpx_usec_timer_start(&timer);
if(vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
frame_cnt, 1, 0, arg_deadline))
{
die_codec(&codec[0], "Failed to encode frame");
}
- gettimeofday(&tv2, NULL);
- timersub(&tv2, &tv1, &difftv);
- cx_time += (double)(difftv.tv_sec * 1000000 + difftv.tv_usec);
+ vpx_usec_timer_mark(&timer);
+ cx_time += vpx_usec_timer_elapsed(&timer);
+
for (i=NUM_ENCODERS-1; i>=0 ; i--)
{
got_data = 0;
@@ -693,8 +686,10 @@ int main(int argc, char **argv)
frame_cnt++;
}
printf("\n");
- printf("FPS for encoding %d %f %f \n", frame_cnt, (float)cx_time / 1000000,
- 1000000 * (double)frame_cnt / (double)cx_time);
+ printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
+ frame_cnt,
+ 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
+ 1000000 * (double)frame_cnt / (double)cx_time);
fclose(infile);
diff --git a/libvpx/examples/vp9_spatial_svc_encoder.c b/libvpx/examples/vp9_spatial_svc_encoder.c
index b26e98734..271ab704b 100644
--- a/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -30,6 +30,7 @@
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"
+#include "vp9/encoder/vp9_encoder.h"
#define OUTPUT_RC_STATS 1
static const arg_def_t skip_frames_arg =
@@ -408,7 +409,10 @@ static void set_rate_control_stats(struct RateControlStats *rc,
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
const int layer = sl * cfg->ts_number_layers + tl;
const int tlayer0 = sl * cfg->ts_number_layers;
- rc->layer_framerate[layer] =
+ if (cfg->ts_number_layers == 1)
+ rc->layer_framerate[layer] = framerate;
+ else
+ rc->layer_framerate[layer] =
framerate / cfg->ts_rate_decimator[tl];
if (tl > 0) {
rc->layer_pfb[layer] = 1000.0 *
@@ -714,6 +718,7 @@ int main(int argc, const char **argv) {
// TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
// mode to "VP9E_LAYERING_MODE_BYPASS".
if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ layer_id.spatial_layer_id = 0;
// Example for 2 temporal layers.
if (frame_cnt % 2 == 0)
layer_id.temporal_layer_id = 0;
@@ -729,6 +734,12 @@ int main(int argc, const char **argv) {
&ref_frame_config);
vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
&ref_frame_config);
+ // Keep track of input frames, to account for frame drops in rate control
+ // stats/metrics.
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+ layer_id.temporal_layer_id];
+ }
}
vpx_usec_timer_start(&timer);
@@ -739,6 +750,7 @@ int main(int argc, const char **argv) {
cx_time += vpx_usec_timer_elapsed(&timer);
printf("%s", vpx_svc_get_message(&svc_ctx));
+ fflush(stdout);
if (res != VPX_CODEC_OK) {
die_codec(&codec, "Failed to encode frame");
}
@@ -746,6 +758,7 @@ int main(int argc, const char **argv) {
while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
switch (cx_pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
+ SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
if (cx_pkt->data.frame.sz > 0) {
#if OUTPUT_RC_STATS
uint32_t sizes[8];
@@ -761,9 +774,16 @@ int main(int argc, const char **argv) {
vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
parse_superframe_index(cx_pkt->data.frame.buf,
cx_pkt->data.frame.sz, sizes, &count);
- for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
- ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
- layer_id.temporal_layer_id];
+ // Note computing input_layer_frames here won't account for frame
+ // drops in rate control stats.
+ // TODO(marpan): Fix this for non-bypass mode so we can get stats
+ // for dropped frames.
+ if (svc_ctx.temporal_layering_mode !=
+ VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+ layer_id.temporal_layer_id];
+ }
}
for (tl = layer_id.temporal_layer_id;
tl < enc_cfg.ts_number_layers; ++tl) {
@@ -834,6 +854,8 @@ int main(int argc, const char **argv) {
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
(int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+ if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
+ si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
++frames_received;
break;
}
@@ -854,6 +876,16 @@ int main(int argc, const char **argv) {
pts += frame_duration;
}
}
+
+ // Compensate for the extra frame count for the bypass mode.
+ if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ const int layer = sl * enc_cfg.ts_number_layers +
+ layer_id.temporal_layer_id;
+ --rc.layer_input_frames[layer];
+ }
+ }
+
printf("Processed %d frames\n", frame_cnt);
fclose(infile);
#if OUTPUT_RC_STATS
diff --git a/libvpx/examples/vpx_temporal_svc_encoder.c b/libvpx/examples/vpx_temporal_svc_encoder.c
index 5adda9eeb..e6c09fb71 100644
--- a/libvpx/examples/vpx_temporal_svc_encoder.c
+++ b/libvpx/examples/vpx_temporal_svc_encoder.c
@@ -41,7 +41,7 @@ enum denoiserState {
kDenoiserOnAdaptive
};
-static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
+static int mode_to_num_layers[13] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3};
// For rate control encoding stats.
struct RateControlMetrics {
@@ -432,7 +432,32 @@ static void set_temporal_layer_pattern(int layering_mode,
layer_flags[7] = layer_flags[3];
break;
}
- case 11:
+ case 11: {
+ // 3-layer structure with one reference frame. This works the same as
+ // temporal_layering_mode 3 and was added to allow comparison with
+ // vp9_spatial_svc_encoder.
+
+ // 3-layers, 4-frame period.
+ int ids[4] = {0, 2, 1, 2};
+ cfg->ts_periodicity = 4;
+ *flag_periodicity = 4;
+ cfg->ts_number_layers = 3;
+ cfg->ts_rate_decimator[0] = 4;
+ cfg->ts_rate_decimator[1] = 2;
+ cfg->ts_rate_decimator[2] = 1;
+ memcpy(cfg->ts_layer_id, ids, sizeof(ids));
+ // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
+ layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+ layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+ layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+ break;
+ }
+ case 12:
default: {
// 3-layers structure as in case 10, but no sync/refresh points for
// layer 1 and 2.
@@ -530,7 +555,7 @@ int main(int argc, char **argv) {
}
layering_mode = strtol(argv[10], NULL, 0);
 if (layering_mode < 0 || layering_mode > 12) {
die("Invalid layering mode (0..12) %s", argv[10]);
}
@@ -690,7 +715,7 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
- vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
+ vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
diff --git a/libvpx/ivfdec.c b/libvpx/ivfdec.c
index 6dcd66f73..7fc25a0e8 100644
--- a/libvpx/ivfdec.c
+++ b/libvpx/ivfdec.c
@@ -23,7 +23,7 @@ static void fix_framerate(int *num, int *den) {
// we can guess the framerate using only the timebase in this
// case. Other files would require reading ahead to guess the
// timebase, like we do for webm.
- if (*num < 1000) {
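+ // Bound *den as well, so corrupt headers are rejected and the *den *= 2
+ // below cannot overflow a 32-bit int.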
+ if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) {
// Correct for the factor of 2 applied to the timebase in the encoder.
if (*num & 1)
*den *= 2;
diff --git a/libvpx/libs.mk b/libvpx/libs.mk
index f28d84a55..9a6092a51 100644
--- a/libvpx/libs.mk
+++ b/libvpx/libs.mk
@@ -109,40 +109,6 @@ endif
VP9_PREFIX=vp9/
$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
-# VP10 make file
-ifeq ($(CONFIG_VP10),yes)
- VP10_PREFIX=vp10/
- include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk
-endif
-
-ifeq ($(CONFIG_VP10_ENCODER),yes)
- VP10_PREFIX=vp10/
- include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10cx.mk
- CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_CX_SRCS))
- CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_CX_EXPORTS))
- CODEC_SRCS-yes += $(VP10_PREFIX)vp10cx.mk vpx/vp8.h vpx/vp8cx.h
- INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
- INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
- INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/%
- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
- CODEC_DOC_SECTIONS += vp9 vp9_encoder
-endif
-
-ifeq ($(CONFIG_VP10_DECODER),yes)
- VP10_PREFIX=vp10/
- include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10dx.mk
- CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_DX_SRCS))
- CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_DX_EXPORTS))
- CODEC_SRCS-yes += $(VP10_PREFIX)vp10dx.mk vpx/vp8.h vpx/vp8dx.h
- INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
- INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/%
- CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
- CODEC_DOC_SECTIONS += vp9 vp9_decoder
-endif
-
-VP10_PREFIX=vp10/
-$(BUILD_PFX)$(VP10_PREFIX)%.c.o: CFLAGS += -Wextra
-
ifeq ($(CONFIG_ENCODERS),yes)
CODEC_DOC_SECTIONS += encoder
endif
@@ -183,6 +149,9 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
endif
CODEC_EXPORTS-yes += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
+ifeq ($(CONFIG_SPATIAL_SVC),yes)
+CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc
+endif
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
INSTALL-LIBS-yes += include/vpx/vpx_codec.h
@@ -260,7 +229,7 @@ OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 3
+SO_VERSION_MAJOR := 4
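+# Major version bump: the v1.6.0 library is not ABI compatible with the
+# previous soname.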
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
@@ -270,6 +239,12 @@ EXPORT_FILE := libvpx.syms
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.dylib )
else
+ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS))
+LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
+SHARED_LIB_SUF := .dylib
+EXPORT_FILE := libvpx.syms
+LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, libvpx.dylib)
+else
ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll
SHARED_LIB_SUF := _dll.a
@@ -285,6 +260,7 @@ LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
endif
endif
+endif
LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
$(notdir $(LIBVPX_SO_SYMLINKS)) \
@@ -394,6 +370,12 @@ $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
+#
+# Add include path for libwebm sources.
+#
+ifeq ($(CONFIG_WEBM_IO),yes)
+ CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/libwebm
+endif
##
## libvpx test directives
@@ -429,12 +411,10 @@ testdata:: $(LIBVPX_TEST_DATA)
if [ -n "$${sha1sum}" ]; then\
set -e;\
echo "Checking test data:";\
- if [ -n "$(LIBVPX_TEST_DATA)" ]; then\
- for f in $(call enabled,LIBVPX_TEST_DATA); do\
- grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
- (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
- done; \
- fi; \
+ for f in $(call enabled,LIBVPX_TEST_DATA); do\
+ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
+ (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
+ done; \
else\
echo "Skipping test data integrity check, sha1sum not found.";\
fi
@@ -471,6 +451,7 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
+ $(if $(CONFIG_WEBM_IO),-I"$(SRC_PATH_BARE)/third_party/libwebm") \
-L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)
diff --git a/libvpx/md5_utils.c b/libvpx/md5_utils.c
index f4f893a2d..a9b979a41 100644
--- a/libvpx/md5_utils.c
+++ b/libvpx/md5_utils.c
@@ -150,12 +150,23 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) {
#define MD5STEP(f,w,x,y,z,in,s) \
(w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
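+/* The MD5 transform relies on intentional unsigned wraparound, so opt it
+ * out of clang's unsigned-integer-overflow sanitizer where the attribute
+ * is available. */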
+#if defined(__clang__) && defined(__has_attribute)
+#if __has_attribute(no_sanitize)
+#define VPX_NO_UNSIGNED_OVERFLOW_CHECK \
+ __attribute__((no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+
+#ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK
+#define VPX_NO_UNSIGNED_OVERFLOW_CHECK
+#endif
+
/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
* reflect the addition of 16 longwords of new data. MD5Update blocks
* the data and converts bytes into longwords for this routine.
*/
-void
+VPX_NO_UNSIGNED_OVERFLOW_CHECK void
MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
register UWORD32 a, b, c, d;
@@ -238,4 +249,6 @@ MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
buf[3] += d;
}
+#undef VPX_NO_UNSIGNED_OVERFLOW_CHECK
+
#endif
diff --git a/libvpx/test/acm_random.h b/libvpx/test/acm_random.h
index ff5c93ea1..b94b6e195 100644
--- a/libvpx/test/acm_random.h
+++ b/libvpx/test/acm_random.h
@@ -32,6 +32,12 @@ class ACMRandom {
return (value >> 15) & 0xffff;
}
+ int16_t Rand9Signed(void) {
+ // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
+ const uint32_t value = random_.Generate(512);
+ return static_cast<int16_t>(value) - 256;
+ }
+
uint8_t Rand8(void) {
const uint32_t value =
random_.Generate(testing::internal::Random::kMaxRange);
diff --git a/libvpx/test/active_map_test.cc b/libvpx/test/active_map_test.cc
index 022199519..dc3de7213 100644
--- a/libvpx/test/active_map_test.cc
+++ b/libvpx/test/active_map_test.cc
@@ -85,5 +85,5 @@ TEST_P(ActiveMapTest, Test) {
VP9_INSTANTIATE_TEST_CASE(ActiveMapTest,
::testing::Values(::libvpx_test::kRealTime),
- ::testing::Range(0, 6));
+ ::testing::Range(0, 9));
} // namespace
diff --git a/libvpx/test/add_noise_test.cc b/libvpx/test/add_noise_test.cc
new file mode 100644
index 000000000..e9945c409
--- /dev/null
+++ b/libvpx/test/add_noise_test.cc
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <math.h>
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+// TODO(jimbankoski): make width and height signed integers rather than
+// unsigned.
+typedef void (*AddNoiseFunc)(unsigned char *start, char *noise,
+ char blackclamp[16], char whiteclamp[16],
+ char bothclamp[16], unsigned int width,
+ unsigned int height, int pitch);
+
+class AddNoiseTest
+ : public ::testing::TestWithParam<AddNoiseFunc> {
+ public:
+ virtual void TearDown() {
+ libvpx_test::ClearSystemState();
+ }
+ virtual ~AddNoiseTest() {}
+};
+
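+// Returns the population standard deviation of six samples.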
+double stddev6(char a, char b, char c, char d, char e, char f) {
+ const double n = (a + b + c + d + e + f) / 6.0;
+ const double v = ((a - n) * (a - n) + (b - n) * (b - n) + (c - n) * (c - n) +
+ (d - n) * (d - n) + (e - n) * (e - n) + (f - n) * (f - n)) /
+ 6.0;
+ return sqrt(v);
+}
+
+// TODO(jimbankoski): The following two functions are duplicated in each codec.
+// For now the vp9 version has been copied into the test as-is. We should
+// normalize these in vpx_dsp and keep a single copy unless each codec needs
+// different noise.
+
+double gaussian(double sigma, double mu, double x) {
+ return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
+ (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
+}
+
+int setup_noise(int size_noise, char *noise) {
+ char char_dist[300];
+ const int ai = 4;
+ const int qi = 24;
+ const double sigma = ai + .5 + .6 * (63 - qi) / 63.0;
+
+ /* Set up a lookup table of 256 entries that matches
+ * a Gaussian distribution with sigma determined by q.
+ */
+ int next = 0;
+
+ for (int i = -32; i < 32; i++) {
+ int a_i = (int) (0.5 + 256 * gaussian(sigma, 0, i));
+
+ if (a_i) {
+ for (int j = 0; j < a_i; j++) {
+ char_dist[next + j] = (char)(i);
+ }
+
+ next = next + a_i;
+ }
+ }
+
+ for (; next < 256; next++)
+ char_dist[next] = 0;
+
+ for (int i = 0; i < size_noise; i++) {
+ noise[i] = char_dist[rand() & 0xff]; // NOLINT
+ }
+
+ // Returns the most negative value in the distribution.
+ return char_dist[0];
+}
+
+TEST_P(AddNoiseTest, CheckNoiseAdded) {
+ DECLARE_ALIGNED(16, char, blackclamp[16]);
+ DECLARE_ALIGNED(16, char, whiteclamp[16]);
+ DECLARE_ALIGNED(16, char, bothclamp[16]);
+ const int width = 64;
+ const int height = 64;
+ const int image_size = width * height;
+ char noise[3072];
+
+ const int clamp = setup_noise(3072, noise);
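+ // setup_noise() returns the most negative noise value; derive the clamps
+ // from it so that adding noise cannot wrap around 0 or 255 (checked below).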
+ for (int i = 0; i < 16; i++) {
+ blackclamp[i] = -clamp;
+ whiteclamp[i] = -clamp;
+ bothclamp[i] = -2 * clamp;
+ }
+
+ uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
+ memset(s, 99, image_size);
+
+ ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
+ bothclamp, width, height, width));
+
+ // Check that the noise added along each row and column is neither constant
+ // nor entirely absent.
+ for (int i = 0; i < image_size - 6 * width - 6; ++i) {
+ const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99,
+ s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99);
+ const double vd = stddev6(s[i] - 99, s[i + width] - 99,
+ s[i + 2 * width] - 99, s[i + 3 * width] - 99,
+ s[i + 4 * width] - 99, s[i + 5 * width] - 99);
+
+ EXPECT_NE(hd, 0);
+ EXPECT_NE(vd, 0);
+ }
+
+ // Initialize pixels in the image to 255 and check for roll over.
+ memset(s, 255, image_size);
+
+ ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
+ bothclamp, width, height, width));
+
+ // Check to make sure we don't roll over.
+ for (int i = 0; i < image_size; ++i) {
+ EXPECT_GT((int)s[i], 10) << "i = " << i;
+ }
+
+ // Initialize pixels in the image to 0 and check for roll under.
+ memset(s, 0, image_size);
+
+ ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
+ bothclamp, width, height, width));
+
+ // Check to make sure we don't roll under.
+ for (int i = 0; i < image_size; ++i) {
+ EXPECT_LT((int)s[i], 245) << "i = " << i;
+ }
+
+ vpx_free(s);
+}
+
+TEST_P(AddNoiseTest, CheckCvsAssembly) {
+ DECLARE_ALIGNED(16, char, blackclamp[16]);
+ DECLARE_ALIGNED(16, char, whiteclamp[16]);
+ DECLARE_ALIGNED(16, char, bothclamp[16]);
+ const int width = 64;
+ const int height = 64;
+ const int image_size = width * height;
+ char noise[3072];
+
+ const int clamp = setup_noise(3072, noise);
+ for (int i = 0; i < 16; i++) {
+ blackclamp[i] = -clamp;
+ whiteclamp[i] = -clamp;
+ bothclamp[i] = -2 * clamp;
+ }
+
+ uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
+ uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
+
+ memset(s, 99, image_size);
+ memset(d, 99, image_size);
+
+ srand(0);
+ ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
+ bothclamp, width, height, width));
+ srand(0);
+ ASM_REGISTER_STATE_CHECK(vpx_plane_add_noise_c(d, noise, blackclamp,
+ whiteclamp, bothclamp,
+ width, height, width));
+
+ for (int i = 0; i < image_size; ++i) {
+ EXPECT_EQ((int)s[i], (int)d[i]) << "i = " << i;
+ }
+
+ vpx_free(d);
+ vpx_free(s);
+}
+
+INSTANTIATE_TEST_CASE_P(C, AddNoiseTest,
+ ::testing::Values(vpx_plane_add_noise_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest,
+ ::testing::Values(vpx_plane_add_noise_sse2));
+#endif
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest,
+ ::testing::Values(vpx_plane_add_noise_msa));
+#endif
+} // namespace
diff --git a/libvpx/test/altref_test.cc b/libvpx/test/altref_test.cc
index af25b7285..d9f83d8cd 100644
--- a/libvpx/test/altref_test.cc
+++ b/libvpx/test/altref_test.cc
@@ -14,6 +14,8 @@
#include "test/util.h"
namespace {
+#if CONFIG_VP8_ENCODER
+
// lookahead range: [kLookAheadMin, kLookAheadMax).
const int kLookAheadMin = 5;
const int kLookAheadMax = 26;
@@ -63,7 +65,95 @@ TEST_P(AltRefTest, MonotonicTimestamps) {
EXPECT_GE(altref_count(), 1);
}
-
VP8_INSTANTIATE_TEST_CASE(AltRefTest,
::testing::Range(kLookAheadMin, kLookAheadMax));
+
+#endif // CONFIG_VP8_ENCODER
+
+class AltRefForcedKeyTestLarge
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+ AltRefForcedKeyTestLarge()
+ : EncoderTest(GET_PARAM(0)),
+ encoding_mode_(GET_PARAM(1)),
+ cpu_used_(GET_PARAM(2)),
+ forced_kf_frame_num_(1),
+ frame_num_(0) {}
+ virtual ~AltRefForcedKeyTestLarge() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(encoding_mode_);
+ cfg_.rc_end_usage = VPX_VBR;
+ cfg_.g_threads = 0;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+ // override test default for tile columns if necessary.
+#if CONFIG_VP9_ENCODER
+ if (GET_PARAM(0) == &libvpx_test::kVP9) {
+ encoder->Control(VP9E_SET_TILE_COLUMNS, 6);
+ }
+#endif
+ }
+ frame_flags_ =
+ (video->frame() == forced_kf_frame_num_) ? VPX_EFLAG_FORCE_KF : 0;
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (frame_num_ == forced_kf_frame_num_) {
+ ASSERT_TRUE(!!(pkt->data.frame.flags & VPX_FRAME_IS_KEY))
+ << "Frame #" << frame_num_ << " isn't a keyframe!";
+ }
+ ++frame_num_;
+ }
+
+ ::libvpx_test::TestMode encoding_mode_;
+ int cpu_used_;
+ unsigned int forced_kf_frame_num_;
+ unsigned int frame_num_;
+};
+
+TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) {
+ const vpx_rational timebase = { 1, 30 };
+ const int lag_values[] = { 3, 15, 25, -1 };
+
+ forced_kf_frame_num_ = 1;
+ for (int i = 0; lag_values[i] != -1; ++i) {
+ frame_num_ = 0;
+ cfg_.g_lag_in_frames = lag_values[i];
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 30);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+}
+
+TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) {
+ const vpx_rational timebase = { 1, 30 };
+ const int lag_values[] = { 3, 15, 25, -1 };
+
+ for (int i = 0; lag_values[i] != -1; ++i) {
+ frame_num_ = 0;
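+ // Force the keyframe on the last frame inside the lag window.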
+ forced_kf_frame_num_ = lag_values[i] - 1;
+ cfg_.g_lag_in_frames = lag_values[i];
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 30);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+}
+
+VP8_INSTANTIATE_TEST_CASE(
+ AltRefForcedKeyTestLarge,
+ ::testing::Values(::libvpx_test::kOnePassGood),
+ ::testing::Range(0, 9));
+
+VP9_INSTANTIATE_TEST_CASE(
+ AltRefForcedKeyTestLarge,
+ ::testing::Values(::libvpx_test::kOnePassGood),
+ ::testing::Range(0, 9));
} // namespace
diff --git a/libvpx/test/vp9_avg_test.cc b/libvpx/test/avg_test.cc
index d38313116..44d8dd7db 100644
--- a/libvpx/test/vp9_avg_test.cc
+++ b/libvpx/test/avg_test.cc
@@ -15,9 +15,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
-#if CONFIG_VP9_ENCODER
-#include "./vp9_rtcd.h"
-#endif
+#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
@@ -57,19 +55,19 @@ class AverageTestBase : public ::testing::Test {
}
// Sum Pixels
- unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) {
+ unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch) {
unsigned int average = 0;
for (int h = 0; h < 8; ++h)
for (int w = 0; w < 8; ++w)
- average += source[h * source_stride_ + w];
+ average += source[h * pitch + w];
return ((average + 32) >> 6);
}
- unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) {
+ unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch) {
unsigned int average = 0;
for (int h = 0; h < 4; ++h)
for (int w = 0; w < 4; ++w)
- average += source[h * source_stride_ + w];
+ average += source[h * pitch + w];
return ((average + 8) >> 4);
}
@@ -194,6 +192,48 @@ class IntProColTest
int16_t sum_c_;
};
+typedef int (*SatdFunc)(const int16_t *coeffs, int length);
+typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
+
+class SatdTest
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<SatdTestParam> {
+ protected:
+ virtual void SetUp() {
+ satd_size_ = GET_PARAM(0);
+ satd_func_ = GET_PARAM(1);
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ src_ = reinterpret_cast<int16_t*>(
+ vpx_memalign(16, sizeof(*src_) * satd_size_));
+ ASSERT_TRUE(src_ != NULL);
+ }
+
+ virtual void TearDown() {
+ libvpx_test::ClearSystemState();
+ vpx_free(src_);
+ }
+
+ void FillConstant(const int16_t val) {
+ for (int i = 0; i < satd_size_; ++i) src_[i] = val;
+ }
+
+ void FillRandom() {
+ for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
+ }
+
+ void Check(const int expected) {
+ int total;
+ ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
+ EXPECT_EQ(expected, total);
+ }
+
+ int satd_size_;
+
+ private:
+ int16_t *src_;
+ SatdFunc satd_func_;
+ ACMRandom rnd_;
+};
uint8_t* AverageTestBase::source_data_ = NULL;
@@ -246,69 +286,126 @@ TEST_P(IntProColTest, Random) {
RunComparison();
}
+
+TEST_P(SatdTest, MinValue) {
+ const int kMin = -32640;
+ const int expected = -kMin * satd_size_;
+ FillConstant(kMin);
+ Check(expected);
+}
+
+TEST_P(SatdTest, MaxValue) {
+ const int kMax = 32640;
+ const int expected = kMax * satd_size_;
+ FillConstant(kMax);
+ Check(expected);
+}
+
+TEST_P(SatdTest, Random) {
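+ // The expected totals below were generated with ACMRandom's deterministic
+ // seed; they must be updated if the RNG or the fill order ever changes.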
+ int expected;
+ switch (satd_size_) {
+ case 16: expected = 205298; break;
+ case 64: expected = 1113950; break;
+ case 256: expected = 4268415; break;
+ case 1024: expected = 16954082; break;
+ default:
+ FAIL() << "Invalid satd size (" << satd_size_
+ << ") valid: 16/64/256/1024";
+ }
+ FillRandom();
+ Check(expected);
+}
+
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, AverageTest,
::testing::Values(
- make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
- make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
+ make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c),
+ make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(
+ C, SatdTest,
+ ::testing::Values(
+ make_tuple(16, &vpx_satd_c),
+ make_tuple(64, &vpx_satd_c),
+ make_tuple(256, &vpx_satd_c),
+ make_tuple(1024, &vpx_satd_c)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, AverageTest,
::testing::Values(
- make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2),
- make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2),
- make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2),
- make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2),
- make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
- make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
+ make_tuple(16, 16, 0, 8, &vpx_avg_8x8_sse2),
+ make_tuple(16, 16, 5, 8, &vpx_avg_8x8_sse2),
+ make_tuple(32, 32, 15, 8, &vpx_avg_8x8_sse2),
+ make_tuple(16, 16, 0, 4, &vpx_avg_4x4_sse2),
+ make_tuple(16, 16, 5, 4, &vpx_avg_4x4_sse2),
+ make_tuple(32, 32, 15, 4, &vpx_avg_4x4_sse2)));
INSTANTIATE_TEST_CASE_P(
SSE2, IntProRowTest, ::testing::Values(
- make_tuple(16, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
- make_tuple(32, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
- make_tuple(64, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c)));
+ make_tuple(16, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
+ make_tuple(32, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
+ make_tuple(64, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c)));
INSTANTIATE_TEST_CASE_P(
SSE2, IntProColTest, ::testing::Values(
- make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
- make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
- make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
+ make_tuple(16, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
+ make_tuple(32, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
+ make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c)));
+
+INSTANTIATE_TEST_CASE_P(
+ SSE2, SatdTest,
+ ::testing::Values(
+ make_tuple(16, &vpx_satd_sse2),
+ make_tuple(64, &vpx_satd_sse2),
+ make_tuple(256, &vpx_satd_sse2),
+ make_tuple(1024, &vpx_satd_sse2)));
#endif
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
NEON, AverageTest,
::testing::Values(
- make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
- make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
- make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));
+ make_tuple(16, 16, 0, 8, &vpx_avg_8x8_neon),
+ make_tuple(16, 16, 5, 8, &vpx_avg_8x8_neon),
+ make_tuple(32, 32, 15, 8, &vpx_avg_8x8_neon),
+ make_tuple(16, 16, 0, 4, &vpx_avg_4x4_neon),
+ make_tuple(16, 16, 5, 4, &vpx_avg_4x4_neon),
+ make_tuple(32, 32, 15, 4, &vpx_avg_4x4_neon)));
INSTANTIATE_TEST_CASE_P(
NEON, IntProRowTest, ::testing::Values(
- make_tuple(16, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
- make_tuple(32, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
- make_tuple(64, &vp9_int_pro_row_neon, &vp9_int_pro_row_c)));
+ make_tuple(16, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
+ make_tuple(32, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
+ make_tuple(64, &vpx_int_pro_row_neon, &vpx_int_pro_row_c)));
INSTANTIATE_TEST_CASE_P(
NEON, IntProColTest, ::testing::Values(
- make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
- make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
- make_tuple(64, &vp9_int_pro_col_neon, &vp9_int_pro_col_c)));
+ make_tuple(16, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
+ make_tuple(32, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
+ make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c)));
+
+INSTANTIATE_TEST_CASE_P(
+ NEON, SatdTest,
+ ::testing::Values(
+ make_tuple(16, &vpx_satd_neon),
+ make_tuple(64, &vpx_satd_neon),
+ make_tuple(256, &vpx_satd_neon),
+ make_tuple(1024, &vpx_satd_neon)));
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
MSA, AverageTest,
::testing::Values(
- make_tuple(16, 16, 0, 8, &vp9_avg_8x8_msa),
- make_tuple(16, 16, 5, 8, &vp9_avg_8x8_msa),
- make_tuple(32, 32, 15, 8, &vp9_avg_8x8_msa),
- make_tuple(16, 16, 0, 4, &vp9_avg_4x4_msa),
- make_tuple(16, 16, 5, 4, &vp9_avg_4x4_msa),
- make_tuple(32, 32, 15, 4, &vp9_avg_4x4_msa)));
+ make_tuple(16, 16, 0, 8, &vpx_avg_8x8_msa),
+ make_tuple(16, 16, 5, 8, &vpx_avg_8x8_msa),
+ make_tuple(32, 32, 15, 8, &vpx_avg_8x8_msa),
+ make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa),
+ make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa),
+ make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa)));
#endif
} // namespace
diff --git a/libvpx/test/borders_test.cc b/libvpx/test/borders_test.cc
index 6592375f8..bd3ac39f8 100644
--- a/libvpx/test/borders_test.cc
+++ b/libvpx/test/borders_test.cc
@@ -52,7 +52,7 @@ TEST_P(BordersTest, TestEncodeHighBitrate) {
// extend into the border and test the border condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 2000;
cfg_.rc_max_quantizer = 10;
@@ -80,7 +80,4 @@ TEST_P(BordersTest, TestLowBitrate) {
VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
::libvpx_test::kTwoPassGood));
-
-VP10_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
- ::libvpx_test::kTwoPassGood));
} // namespace
diff --git a/libvpx/test/byte_alignment_test.cc b/libvpx/test/byte_alignment_test.cc
index aa4b78b9a..3a808b046 100644
--- a/libvpx/test/byte_alignment_test.cc
+++ b/libvpx/test/byte_alignment_test.cc
@@ -21,14 +21,14 @@
namespace {
+#if CONFIG_WEBM_IO
+
const int kLegacyByteAlignment = 0;
const int kLegacyYPlaneByteAlignment = 32;
const int kNumPlanesToCheck = 3;
const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";
-#if CONFIG_WEBM_IO
-
struct ByteAlignmentTestParam {
int byte_alignment;
vpx_codec_err_t expected_value;
diff --git a/libvpx/test/codec_factory.h b/libvpx/test/codec_factory.h
index 09c9cf984..429d40d81 100644
--- a/libvpx/test/codec_factory.h
+++ b/libvpx/test/codec_factory.h
@@ -13,10 +13,10 @@
#include "./vpx_config.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_encoder.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
#endif
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
@@ -233,8 +233,6 @@ class VP9CodecFactory : public CodecFactory {
int usage) const {
#if CONFIG_VP9_ENCODER
return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
-#elif CONFIG_VP10_ENCODER
- return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage);
#else
return VPX_CODEC_INCAPABLE;
#endif
@@ -253,96 +251,5 @@ const libvpx_test::VP9CodecFactory kVP9;
#define VP9_INSTANTIATE_TEST_CASE(test, ...)
#endif // CONFIG_VP9
-/*
- * VP10 Codec Definitions
- */
-#if CONFIG_VP10
-class VP10Decoder : public Decoder {
- public:
- VP10Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
- : Decoder(cfg, deadline) {}
-
- VP10Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
- unsigned long deadline) // NOLINT
- : Decoder(cfg, flag, deadline) {}
-
- protected:
- virtual vpx_codec_iface_t* CodecInterface() const {
-#if CONFIG_VP10_DECODER
- return &vpx_codec_vp10_dx_algo;
-#else
- return NULL;
-#endif
- }
-};
-
-class VP10Encoder : public Encoder {
- public:
- VP10Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
- const unsigned long init_flags, TwopassStatsStore *stats)
- : Encoder(cfg, deadline, init_flags, stats) {}
-
- protected:
- virtual vpx_codec_iface_t* CodecInterface() const {
-#if CONFIG_VP10_ENCODER
- return &vpx_codec_vp10_cx_algo;
-#else
- return NULL;
-#endif
- }
-};
-
-class VP10CodecFactory : public CodecFactory {
- public:
- VP10CodecFactory() : CodecFactory() {}
-
- virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
- unsigned long deadline) const {
- return CreateDecoder(cfg, 0, deadline);
- }
-
- virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
- const vpx_codec_flags_t flags,
- unsigned long deadline) const { // NOLINT
-#if CONFIG_VP10_DECODER
- return new VP10Decoder(cfg, flags, deadline);
-#else
- return NULL;
-#endif
- }
-
- virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
- unsigned long deadline,
- const unsigned long init_flags,
- TwopassStatsStore *stats) const {
-#if CONFIG_VP10_ENCODER
- return new VP10Encoder(cfg, deadline, init_flags, stats);
-#else
- return NULL;
-#endif
- }
-
- virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg,
- int usage) const {
-#if CONFIG_VP10_ENCODER
- return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage);
-#else
- return VPX_CODEC_INCAPABLE;
-#endif
- }
-};
-
-const libvpx_test::VP10CodecFactory kVP10;
-
-#define VP10_INSTANTIATE_TEST_CASE(test, ...)\
- INSTANTIATE_TEST_CASE_P(VP10, test, \
- ::testing::Combine( \
- ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
- &libvpx_test::kVP10)), \
- __VA_ARGS__))
-#else
-#define VP10_INSTANTIATE_TEST_CASE(test, ...)
-#endif // CONFIG_VP10
-
} // namespace libvpx_test
#endif // TEST_CODEC_FACTORY_H_
diff --git a/libvpx/test/convolve_test.cc b/libvpx/test/convolve_test.cc
index 08267882d..73b0edb99 100644
--- a/libvpx/test/convolve_test.cc
+++ b/libvpx/test/convolve_test.cc
@@ -69,6 +69,21 @@ struct ConvolveFunctions {
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
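+// Expands to one ConvolveParam tuple per supported block size for a given
+// set of convolve functions.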
+#define ALL_SIZES(convolve_fn) \
+ make_tuple(4, 4, &convolve_fn), \
+ make_tuple(8, 4, &convolve_fn), \
+ make_tuple(4, 8, &convolve_fn), \
+ make_tuple(8, 8, &convolve_fn), \
+ make_tuple(16, 8, &convolve_fn), \
+ make_tuple(8, 16, &convolve_fn), \
+ make_tuple(16, 16, &convolve_fn), \
+ make_tuple(32, 16, &convolve_fn), \
+ make_tuple(16, 32, &convolve_fn), \
+ make_tuple(32, 32, &convolve_fn), \
+ make_tuple(64, 32, &convolve_fn), \
+ make_tuple(32, 64, &convolve_fn), \
+ make_tuple(64, 64, &convolve_fn)
+
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
@@ -103,7 +118,8 @@ void filter_block2d_8_c(const uint8_t *src_ptr,
// and filter_max_width = 16
//
uint8_t intermediate_buffer[71 * kMaxDimension];
- const int intermediate_next_stride = 1 - intermediate_height * output_width;
+ const int intermediate_next_stride =
+ 1 - static_cast<int>(intermediate_height * output_width);
// Horizontal pass (src -> transposed intermediate).
uint8_t *output_ptr = intermediate_buffer;
@@ -215,7 +231,8 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
* and filter_max_width = 16
*/
uint16_t intermediate_buffer[71 * kMaxDimension];
- const int intermediate_next_stride = 1 - intermediate_height * output_width;
+ const int intermediate_next_stride =
+ 1 - static_cast<int>(intermediate_height * output_width);
// Horizontal pass (src -> transposed intermediate).
{
@@ -279,8 +296,7 @@ void highbd_block2d_average_c(uint16_t *src,
uint16_t *output_ptr,
unsigned int output_stride,
unsigned int output_width,
- unsigned int output_height,
- int bd) {
+ unsigned int output_height) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
@@ -306,7 +322,7 @@ void highbd_filter_average_block2d_8_c(const uint16_t *src_ptr,
highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
output_width, output_height, bd);
highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
- output_width, output_height, bd);
+ output_width, output_height);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1035,20 +1051,6 @@ const ConvolveFunctions convolve8_c(
wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
-INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_c),
- make_tuple(8, 4, &convolve8_c),
- make_tuple(4, 8, &convolve8_c),
- make_tuple(8, 8, &convolve8_c),
- make_tuple(16, 8, &convolve8_c),
- make_tuple(8, 16, &convolve8_c),
- make_tuple(16, 16, &convolve8_c),
- make_tuple(32, 16, &convolve8_c),
- make_tuple(16, 32, &convolve8_c),
- make_tuple(32, 32, &convolve8_c),
- make_tuple(64, 32, &convolve8_c),
- make_tuple(32, 64, &convolve8_c),
- make_tuple(64, 64, &convolve8_c)));
const ConvolveFunctions convolve10_c(
wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
@@ -1057,20 +1059,6 @@ const ConvolveFunctions convolve10_c(
wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
-INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve10_c),
- make_tuple(8, 4, &convolve10_c),
- make_tuple(4, 8, &convolve10_c),
- make_tuple(8, 8, &convolve10_c),
- make_tuple(16, 8, &convolve10_c),
- make_tuple(8, 16, &convolve10_c),
- make_tuple(16, 16, &convolve10_c),
- make_tuple(32, 16, &convolve10_c),
- make_tuple(16, 32, &convolve10_c),
- make_tuple(32, 32, &convolve10_c),
- make_tuple(64, 32, &convolve10_c),
- make_tuple(32, 64, &convolve10_c),
- make_tuple(64, 64, &convolve10_c)));
const ConvolveFunctions convolve12_c(
wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
@@ -1079,23 +1067,13 @@ const ConvolveFunctions convolve12_c(
wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
-INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve12_c),
- make_tuple(8, 4, &convolve12_c),
- make_tuple(4, 8, &convolve12_c),
- make_tuple(8, 8, &convolve12_c),
- make_tuple(16, 8, &convolve12_c),
- make_tuple(8, 16, &convolve12_c),
- make_tuple(16, 16, &convolve12_c),
- make_tuple(32, 16, &convolve12_c),
- make_tuple(16, 32, &convolve12_c),
- make_tuple(32, 32, &convolve12_c),
- make_tuple(64, 32, &convolve12_c),
- make_tuple(32, 64, &convolve12_c),
- make_tuple(64, 64, &convolve12_c)));
+const ConvolveParam kArrayConvolve_c[] = {
+ ALL_SIZES(convolve8_c),
+ ALL_SIZES(convolve10_c),
+ ALL_SIZES(convolve12_c)
+};
#else
-
const ConvolveFunctions convolve8_c(
vpx_convolve_copy_c, vpx_convolve_avg_c,
vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
@@ -1104,22 +1082,10 @@ const ConvolveFunctions convolve8_c(
vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-
-INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_c),
- make_tuple(8, 4, &convolve8_c),
- make_tuple(4, 8, &convolve8_c),
- make_tuple(8, 8, &convolve8_c),
- make_tuple(16, 8, &convolve8_c),
- make_tuple(8, 16, &convolve8_c),
- make_tuple(16, 16, &convolve8_c),
- make_tuple(32, 16, &convolve8_c),
- make_tuple(16, 32, &convolve8_c),
- make_tuple(32, 32, &convolve8_c),
- make_tuple(64, 32, &convolve8_c),
- make_tuple(32, 64, &convolve8_c),
- make_tuple(64, 64, &convolve8_c)));
+const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
#endif
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve_c));
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
@@ -1159,46 +1125,11 @@ const ConvolveFunctions convolve12_sse2(
wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_sse2),
- make_tuple(8, 4, &convolve8_sse2),
- make_tuple(4, 8, &convolve8_sse2),
- make_tuple(8, 8, &convolve8_sse2),
- make_tuple(16, 8, &convolve8_sse2),
- make_tuple(8, 16, &convolve8_sse2),
- make_tuple(16, 16, &convolve8_sse2),
- make_tuple(32, 16, &convolve8_sse2),
- make_tuple(16, 32, &convolve8_sse2),
- make_tuple(32, 32, &convolve8_sse2),
- make_tuple(64, 32, &convolve8_sse2),
- make_tuple(32, 64, &convolve8_sse2),
- make_tuple(64, 64, &convolve8_sse2),
- make_tuple(4, 4, &convolve10_sse2),
- make_tuple(8, 4, &convolve10_sse2),
- make_tuple(4, 8, &convolve10_sse2),
- make_tuple(8, 8, &convolve10_sse2),
- make_tuple(16, 8, &convolve10_sse2),
- make_tuple(8, 16, &convolve10_sse2),
- make_tuple(16, 16, &convolve10_sse2),
- make_tuple(32, 16, &convolve10_sse2),
- make_tuple(16, 32, &convolve10_sse2),
- make_tuple(32, 32, &convolve10_sse2),
- make_tuple(64, 32, &convolve10_sse2),
- make_tuple(32, 64, &convolve10_sse2),
- make_tuple(64, 64, &convolve10_sse2),
- make_tuple(4, 4, &convolve12_sse2),
- make_tuple(8, 4, &convolve12_sse2),
- make_tuple(4, 8, &convolve12_sse2),
- make_tuple(8, 8, &convolve12_sse2),
- make_tuple(16, 8, &convolve12_sse2),
- make_tuple(8, 16, &convolve12_sse2),
- make_tuple(16, 16, &convolve12_sse2),
- make_tuple(32, 16, &convolve12_sse2),
- make_tuple(16, 32, &convolve12_sse2),
- make_tuple(32, 32, &convolve12_sse2),
- make_tuple(64, 32, &convolve12_sse2),
- make_tuple(32, 64, &convolve12_sse2),
- make_tuple(64, 64, &convolve12_sse2)));
+const ConvolveParam kArrayConvolve_sse2[] = {
+ ALL_SIZES(convolve8_sse2),
+ ALL_SIZES(convolve10_sse2),
+ ALL_SIZES(convolve12_sse2)
+};
#else
const ConvolveFunctions convolve8_sse2(
#if CONFIG_USE_X86INC
@@ -1213,21 +1144,10 @@ const ConvolveFunctions convolve8_sse2(
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_sse2),
- make_tuple(8, 4, &convolve8_sse2),
- make_tuple(4, 8, &convolve8_sse2),
- make_tuple(8, 8, &convolve8_sse2),
- make_tuple(16, 8, &convolve8_sse2),
- make_tuple(8, 16, &convolve8_sse2),
- make_tuple(16, 16, &convolve8_sse2),
- make_tuple(32, 16, &convolve8_sse2),
- make_tuple(16, 32, &convolve8_sse2),
- make_tuple(32, 32, &convolve8_sse2),
- make_tuple(64, 32, &convolve8_sse2),
- make_tuple(32, 64, &convolve8_sse2),
- make_tuple(64, 64, &convolve8_sse2)));
+const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif // CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve_sse2));
#endif
#if HAVE_SSSE3
@@ -1238,22 +1158,11 @@ const ConvolveFunctions convolve8_ssse3(
vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
- vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
+ vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_ssse3),
- make_tuple(8, 4, &convolve8_ssse3),
- make_tuple(4, 8, &convolve8_ssse3),
- make_tuple(8, 8, &convolve8_ssse3),
- make_tuple(16, 8, &convolve8_ssse3),
- make_tuple(8, 16, &convolve8_ssse3),
- make_tuple(16, 16, &convolve8_ssse3),
- make_tuple(32, 16, &convolve8_ssse3),
- make_tuple(16, 32, &convolve8_ssse3),
- make_tuple(32, 32, &convolve8_ssse3),
- make_tuple(64, 32, &convolve8_ssse3),
- make_tuple(32, 64, &convolve8_ssse3),
- make_tuple(64, 64, &convolve8_ssse3)));
+const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
+INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif
#if HAVE_AVX2 && HAVE_SSSE3
@@ -1266,20 +1175,9 @@ const ConvolveFunctions convolve8_avx2(
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_avx2),
- make_tuple(8, 4, &convolve8_avx2),
- make_tuple(4, 8, &convolve8_avx2),
- make_tuple(8, 8, &convolve8_avx2),
- make_tuple(8, 16, &convolve8_avx2),
- make_tuple(16, 8, &convolve8_avx2),
- make_tuple(16, 16, &convolve8_avx2),
- make_tuple(32, 16, &convolve8_avx2),
- make_tuple(16, 32, &convolve8_avx2),
- make_tuple(32, 32, &convolve8_avx2),
- make_tuple(64, 32, &convolve8_avx2),
- make_tuple(32, 64, &convolve8_avx2),
- make_tuple(64, 64, &convolve8_avx2)));
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
+INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif // HAVE_AVX2 && HAVE_SSSE3
#if HAVE_NEON
@@ -1303,20 +1201,9 @@ const ConvolveFunctions convolve8_neon(
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
#endif // HAVE_NEON_ASM
-INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_neon),
- make_tuple(8, 4, &convolve8_neon),
- make_tuple(4, 8, &convolve8_neon),
- make_tuple(8, 8, &convolve8_neon),
- make_tuple(16, 8, &convolve8_neon),
- make_tuple(8, 16, &convolve8_neon),
- make_tuple(16, 16, &convolve8_neon),
- make_tuple(32, 16, &convolve8_neon),
- make_tuple(16, 32, &convolve8_neon),
- make_tuple(32, 32, &convolve8_neon),
- make_tuple(64, 32, &convolve8_neon),
- make_tuple(32, 64, &convolve8_neon),
- make_tuple(64, 64, &convolve8_neon)));
+const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_neon));
#endif // HAVE_NEON
#if HAVE_DSPR2
@@ -1329,21 +1216,10 @@ const ConvolveFunctions convolve8_dspr2(
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_dspr2),
- make_tuple(8, 4, &convolve8_dspr2),
- make_tuple(4, 8, &convolve8_dspr2),
- make_tuple(8, 8, &convolve8_dspr2),
- make_tuple(16, 8, &convolve8_dspr2),
- make_tuple(8, 16, &convolve8_dspr2),
- make_tuple(16, 16, &convolve8_dspr2),
- make_tuple(32, 16, &convolve8_dspr2),
- make_tuple(16, 32, &convolve8_dspr2),
- make_tuple(32, 32, &convolve8_dspr2),
- make_tuple(64, 32, &convolve8_dspr2),
- make_tuple(32, 64, &convolve8_dspr2),
- make_tuple(64, 64, &convolve8_dspr2)));
-#endif
+const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
+INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_dspr2));
+#endif // HAVE_DSPR2
#if HAVE_MSA
const ConvolveFunctions convolve8_msa(
@@ -1355,19 +1231,8 @@ const ConvolveFunctions convolve8_msa(
vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
- make_tuple(4, 4, &convolve8_msa),
- make_tuple(8, 4, &convolve8_msa),
- make_tuple(4, 8, &convolve8_msa),
- make_tuple(8, 8, &convolve8_msa),
- make_tuple(16, 8, &convolve8_msa),
- make_tuple(8, 16, &convolve8_msa),
- make_tuple(16, 16, &convolve8_msa),
- make_tuple(32, 16, &convolve8_msa),
- make_tuple(16, 32, &convolve8_msa),
- make_tuple(32, 32, &convolve8_msa),
- make_tuple(64, 32, &convolve8_msa),
- make_tuple(32, 64, &convolve8_msa),
- make_tuple(64, 64, &convolve8_msa)));
+const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
+INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
+ ::testing::ValuesIn(kArrayConvolve8_msa));
#endif // HAVE_MSA
} // namespace
diff --git a/libvpx/test/cpu_speed_test.cc b/libvpx/test/cpu_speed_test.cc
index 8baa2f9c8..2cad30fbb 100644
--- a/libvpx/test/cpu_speed_test.cc
+++ b/libvpx/test/cpu_speed_test.cc
@@ -26,7 +26,8 @@ class CpuSpeedTest
: EncoderTest(GET_PARAM(0)),
encoding_mode_(GET_PARAM(1)),
set_cpu_used_(GET_PARAM(2)),
- min_psnr_(kMaxPSNR) {}
+ min_psnr_(kMaxPSNR),
+ tune_content_(VP9E_CONTENT_DEFAULT) {}
virtual ~CpuSpeedTest() {}
virtual void SetUp() {
@@ -49,6 +50,7 @@ class CpuSpeedTest
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+ encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
if (encoding_mode_ != ::libvpx_test::kRealTime) {
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
@@ -66,6 +68,7 @@ class CpuSpeedTest
::libvpx_test::TestMode encoding_mode_;
int set_cpu_used_;
double min_psnr_;
+ int tune_content_;
};
TEST_P(CpuSpeedTest, TestQ0) {
@@ -74,7 +77,7 @@ TEST_P(CpuSpeedTest, TestQ0) {
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 0;
cfg_.rc_min_quantizer = 0;
@@ -92,7 +95,7 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) {
::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
cfg_.g_timebase = video.timebase();
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 0;
cfg_.rc_min_quantizer = 0;
@@ -103,13 +106,28 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) {
EXPECT_GE(min_psnr_, kMaxPSNR);
}
+TEST_P(CpuSpeedTest, TestTuneScreen) {
+ ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
+ cfg_.g_timebase = video.timebase();
+ cfg_.rc_2pass_vbr_minsection_pct = 5;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+ cfg_.rc_target_bitrate = 2000;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_min_quantizer = 0;
+ tune_content_ = VP9E_CONTENT_SCREEN;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
// Validate that this non multiple of 64 wide clip encodes and decodes
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 12000;
cfg_.rc_max_quantizer = 10;
cfg_.rc_min_quantizer = 0;
@@ -125,7 +143,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
// when passing in a very high min q. This pushes the encoder to producing
 // lots of small partitions which will test the other condition.
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 200;
cfg_.rc_min_quantizer = 40;
@@ -140,9 +158,4 @@ VP9_INSTANTIATE_TEST_CASE(
::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(0, 9));
-
-VP10_INSTANTIATE_TEST_CASE(
- CpuSpeedTest,
- ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
- ::testing::Range(0, 3));
} // namespace
diff --git a/libvpx/test/datarate_test.cc b/libvpx/test/datarate_test.cc
index b6cae7903..220cbf3a3 100644
--- a/libvpx/test/datarate_test.cc
+++ b/libvpx/test/datarate_test.cc
@@ -90,7 +90,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
<< pkt->data.frame.pts;
}
- const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+ const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
// Subtract from the buffer the bits associated with a played back frame.
bits_in_buffer_model_ -= frame_size_in_bits;
@@ -135,7 +135,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
double duration_;
double file_datarate_;
double effective_datarate_;
- size_t bits_in_last_frame_;
+ int64_t bits_in_last_frame_;
int denoiser_on_;
int denoiser_offon_test_;
int denoiser_offon_period_;
@@ -450,7 +450,28 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
int denoiser_offon_period_;
};
-// Check basic rate targeting,
+// Check basic rate targeting for VBR mode.
+TEST_P(DatarateTestVP9Large, BasicRateTargetingVBR) {
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.g_error_resilient = 0;
+ cfg_.rc_end_usage = VPX_VBR;
+ cfg_.g_lag_in_frames = 0;
+
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 300);
+ for (int i = 400; i <= 800; i += 400) {
+ cfg_.rc_target_bitrate = i;
+ ResetModel();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
+ << " The datarate for the file is lower than target by too much!";
+ ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25)
+ << " The datarate for the file is greater than target by too much!";
+ }
+}
+
+// Check basic rate targeting for CBR.
TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
@@ -474,7 +495,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
}
}
-// Check basic rate targeting,
+// Check basic rate targeting for CBR.
TEST_P(DatarateTestVP9Large, BasicRateTargeting444) {
::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
@@ -519,6 +540,9 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
cfg_.rc_end_usage = VPX_CBR;
cfg_.rc_target_bitrate = 200;
cfg_.g_lag_in_frames = 0;
+ // TODO(marpan): Investigate datarate target failures with a smaller keyframe
+ // interval (128).
+ cfg_.kf_max_dist = 9999;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 140);
@@ -538,7 +562,7 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
<< " The first dropped frame for drop_thresh " << i
<< " > first dropped frame for drop_thresh "
<< i - kDropFrameThreshTestStep;
- ASSERT_GE(num_drops_, last_num_drops * 0.90)
+ ASSERT_GE(num_drops_, last_num_drops * 0.85)
<< " The number of dropped frames for drop_thresh " << i
<< " < number of dropped frames for drop_thresh "
<< i - kDropFrameThreshTestStep;
@@ -770,14 +794,10 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 0) {
int i;
- for (i = 0; i < 2; ++i) {
+ for (i = 0; i < VPX_MAX_LAYERS; ++i) {
svc_params_.max_quantizers[i] = 63;
svc_params_.min_quantizers[i] = 0;
}
- svc_params_.scaling_factor_num[0] = 144;
- svc_params_.scaling_factor_den[0] = 288;
- svc_params_.scaling_factor_num[1] = 288;
- svc_params_.scaling_factor_den[1] = 288;
encoder->Control(VP9E_SET_SVC, 1);
encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
@@ -814,8 +834,6 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
if (bits_total_) {
const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit
duration_ = (last_pts_ + 1) * timebase_;
- effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
- / (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
file_datarate_ = file_size_in_kb / duration_;
}
}
@@ -839,7 +857,6 @@ class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
int64_t bits_total_;
double duration_;
double file_datarate_;
- double effective_datarate_;
size_t bits_in_last_frame_;
vpx_svc_extra_cfg_t svc_params_;
int speed_setting_;
@@ -850,8 +867,7 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
const vpx_svc_extra_cfg_t *svc_params,
int spatial_layers,
int temporal_layers,
- int temporal_layering_mode,
- unsigned int total_rate) {
+ int temporal_layering_mode) {
int sl, spatial_layer_target;
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = {0};
@@ -885,7 +901,7 @@ static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
// 3 temporal layers. Run CIF clip with 1 thread.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
@@ -905,31 +921,71 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288;
- // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
- // frame dropping for SVC.
- cfg_.rc_dropframe_thresh = 0;
+ cfg_.rc_dropframe_thresh = 10;
+ cfg_.kf_max_dist = 9999;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 200);
// TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
- // layer target_bitrate. Also check if test can pass at lower bitrate (~200k).
- for (int i = 400; i <= 800; i += 200) {
+ // layer target_bitrate.
+ for (int i = 200; i <= 800; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
- cfg_.ts_number_layers, cfg_.temporal_layering_mode,
- cfg_.rc_target_bitrate);
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+ ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
- EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+ }
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
+// temporal layers. Run CIF clip with 1 thread and a few short key frame periods.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayersSmallKf) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 2;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 1;
+ cfg_.temporal_layering_mode = 3;
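+  // Two spatial layers: half resolution (144/288) and full resolution.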
+ svc_params_.scaling_factor_num[0] = 144;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 288;
+ svc_params_.scaling_factor_den[1] = 288;
+ cfg_.rc_dropframe_thresh = 10;
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 200);
+ cfg_.rc_target_bitrate = 400;
+  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
+  // choose 4 neighboring key frame periods (so the key frame lands on
+  // 0-2-1-2).
+ for (int j = 64; j <= 67; j++) {
+ cfg_.kf_max_dist = j;
+ ResetModel();
+ assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
+ << " The datarate for the file exceeds the target by too much!";
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
+ << " The datarate for the file is lower than the target by too much!";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}
}
// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
// 3 temporal layers. Run HD clip with 4 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) {
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers4threads) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
@@ -949,30 +1005,152 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) {
svc_params_.scaling_factor_den[0] = 288;
svc_params_.scaling_factor_num[1] = 288;
svc_params_.scaling_factor_den[1] = 288;
- // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
- // frame dropping for SVC.
- cfg_.rc_dropframe_thresh = 0;
+ cfg_.rc_dropframe_thresh = 10;
+ cfg_.kf_max_dist = 9999;
::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
30, 1, 0, 300);
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
- cfg_.ts_number_layers, cfg_.temporal_layering_mode,
- cfg_.rc_target_bitrate);
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+ ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
<< " The datarate for the file exceeds the target by too much!";
ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
<< " The datarate for the file is lower than the target by too much!";
- EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers. Run HD clip with 1 thread.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 1;
+ cfg_.temporal_layering_mode = 3;
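+  // Three spatial layers: 1/4 (72/288), 1/2 (144/288) and full resolution.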
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ cfg_.rc_dropframe_thresh = 10;
+ cfg_.kf_max_dist = 9999;
+ ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
+ 30, 1, 0, 300);
+ cfg_.rc_target_bitrate = 800;
+ ResetModel();
+ assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
+ << " The datarate for the file exceeds the target by too much!";
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
+ << " The datarate for the file is lower than the target by too much!";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
+// temporal layers. Run HD clip with 1 thread and a few short key frame periods.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayersSmallKf) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 1;
+ cfg_.temporal_layering_mode = 3;
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ cfg_.rc_dropframe_thresh = 10;
+ ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
+ 30, 1, 0, 300);
+ cfg_.rc_target_bitrate = 800;
+  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
+  // choose 4 neighboring key frame periods (so the key frame lands on
+  // 0-2-1-2).
+ for (int j = 32; j <= 35; j++) {
+ cfg_.kf_max_dist = j;
+ ResetModel();
+ assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
+ << " The datarate for the file exceeds the target by too much!";
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30)
+ << " The datarate for the file is lower than the target by too much!";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+ }
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 3 temporal layers. Run HD clip with 4 threads.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers4threads) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 4;
+ cfg_.temporal_layering_mode = 3;
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ cfg_.rc_dropframe_thresh = 10;
+ cfg_.kf_max_dist = 9999;
+ ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
+ 30, 1, 0, 300);
+ cfg_.rc_target_bitrate = 800;
+ ResetModel();
+ assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
+ << " The datarate for the file exceeds the target by too much!";
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
+ << " The datarate for the file is lower than the target by too much!";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
}
VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
::testing::Values(::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
- ::testing::Range(2, 7));
+ ::testing::Range(2, 9));
VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
::testing::Values(::libvpx_test::kRealTime),
- ::testing::Range(5, 8));
+ ::testing::Range(5, 9));
} // namespace
diff --git a/libvpx/test/dct16x16_test.cc b/libvpx/test/dct16x16_test.cc
index 332210daa..ddaf9395b 100644
--- a/libvpx/test/dct16x16_test.cc
+++ b/libvpx/test/dct16x16_test.cc
@@ -276,12 +276,12 @@ void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
}
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
idct16x16_10(in, out, stride);
}
void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
idct16x16_12(in, out, stride);
}
@@ -293,6 +293,7 @@ void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
}
+#if HAVE_SSE2
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
}
@@ -301,7 +302,6 @@ void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
}
-#if HAVE_SSE2
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
}
@@ -373,10 +373,10 @@ class Trans16x16TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
+ const int32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
- const uint32_t diff = dst[j] - src[j];
+ const int32_t diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
if (max_error < error)
@@ -778,7 +778,7 @@ class InvTrans16x16DCT
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
- void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {}
+ void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride);
}
@@ -792,6 +792,67 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
CompareInvReference(ref_txfm_, thresh_);
}
+class PartialTrans16x16Test
+ : public ::testing::TestWithParam<
+ std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > {
+ public:
+ virtual ~PartialTrans16x16Test() {}
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ bit_depth_ = GET_PARAM(1);
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ vpx_bit_depth_t bit_depth_;
+ FdctFunc fwd_txfm_;
+};
+
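+// Only the DC coefficient is checked below: the partial forward transform is
+// expected to produce output[0] = (sum of the 256 inputs) >> 1.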
+TEST_P(PartialTrans16x16Test, Extremes) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int16_t maxval =
+ static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+ const int16_t maxval = 255;
+#endif
+ const int minval = -maxval;
+ DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+ DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+ for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+ output[0] = 0;
+ ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+ EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
+
+ for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+ output[0] = 0;
+ ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+ EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
+}
+
+TEST_P(PartialTrans16x16Test, Random) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int16_t maxval =
+ static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+ const int16_t maxval = 255;
+#endif
+ DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+ DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+ int sum = 0;
+ for (int i = 0; i < kNumCoeffs; ++i) {
+ const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+ input[i] = val;
+ sum += val;
+ }
+ output[0] = 0;
+ ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+ EXPECT_EQ(sum >> 1, output[0]);
+}
+
using std::tr1::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -824,6 +885,11 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+ C, PartialTrans16x16Test,
+ ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT,
@@ -832,6 +898,9 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
+ ::testing::Values(make_tuple(&vpx_fdct16x16_1_c,
+ VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -859,6 +928,9 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8),
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+ ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
+ VPX_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -896,6 +968,9 @@ INSTANTIATE_TEST_CASE_P(
&idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
make_tuple(&idct16x16_12,
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+ ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
+ VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -912,5 +987,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
+ ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa,
+ VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
diff --git a/libvpx/test/dct32x32_test.cc b/libvpx/test/dct32x32_test.cc
index f7327b100..16d88255e 100644
--- a/libvpx/test/dct32x32_test.cc
+++ b/libvpx/test/dct32x32_test.cc
@@ -81,10 +81,6 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
Trans32x32Param;
#if CONFIG_VP9_HIGHBITDEPTH
-void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
- vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);
-}
-
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
}
@@ -158,10 +154,10 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
+ const int32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
- const uint32_t diff = dst[j] - src[j];
+ const int32_t diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
if (max_error < error)
@@ -309,6 +305,67 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
}
}
+class PartialTrans32x32Test
+ : public ::testing::TestWithParam<
+ std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
+ public:
+ virtual ~PartialTrans32x32Test() {}
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ bit_depth_ = GET_PARAM(1);
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ vpx_bit_depth_t bit_depth_;
+ FwdTxfmFunc fwd_txfm_;
+};
+
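+// Only the DC coefficient is checked below: the partial forward transform is
+// expected to produce output[0] = (sum of the 1024 inputs) >> 3.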
+TEST_P(PartialTrans32x32Test, Extremes) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int16_t maxval =
+ static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+ const int16_t maxval = 255;
+#endif
+ const int minval = -maxval;
+ DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+ DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+ for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+ output[0] = 0;
+ ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+ EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
+
+ for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+ output[0] = 0;
+ ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+ EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
+}
+
+TEST_P(PartialTrans32x32Test, Random) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int16_t maxval =
+ static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+ const int16_t maxval = 255;
+#endif
+ DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+ DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+ int sum = 0;
+ for (int i = 0; i < kNumCoeffs; ++i) {
+ const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+ input[i] = val;
+ sum += val;
+ }
+ output[0] = 0;
+ ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+ EXPECT_EQ(sum >> 3, output[0]);
+}
+
using std::tr1::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -327,6 +384,11 @@ INSTANTIATE_TEST_CASE_P(
&vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c,
&vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+ C, PartialTrans32x32Test,
+ ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
+ make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
+ make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
#else
INSTANTIATE_TEST_CASE_P(
C, Trans32x32Test,
@@ -335,9 +397,12 @@ INSTANTIATE_TEST_CASE_P(
&vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c,
&vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
+ ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
+ VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, Trans32x32Test,
::testing::Values(
@@ -345,7 +410,7 @@ INSTANTIATE_TEST_CASE_P(
&vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c,
&vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
@@ -355,6 +420,9 @@ INSTANTIATE_TEST_CASE_P(
&vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_sse2,
&vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+ ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
+ VPX_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -371,6 +439,9 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+ ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
+ VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -391,5 +462,8 @@ INSTANTIATE_TEST_CASE_P(
&vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_msa,
&vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
+ ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
+ VPX_BITS_8)));
#endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
diff --git a/libvpx/test/decode_api_test.cc b/libvpx/test/decode_api_test.cc
index 318351b73..99b4db10f 100644
--- a/libvpx/test/decode_api_test.cc
+++ b/libvpx/test/decode_api_test.cc
@@ -27,9 +27,6 @@ TEST(DecodeAPI, InvalidParams) {
#if CONFIG_VP9_DECODER
&vpx_codec_vp9_dx_algo,
#endif
-#if CONFIG_VP10_DECODER
- &vpx_codec_vp10_dx_algo,
-#endif
};
uint8_t buf[1] = {0};
vpx_codec_ctx_t dec;
@@ -146,6 +143,40 @@ TEST(DecodeAPI, Vp9InvalidDecode) {
TestVp9Controls(&dec);
EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
}
+
+TEST(DecodeAPI, Vp9PeekSI) {
+ const vpx_codec_iface_t *const codec = &vpx_codec_vp9_dx_algo;
+  // The first 9 bytes are valid and the rest of the bytes are made up. For
+  // sizes below 10 this should return VPX_CODEC_UNSUP_BITSTREAM; from size 10
+  // on it should return VPX_CODEC_CORRUPT_FRAME.
+ const uint8_t data[32] = {
+ 0x85, 0xa4, 0xc1, 0xa1, 0x38, 0x81, 0xa3, 0x49,
+ 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+
+ for (uint32_t data_sz = 1; data_sz <= 32; ++data_sz) {
+ // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get
+ // to decoder_peek_si_internal on frames of size < 8.
+ if (data_sz >= 8) {
+ vpx_codec_ctx_t dec;
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
+ EXPECT_EQ((data_sz < 10) ?
+ VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_CORRUPT_FRAME,
+ vpx_codec_decode(&dec, data, data_sz, NULL, 0));
+ vpx_codec_iter_t iter = NULL;
+ EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
+ }
+
+ // Verify behavior of vpx_codec_peek_stream_info.
+ vpx_codec_stream_info_t si;
+ si.sz = sizeof(si);
+ EXPECT_EQ((data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK,
+ vpx_codec_peek_stream_info(codec, data, data_sz, &si));
+ }
+}
#endif // CONFIG_VP9_DECODER
} // namespace
diff --git a/libvpx/test/encode_api_test.cc b/libvpx/test/encode_api_test.cc
new file mode 100644
index 000000000..94afddeb6
--- /dev/null
+++ b/libvpx/test/encode_api_test.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+
+namespace {
+
+#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+
+TEST(EncodeAPI, InvalidParams) {
+ static const vpx_codec_iface_t *kCodecs[] = {
+#if CONFIG_VP8_ENCODER
+ &vpx_codec_vp8_cx_algo,
+#endif
+#if CONFIG_VP9_ENCODER
+ &vpx_codec_vp9_cx_algo,
+#endif
+ };
+ uint8_t buf[1] = {0};
+ vpx_image_t img;
+ vpx_codec_ctx_t enc;
+ vpx_codec_enc_cfg_t cfg;
+
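+  // Wrap a minimal 1x1 I420 image around |buf| for the parameter checks.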
+ EXPECT_EQ(&img, vpx_img_wrap(&img, VPX_IMG_FMT_I420, 1, 1, 1, buf));
+
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, NULL, NULL, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, NULL, NULL, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, NULL, 0, 0, 0, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, &img, 0, 0, 0, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+ vpx_codec_enc_config_default(NULL, NULL, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+ vpx_codec_enc_config_default(NULL, &cfg, 0));
+ EXPECT_TRUE(vpx_codec_error(NULL) != NULL);
+
+ for (int i = 0; i < NELEMENTS(kCodecs); ++i) {
+ SCOPED_TRACE(vpx_codec_iface_name(kCodecs[i]));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+ vpx_codec_enc_init(NULL, kCodecs[i], NULL, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+ vpx_codec_enc_init(&enc, kCodecs[i], NULL, 0));
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+ vpx_codec_enc_config_default(kCodecs[i], &cfg, 1));
+
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(kCodecs[i], &cfg, 0));
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, NULL, 0, 0, 0, 0));
+
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
+ }
+}
+
+} // namespace
diff --git a/libvpx/test/encode_test_driver.cc b/libvpx/test/encode_test_driver.cc
index 128436ee9..b8c737187 100644
--- a/libvpx/test/encode_test_driver.cc
+++ b/libvpx/test/encode_test_driver.cc
@@ -43,15 +43,6 @@ void Encoder::InitEncoder(VideoSource *video) {
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
} else
#endif
-#if CONFIG_VP10_ENCODER
- if (CodecInterface() == &vpx_codec_vp10_cx_algo) {
- // Default to 1 tile column for VP10.
- const int log2_tile_columns = 0;
- res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
- log2_tile_columns);
- ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
- } else
-#endif
{
#if CONFIG_VP8_ENCODER
ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
diff --git a/libvpx/test/encode_test_driver.h b/libvpx/test/encode_test_driver.h
index 6d0a72f98..d14ddc7d7 100644
--- a/libvpx/test/encode_test_driver.h
+++ b/libvpx/test/encode_test_driver.h
@@ -16,7 +16,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
#endif
#include "vpx/vpx_encoder.h"
@@ -143,7 +143,7 @@ class Encoder {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
void Control(int ctrl_id, vpx_active_map_t *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
diff --git a/libvpx/test/error_resilience_test.cc b/libvpx/test/error_resilience_test.cc
index 9a2ad2f35..00a095ce8 100644
--- a/libvpx/test/error_resilience_test.cc
+++ b/libvpx/test/error_resilience_test.cc
@@ -100,7 +100,7 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
- ::libvpx_test::Encoder *encoder) {
+ ::libvpx_test::Encoder * /*encoder*/) {
frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF);
@@ -596,7 +596,4 @@ VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
ONE_PASS_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
::testing::Values(true));
-// SVC-related tests don't run for VP10 since SVC is not supported.
-VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
- ::testing::Values(false));
} // namespace
diff --git a/libvpx/test/external_frame_buffer_test.cc b/libvpx/test/external_frame_buffer_test.cc
index d02dca2be..2570f44eb 100644
--- a/libvpx/test/external_frame_buffer_test.cc
+++ b/libvpx/test/external_frame_buffer_test.cc
@@ -24,7 +24,6 @@
namespace {
const int kVideoNameParam = 1;
-const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
struct ExternalFrameBuffer {
uint8_t *data;
@@ -155,6 +154,8 @@ class ExternalFrameBufferList {
ExternalFrameBuffer *ext_fb_list_;
};
+#if CONFIG_WEBM_IO
+
// Callback used by libvpx to request the application to return a frame
// buffer of at least |min_size| in bytes.
int get_vp9_frame_buffer(void *user_priv, size_t min_size,
@@ -197,6 +198,8 @@ int do_not_release_vp9_frame_buffer(void *user_priv,
return 0;
}
+#endif // CONFIG_WEBM_IO
+
// Class for testing passing in external frame buffers to libvpx.
class ExternalFrameBufferMD5Test
: public ::libvpx_test::DecoderTest,
@@ -278,6 +281,8 @@ class ExternalFrameBufferMD5Test
};
#if CONFIG_WEBM_IO
+const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
+
// Class for testing passing in external frame buffers to libvpx.
class ExternalFrameBufferTest : public ::testing::Test {
protected:
diff --git a/libvpx/test/fdct4x4_test.cc b/libvpx/test/fdct4x4_test.cc
index 3f6b738e5..735cccf8d 100644
--- a/libvpx/test/fdct4x4_test.cc
+++ b/libvpx/test/fdct4x4_test.cc
@@ -40,7 +40,7 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
vpx_fdct4x4_c(in, out, stride);
}
@@ -49,7 +49,7 @@ void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
}
void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
vp9_fwht4x4_c(in, out, stride);
}
@@ -141,11 +141,11 @@ class Trans4x4TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
ASSERT_EQ(VPX_BITS_8, bit_depth_);
- const uint32_t diff = dst[j] - src[j];
+ const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
if (max_error < error)
@@ -258,10 +258,10 @@ class Trans4x4TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
- const uint32_t diff = dst[j] - src[j];
+ const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
EXPECT_GE(static_cast<uint32_t>(limit), error)
@@ -487,19 +487,11 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
- !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
- MMX, Trans4x4WHT,
- ::testing::Values(
- make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
-#endif
-
-#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
- !CONFIG_EMULATE_HARDWARE
+#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4WHT,
::testing::Values(
+ make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
#endif
diff --git a/libvpx/test/fdct8x8_test.cc b/libvpx/test/fdct8x8_test.cc
index c0deaf406..29f215817 100644
--- a/libvpx/test/fdct8x8_test.cc
+++ b/libvpx/test/fdct8x8_test.cc
@@ -47,7 +47,7 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
-void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
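+// Reference 1-D 8-point DCT used by reference_8x8_dct_2d() below.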
+void reference_8x8_dct_1d(const double in[8], double out[8]) {
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 8; k++) {
out[k] = 0.0;
@@ -65,7 +65,7 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
double temp_in[8], temp_out[8];
for (int j = 0; j < 8; ++j)
temp_in[j] = input[j*8 + i];
- reference_8x8_dct_1d(temp_in, temp_out, 1);
+ reference_8x8_dct_1d(temp_in, temp_out);
for (int j = 0; j < 8; ++j)
output[j * 8 + i] = temp_out[j];
}
@@ -74,7 +74,7 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
double temp_in[8], temp_out[8];
for (int j = 0; j < 8; ++j)
temp_in[j] = output[j + i*8];
- reference_8x8_dct_1d(temp_in, temp_out, 1);
+ reference_8x8_dct_1d(temp_in, temp_out);
// Scale by some magic number
for (int j = 0; j < 8; ++j)
output[j + i * 8] = temp_out[j] * 2;
@@ -82,7 +82,8 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
}
-void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
+ int /*tx_type*/) {
vpx_fdct8x8_c(in, out, stride);
}
@@ -107,6 +108,8 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
}
+#if HAVE_SSE2
+
void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
}
@@ -115,7 +118,6 @@ void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
}
-#if HAVE_SSE2
void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
}
@@ -423,10 +425,10 @@ class FwdTrans8x8TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
- const uint32_t diff = dst[j] - src[j];
+ const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
@@ -456,7 +458,7 @@ class FwdTrans8x8TestBase {
coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
for (int j = 0; j < kNumCoeffs; ++j) {
- const uint32_t diff = coeff[j] - coeff_r[j];
+ const int32_t diff = coeff[j] - coeff_r[j];
const uint32_t error = diff * diff;
EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
<< "Error: 8x8 DCT has error " << error
@@ -509,10 +511,10 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
- const uint32_t diff =
+ const int diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
- const uint32_t diff = dst[j] - ref[j];
+ const int diff = dst[j] - ref[j];
#endif
const uint32_t error = diff * diff;
EXPECT_EQ(0u, error)
@@ -641,7 +643,7 @@ class InvTrans8x8DCT
void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride);
}
- void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}
+ void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
IdctFunc ref_txfm_;
IdctFunc inv_txfm_;
diff --git a/libvpx/test/hadamard_test.cc b/libvpx/test/hadamard_test.cc
new file mode 100644
index 000000000..7a5bd5b4c
--- /dev/null
+++ b/libvpx/test/hadamard_test.cc
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
+
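+// Reference 1-D 8-point Hadamard: reads 8 input values spaced a_stride apart
+// and writes the transformed values to |out| in a permuted order. The tests
+// sort both outputs before comparing, so the ordering does not matter.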
+void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
+ int16_t b[8];
+ for (int i = 0; i < 8; i += 2) {
+ b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride];
+ b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride];
+ }
+ int16_t c[8];
+ for (int i = 0; i < 8; i += 4) {
+ c[i + 0] = b[i + 0] + b[i + 2];
+ c[i + 1] = b[i + 1] + b[i + 3];
+ c[i + 2] = b[i + 0] - b[i + 2];
+ c[i + 3] = b[i + 1] - b[i + 3];
+ }
+ out[0] = c[0] + c[4];
+ out[7] = c[1] + c[5];
+ out[3] = c[2] + c[6];
+ out[4] = c[3] + c[7];
+ out[2] = c[0] - c[4];
+ out[6] = c[1] - c[5];
+ out[1] = c[2] - c[6];
+ out[5] = c[3] - c[7];
+}
+
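+// Separable 2-D 8x8 Hadamard: apply the 1-D transform to each column of |a|,
+// then to each column of the intermediate result.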
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
+ int16_t buf[64];
+ for (int i = 0; i < 8; ++i) {
+ hadamard_loop(a + i, a_stride, buf + i * 8);
+ }
+
+ for (int i = 0; i < 8; ++i) {
+ hadamard_loop(buf + i, 8, b + i * 8);
+ }
+}
+
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
+ /* The source is a 16x16 block. The destination is rearranged to 8x32.
+ * Input is 9 bit. */
+ reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
+ reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
+ reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
+ reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
+
+ /* Overlay the 8x8 blocks and combine. */
+ for (int i = 0; i < 64; ++i) {
+ /* 8x8 steps the range up to 15 bits. */
+ const int16_t a0 = b[0];
+ const int16_t a1 = b[64];
+ const int16_t a2 = b[128];
+ const int16_t a3 = b[192];
+
+ /* Prevent the result from escaping int16_t. */
+ const int16_t b0 = (a0 + a1) >> 1;
+ const int16_t b1 = (a0 - a1) >> 1;
+ const int16_t b2 = (a2 + a3) >> 1;
+ const int16_t b3 = (a2 - a3) >> 1;
+
+ /* Store a 16 bit value. */
+ b[ 0] = b0 + b2;
+ b[ 64] = b1 + b3;
+ b[128] = b0 - b2;
+ b[192] = b1 - b3;
+
+ ++b;
+ }
+}
+
+class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
+ public:
+ virtual void SetUp() {
+ h_func_ = GetParam();
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ protected:
+ HadamardFunc h_func_;
+ ACMRandom rnd_;
+};
+
+class Hadamard8x8Test : public HadamardTestBase {};
+
+TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
+ DECLARE_ALIGNED(16, int16_t, a[64]);
+ DECLARE_ALIGNED(16, int16_t, b[64]);
+ int16_t b_ref[64];
+ for (int i = 0; i < 64; ++i) {
+ a[i] = rnd_.Rand9Signed();
+ }
+ memset(b, 0, sizeof(b));
+ memset(b_ref, 0, sizeof(b_ref));
+
+ reference_hadamard8x8(a, 8, b_ref);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + 64);
+ std::sort(b_ref, b_ref + 64);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(Hadamard8x8Test, VaryStride) {
+ DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
+ DECLARE_ALIGNED(16, int16_t, b[64]);
+ int16_t b_ref[64];
+ for (int i = 0; i < 64 * 8; ++i) {
+ a[i] = rnd_.Rand9Signed();
+ }
+
+ for (int i = 8; i < 64; i += 8) {
+ memset(b, 0, sizeof(b));
+ memset(b_ref, 0, sizeof(b_ref));
+
+ reference_hadamard8x8(a, i, b_ref);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + 64);
+ std::sort(b_ref, b_ref + 64);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
+ ::testing::Values(&vpx_hadamard_8x8_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
+ ::testing::Values(&vpx_hadamard_8x8_sse2));
+#endif // HAVE_SSE2
+
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
+ ::testing::Values(&vpx_hadamard_8x8_ssse3));
+#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
+ ::testing::Values(&vpx_hadamard_8x8_neon));
+#endif // HAVE_NEON
+
+class Hadamard16x16Test : public HadamardTestBase {};
+
+TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
+ DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
+ DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+ int16_t b_ref[16 * 16];
+ for (int i = 0; i < 16 * 16; ++i) {
+ a[i] = rnd_.Rand9Signed();
+ }
+ memset(b, 0, sizeof(b));
+ memset(b_ref, 0, sizeof(b_ref));
+
+ reference_hadamard16x16(a, 16, b_ref);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + 16 * 16);
+ std::sort(b_ref, b_ref + 16 * 16);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+}
+
+TEST_P(Hadamard16x16Test, VaryStride) {
+ DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
+ DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+ int16_t b_ref[16 * 16];
+ for (int i = 0; i < 16 * 16 * 8; ++i) {
+ a[i] = rnd_.Rand9Signed();
+ }
+
+ for (int i = 8; i < 64; i += 8) {
+ memset(b, 0, sizeof(b));
+ memset(b_ref, 0, sizeof(b_ref));
+
+ reference_hadamard16x16(a, i, b_ref);
+ ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
+
+ // The order of the output is not important. Sort before checking.
+ std::sort(b, b + 16 * 16);
+ std::sort(b_ref, b_ref + 16 * 16);
+ EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
+ ::testing::Values(&vpx_hadamard_16x16_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
+ ::testing::Values(&vpx_hadamard_16x16_sse2));
+#endif // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
+ ::testing::Values(&vpx_hadamard_16x16_neon));
+#endif // HAVE_NEON
+} // namespace
diff --git a/libvpx/test/level_test.cc b/libvpx/test/level_test.cc
new file mode 100644
index 000000000..62d0247d4
--- /dev/null
+++ b/libvpx/test/level_test.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+class LevelTest
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+ LevelTest()
+ : EncoderTest(GET_PARAM(0)),
+ encoding_mode_(GET_PARAM(1)),
+ cpu_used_(GET_PARAM(2)),
+        min_gf_interval_(24),
+ target_level_(0),
+ level_(0) {}
+ virtual ~LevelTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(encoding_mode_);
+ if (encoding_mode_ != ::libvpx_test::kRealTime) {
+ cfg_.g_lag_in_frames = 25;
+ cfg_.rc_end_usage = VPX_VBR;
+ } else {
+ cfg_.g_lag_in_frames = 0;
+ cfg_.rc_end_usage = VPX_CBR;
+ }
+ cfg_.rc_2pass_vbr_minsection_pct = 5;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+ cfg_.rc_target_bitrate = 400;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_min_quantizer = 0;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+ encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
+      encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_interval_);
+ if (encoding_mode_ != ::libvpx_test::kRealTime) {
+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+ encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+ encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+ encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+ }
+ }
+ encoder->Control(VP9E_GET_LEVEL, &level_);
+ ASSERT_LE(level_, 51);
+ ASSERT_GE(level_, 0);
+ }
+
+ ::libvpx_test::TestMode encoding_mode_;
+ int cpu_used_;
+  int min_gf_interval_;
+ int target_level_;
+ int level_;
+};
+
+// Test for keeping level stats only
+TEST_P(LevelTest, TestTargetLevel0) {
+ ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+ 40);
+ target_level_ = 0;
+  min_gf_interval_ = 4;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_EQ(11, level_);
+
+ cfg_.rc_target_bitrate = 1600;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ ASSERT_EQ(20, level_);
+}
+
+// Test for level control being turned off
+TEST_P(LevelTest, TestTargetLevel255) {
+ ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+ 30);
+ target_level_ = 255;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+TEST_P(LevelTest, TestTargetLevelApi) {
+ ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1);
+ static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo;
+ vpx_codec_ctx_t enc;
+ vpx_codec_enc_cfg_t cfg;
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
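+  // Valid targets are the defined VP9 levels (1.0 through 6.2, encoded as
+  // 10-62), plus 0 (keep level stats only) and 255 (level targeting off).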
+ for (int level = 0; level <= 256; ++level) {
+ if (level == 10 || level == 11 || level == 20 || level == 21 ||
+ level == 30 || level == 31 || level == 40 || level == 41 ||
+ level == 50 || level == 51 || level == 52 || level == 60 ||
+ level == 61 || level == 62 || level == 0 || level == 255)
+ EXPECT_EQ(VPX_CODEC_OK,
+ vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
+ else
+ EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
+ vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
+ }
+ EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
+}
+
+VP9_INSTANTIATE_TEST_CASE(LevelTest,
+ ::testing::Values(::libvpx_test::kTwoPassGood,
+ ::libvpx_test::kOnePassGood),
+ ::testing::Range(0, 9));
+} // namespace
diff --git a/libvpx/test/lpf_8_test.cc b/libvpx/test/lpf_8_test.cc
index 0bf6b0c23..94646e4ff 100644
--- a/libvpx/test/lpf_8_test.cc
+++ b/libvpx/test/lpf_8_test.cc
@@ -37,120 +37,23 @@ const int number_of_iterations = 10000;
#if CONFIG_VP9_HIGHBITDEPTH
typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
- int count, int bd);
+ int bd);
typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd);
#else
typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count);
+ const uint8_t *limit, const uint8_t *thresh);
typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1);
#endif // CONFIG_VP9_HIGHBITDEPTH
-typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
- vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
- vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
- vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
- vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
-}
-#else
-void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // HAVE_SSE2
-
-#if HAVE_NEON_ASM
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // HAVE_NEON_ASM
-
-#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
- vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-
class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
public:
virtual ~Loop8Test6Param() {}
@@ -158,7 +61,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
loopfilter_op_ = GET_PARAM(0);
ref_loopfilter_op_ = GET_PARAM(1);
bit_depth_ = GET_PARAM(2);
- count_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
}
@@ -166,7 +68,6 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
protected:
int bit_depth_;
- int count_;
int mask_;
loop_op_t loopfilter_op_;
loop_op_t ref_loopfilter_op_;
@@ -253,13 +154,13 @@ TEST_P(Loop8Test6Param, OperationCheck) {
ref_s[j] = s[j];
}
#if CONFIG_VP9_HIGHBITDEPTH
- ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
+ ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
#else
- ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
+ ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int j = 0; j < kNumCoeffs; ++j) {
@@ -325,13 +226,13 @@ TEST_P(Loop8Test6Param, ValueCheck) {
ref_s[j] = s[j];
}
#if CONFIG_VP9_HIGHBITDEPTH
- ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
+ ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
#else
- ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
+ ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int j = 0; j < kNumCoeffs; ++j) {
err_count += ref_s[j] != s[j];
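
With count_ removed above, the loop filters take five arguments and the filtering width moves into the function name. A rough sketch of the test typedefs this implies follows; the real declarations sit earlier in lpf_8_test.cc, outside this hunk, so treat the exact spelling (and the _old name, which is purely illustrative) as assumptions:

// Hypothetical before/after of the test's function-pointer type.
// Before: a trailing count chose between 8- and 16-pixel-wide filtering.
typedef void (*loop_op_t_old)(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count);
// After: no count; the width is encoded in the name (edge_8 vs. edge_16).
typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh);
// The parameter tuple loses its count field:
// (optimized function, reference function, bit depth).
typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;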
@@ -535,64 +436,73 @@ INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+ &vpx_highbd_lpf_horizontal_4_c, 8),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
- &vpx_highbd_lpf_vertical_4_c, 8, 1),
+ &vpx_highbd_lpf_vertical_4_c, 8),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
- &vpx_highbd_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+ &vpx_highbd_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+ &vpx_highbd_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+ &vpx_highbd_lpf_horizontal_edge_16_c, 8),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
- &vpx_highbd_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 8, 1),
+ &vpx_highbd_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+ &vpx_highbd_lpf_vertical_16_c, 8),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+ &vpx_highbd_lpf_horizontal_4_c, 10),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
- &vpx_highbd_lpf_vertical_4_c, 10, 1),
+ &vpx_highbd_lpf_vertical_4_c, 10),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
- &vpx_highbd_lpf_horizontal_8_c, 10, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 10, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+ &vpx_highbd_lpf_horizontal_8_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+ &vpx_highbd_lpf_horizontal_edge_8_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+ &vpx_highbd_lpf_horizontal_edge_16_c, 10),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
- &vpx_highbd_lpf_vertical_8_c, 10, 1),
- make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 10, 1),
+ &vpx_highbd_lpf_vertical_8_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+ &vpx_highbd_lpf_vertical_16_c, 10),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+ &vpx_highbd_lpf_horizontal_4_c, 12),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
- &vpx_highbd_lpf_vertical_4_c, 12, 1),
+ &vpx_highbd_lpf_vertical_4_c, 12),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
- &vpx_highbd_lpf_horizontal_8_c, 12, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 12, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+ &vpx_highbd_lpf_horizontal_8_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+ &vpx_highbd_lpf_horizontal_edge_8_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+ &vpx_highbd_lpf_horizontal_edge_16_c, 12),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
- &vpx_highbd_lpf_vertical_8_c, 12, 1),
- make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 12, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 8, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 10, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 12, 1)));
+ &vpx_highbd_lpf_vertical_8_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+ &vpx_highbd_lpf_vertical_16_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+ &vpx_highbd_lpf_vertical_16_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+ &vpx_highbd_lpf_vertical_16_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+ &vpx_highbd_lpf_vertical_16_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_4_sse2,
+ &vpx_lpf_horizontal_4_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_sse2,
+ &vpx_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
+ &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_sse2,
+ &vpx_lpf_vertical_4_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_sse2,
+ &vpx_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_sse2,
+ &vpx_lpf_vertical_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dual_sse2,
+ &vpx_lpf_vertical_16_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
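
The tuple rewrites above track a rename in vpx_dsp: the 16-wide horizontal filter that took a count argument is split into explicit edge variants. A hedged sketch of the equivalence the old and new tuples suggest (these call sites are assumptions, not part of the diff):

// count=1 filtered one 8-pixel edge; count=2 filtered two (16 pixels).
vpx_lpf_horizontal_16_sse2(s, p, blimit, limit, thresh, 1);    // old
vpx_lpf_horizontal_edge_8_sse2(s, p, blimit, limit, thresh);   // new equivalent
vpx_lpf_horizontal_16_sse2(s, p, blimit, limit, thresh, 2);    // old
vpx_lpf_horizontal_edge_16_sse2(s, p, blimit, limit, thresh);  // new equivalent

This mapping is why the old tables carried duplicate vpx_lpf_horizontal_16 rows with trailing 1 and 2, while the new ones list edge_8 and edge_16 once each.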
@@ -600,9 +510,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
- 2)));
+ make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
+ &vpx_lpf_horizontal_edge_16_c, 8)));
#endif
#if HAVE_SSE2
@@ -659,23 +570,23 @@ INSTANTIATE_TEST_CASE_P(
#if HAVE_NEON_ASM
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
- make_tuple(&vpx_lpf_horizontal_16_neon,
- &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_neon,
- &vpx_lpf_horizontal_16_c, 8, 2),
- make_tuple(&wrapper_vertical_16_neon,
- &wrapper_vertical_16_c, 8, 1),
- make_tuple(&wrapper_vertical_16_dual_neon,
- &wrapper_vertical_16_dual_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_edge_8_neon,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_neon,
+ &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_neon,
+ &vpx_lpf_vertical_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dual_neon,
+ &vpx_lpf_vertical_16_dual_c, 8),
#endif // HAVE_NEON_ASM
make_tuple(&vpx_lpf_horizontal_8_neon,
- &vpx_lpf_horizontal_8_c, 8, 1),
+ &vpx_lpf_horizontal_8_c, 8),
make_tuple(&vpx_lpf_vertical_8_neon,
- &vpx_lpf_vertical_8_c, 8, 1),
+ &vpx_lpf_vertical_8_c, 8),
make_tuple(&vpx_lpf_horizontal_4_neon,
- &vpx_lpf_horizontal_4_c, 8, 1),
+ &vpx_lpf_horizontal_4_c, 8),
make_tuple(&vpx_lpf_vertical_4_neon,
- &vpx_lpf_vertical_4_c, 8, 1)));
+ &vpx_lpf_vertical_4_c, 8)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
@@ -692,15 +603,58 @@ INSTANTIATE_TEST_CASE_P(
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
+#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ DSPR2, Loop8Test6Param,
+ ::testing::Values(
+ make_tuple(&vpx_lpf_horizontal_4_dspr2,
+ &vpx_lpf_horizontal_4_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dspr2,
+ &vpx_lpf_horizontal_8_c, 8),
+      make_tuple(&vpx_lpf_horizontal_edge_8_dspr2,
+                 &vpx_lpf_horizontal_edge_8_c, 8),
+      make_tuple(&vpx_lpf_horizontal_edge_16_dspr2,
+                 &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dspr2,
+ &vpx_lpf_vertical_4_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dspr2,
+ &vpx_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dspr2,
+ &vpx_lpf_vertical_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dual_dspr2,
+ &vpx_lpf_vertical_16_dual_c, 8)));
+
+INSTANTIATE_TEST_CASE_P(
+ DSPR2, Loop8Test9Param,
+ ::testing::Values(
+ make_tuple(&vpx_lpf_horizontal_4_dual_dspr2,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_dspr2,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_dspr2,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_dspr2,
+ &vpx_lpf_vertical_8_dual_c, 8)));
+#endif // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
+
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_4_msa,
+ &vpx_lpf_horizontal_4_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_msa,
+ &vpx_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_8_msa,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_msa,
+ &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_msa,
+ &vpx_lpf_vertical_4_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_msa,
+ &vpx_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_msa,
+ &vpx_lpf_vertical_16_c, 8)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
diff --git a/libvpx/test/minmax_test.cc b/libvpx/test/minmax_test.cc
new file mode 100644
index 000000000..dbe4342dc
--- /dev/null
+++ b/libvpx/test/minmax_test.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int *min, int *max);
+
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
+ public:
+ virtual void SetUp() {
+ mm_func_ = GetParam();
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ protected:
+ MinMaxFunc mm_func_;
+ ACMRandom rnd_;
+};
+
+void reference_minmax(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int *min_ret, int *max_ret) {
+ int min = 255;
+ int max = 0;
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
+ if (min > diff) min = diff;
+ if (max < diff) max = diff;
+ }
+ }
+
+ *min_ret = min;
+ *max_ret = max;
+}
+
+TEST_P(MinMaxTest, MinValue) {
+ for (int i = 0; i < 64; i++) {
+ uint8_t a[64], b[64];
+ memset(a, 0, sizeof(a));
+ memset(b, 255, sizeof(b));
+ b[i] = i; // Set a minimum difference of i.
+
+ int min, max;
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+ EXPECT_EQ(255, max);
+ EXPECT_EQ(i, min);
+ }
+}
+
+TEST_P(MinMaxTest, MaxValue) {
+ for (int i = 0; i < 64; i++) {
+ uint8_t a[64], b[64];
+ memset(a, 0, sizeof(a));
+ memset(b, 0, sizeof(b));
+ b[i] = i; // Set a maximum difference of i.
+
+ int min, max;
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+ EXPECT_EQ(i, max);
+ EXPECT_EQ(0, min);
+ }
+}
+
+TEST_P(MinMaxTest, CompareReference) {
+ uint8_t a[64], b[64];
+ for (int j = 0; j < 64; j++) {
+ a[j] = rnd_.Rand8();
+ b[j] = rnd_.Rand8();
+ }
+
+ int min_ref, max_ref, min, max;
+ reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+ EXPECT_EQ(max_ref, max);
+ EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
+ uint8_t a[8 * 64], b[8 * 64];
+ for (int i = 0; i < 8 * 64; i++) {
+ a[i] = rnd_.Rand8();
+ b[i] = rnd_.Rand8();
+ }
+ for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+ for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+ int min_ref, max_ref, min, max;
+ reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+ }
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
+ ::testing::Values(&vpx_minmax_8x8_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
+ ::testing::Values(&vpx_minmax_8x8_neon));
+#endif
+
+} // namespace
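
reference_minmax above is the contract vpx_minmax_8x8 is tested against: the smallest and largest absolute difference between two 8x8 strided blocks. A minimal usage sketch, assuming only the MinMaxFunc signature shown in this file and the C reference vpx_minmax_8x8_c declared in vpx_dsp_rtcd.h:

// Standalone usage sketch (assumptions noted above).
static void minmax_example() {
  uint8_t src[64] = { 0 };  // 8x8 block, stride 8
  uint8_t ref[64] = { 0 };
  ref[10] = 40;             // exactly one pixel differs, by 40
  int min_diff, max_diff;
  vpx_minmax_8x8_c(src, 8, ref, 8, &min_diff, &max_diff);
  // For this input, min_diff == 0 and max_diff == 40.
}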
diff --git a/libvpx/test/realtime_test.cc b/libvpx/test/realtime_test.cc
new file mode 100644
index 000000000..24749e4ec
--- /dev/null
+++ b/libvpx/test/realtime_test.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const int kVideoSourceWidth = 320;
+const int kVideoSourceHeight = 240;
+const int kFramesToEncode = 2;
+
+class RealtimeTest
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+ RealtimeTest()
+ : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
+ virtual ~RealtimeTest() {}
+
+ virtual void SetUp() {
+ InitializeConfig();
+ cfg_.g_lag_in_frames = 0;
+ SetMode(::libvpx_test::kRealTime);
+ }
+
+ virtual void BeginPassHook(unsigned int /*pass*/) {
+ // TODO(tomfinegan): We're changing the pass value here to make sure
+ // we get frames when real time mode is combined with |g_pass| set to
+ // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
+ // the pass value based on the mode passed into EncoderTest::SetMode(),
+ // which overrides the one specified in SetUp() above.
+ cfg_.g_pass = VPX_RC_FIRST_PASS;
+ }
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
+ frame_packets_++;
+ }
+
+ int frame_packets_;
+};
+
+TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
+ ::libvpx_test::RandomVideoSource video;
+ video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
+ video.set_limit(kFramesToEncode);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ EXPECT_EQ(kFramesToEncode, frame_packets_);
+}
+
+VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
+ ::testing::Values(::libvpx_test::kRealTime));
+VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
+ ::testing::Values(::libvpx_test::kRealTime));
+
+} // namespace
diff --git a/libvpx/test/register_state_check.h b/libvpx/test/register_state_check.h
index 489c41942..5336f2fbe 100644
--- a/libvpx/test/register_state_check.h
+++ b/libvpx/test/register_state_check.h
@@ -36,16 +36,10 @@
#include <windows.h>
#include <winnt.h>
-namespace testing {
-namespace internal {
-
inline bool operator==(const M128A& lhs, const M128A& rhs) {
return (lhs.Low == rhs.Low && lhs.High == rhs.High);
}
-} // namespace internal
-} // namespace testing
-
namespace libvpx_test {
// Compares the state of xmm[6-15] at construction with their state at
diff --git a/libvpx/test/resize_test.cc b/libvpx/test/resize_test.cc
index 98b6f87e1..90f5452e9 100644
--- a/libvpx/test/resize_test.cc
+++ b/libvpx/test/resize_test.cc
@@ -7,6 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <stdio.h>
+
#include <climits>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
@@ -90,34 +92,178 @@ struct FrameInfo {
unsigned int h;
};
-unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
- if (frame < 10)
- return val;
- if (frame < 20)
- return val / 2;
- if (frame < 30)
- return val * 2 / 3;
- if (frame < 40)
- return val / 4;
- if (frame < 50)
- return val * 7 / 8;
- return val;
+void ScaleForFrameNumber(unsigned int frame,
+ unsigned int initial_w,
+ unsigned int initial_h,
+ unsigned int *w,
+ unsigned int *h,
+ int flag_codec) {
+ if (frame < 10) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 20) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 30) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 40) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 50) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 60) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 70) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 80) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 90) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 100) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 110) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 120) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 130) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 140) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 150) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 160) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 170) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 180) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 190) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 200) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 210) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 220) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 230) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 240) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 250) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 260) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ // Go down very low.
+ if (frame < 270) {
+ *w = initial_w / 4;
+ *h = initial_h / 4;
+ return;
+ }
+ if (flag_codec == 1) {
+    // Cases that only work for VP9.
+    // For VP9: swap the width and height of the original.
+ if (frame < 320) {
+ *w = initial_h;
+ *h = initial_w;
+ return;
+ }
+ }
+ *w = initial_w;
+ *h = initial_h;
}
class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
public:
ResizingVideoSource() {
SetSize(kInitialWidth, kInitialHeight);
- limit_ = 60;
+ limit_ = 350;
}
-
+ int flag_codec_;
virtual ~ResizingVideoSource() {}
protected:
virtual void Next() {
++frame_;
- SetSize(ScaleForFrameNumber(frame_, kInitialWidth),
- ScaleForFrameNumber(frame_, kInitialHeight));
+ unsigned int width;
+ unsigned int height;
+ ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
+ flag_codec_);
+ SetSize(width, height);
FillFrame();
}
};
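
The if-chain in ScaleForFrameNumber above spells out a fixed schedule: the size changes every ten frames, cycling through full, 3/4, and 1/2 scale, dropping to 1/4 at frame 260, and, for VP9 only (flag_codec == 1), swapping width and height for frames 270 through 319. A table-driven sketch that restates the same schedule (the test itself keeps the explicit chain; this compaction is only illustrative):

// Sketch: one schedule entry per 10-frame step, covering frames 0..269.
void ScaleForFrameNumberSketch(unsigned int frame,
                               unsigned int initial_w, unsigned int initial_h,
                               unsigned int *w, unsigned int *h,
                               int flag_codec) {
  static const struct { unsigned int num, den; } kSchedule[] = {
    { 1, 1 }, { 3, 4 }, { 1, 2 }, { 1, 1 }, { 3, 4 }, { 1, 2 },
    { 1, 1 }, { 3, 4 }, { 1, 2 }, { 3, 4 }, { 1, 1 }, { 3, 4 },
    { 1, 2 }, { 3, 4 }, { 1, 1 }, { 3, 4 }, { 1, 2 }, { 3, 4 },
    { 1, 1 }, { 3, 4 }, { 1, 2 }, { 3, 4 }, { 1, 1 }, { 3, 4 },
    { 1, 2 }, { 1, 1 }, { 1, 4 },
  };
  const unsigned int step = frame / 10;
  if (step < sizeof(kSchedule) / sizeof(kSchedule[0])) {
    *w = initial_w * kSchedule[step].num / kSchedule[step].den;
    *h = initial_h * kSchedule[step].num / kSchedule[step].den;
  } else if (flag_codec == 1 && frame < 320) {
    *w = initial_h;  // VP9-only leg: swap the original dimensions.
    *h = initial_w;
  } else {
    *w = initial_w;
    *h = initial_h;
  }
}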
@@ -144,15 +290,17 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
TEST_P(ResizeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
+ video.flag_codec_ = 0;
cfg_.g_lag_in_frames = 0;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
const unsigned int frame = static_cast<unsigned>(info->pts);
- const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
- const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
-
+ unsigned int expected_w;
+ unsigned int expected_h;
+ ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
+ &expected_w, &expected_h, 0);
EXPECT_EQ(expected_w, info->w)
<< "Frame " << frame << " had unexpected width";
EXPECT_EQ(expected_h, info->h)
@@ -286,11 +434,11 @@ TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
-class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
+class ResizeRealtimeTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
protected:
- ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~ResizeInternalRealtimeTest() {}
+ ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
+ virtual ~ResizeRealtimeTest() {}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
@@ -317,9 +465,18 @@ class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
}
+ virtual void MismatchHook(const vpx_image_t *img1,
+ const vpx_image_t *img2) {
+ double mismatch_psnr = compute_psnr(img1, img2);
+ mismatch_psnr_ += mismatch_psnr;
+ ++mismatch_nframes_;
+ }
+
+ unsigned int GetMismatchFrames() {
+ return mismatch_nframes_;
+ }
+
void DefaultConfig() {
- cfg_.g_w = 352;
- cfg_.g_h = 288;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 600;
cfg_.rc_buf_sz = 1000;
@@ -344,16 +501,48 @@ class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
std::vector< FrameInfo > frame_info_list_;
int set_cpu_used_;
bool change_bitrate_;
+ double mismatch_psnr_;
+ int mismatch_nframes_;
};
+TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
+ ResizingVideoSource video;
+ video.flag_codec_ = 1;
+ DefaultConfig();
+ // Disable internal resize for this test.
+ cfg_.rc_resize_allowed = 0;
+ change_bitrate_ = false;
+ mismatch_psnr_ = 0.0;
+ mismatch_nframes_ = 0;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ const unsigned int frame = static_cast<unsigned>(info->pts);
+ unsigned int expected_w;
+ unsigned int expected_h;
+ ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
+ &expected_w, &expected_h, 1);
+ EXPECT_EQ(expected_w, info->w)
+ << "Frame " << frame << " had unexpected width";
+ EXPECT_EQ(expected_h, info->h)
+ << "Frame " << frame << " had unexpected height";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+ }
+}
+
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Run at low bitrate, with resize_allowed = 1, and verify that we get
// one resize down event.
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 299);
DefaultConfig();
+ cfg_.g_w = 352;
+ cfg_.g_h = 288;
change_bitrate_ = false;
+ mismatch_psnr_ = 0.0;
+ mismatch_nframes_ = 0;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;
@@ -371,22 +560,31 @@ TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
}
}
+#if CONFIG_VP9_DECODER
// Verify that we get 1 resize down event in this test.
ASSERT_EQ(1, resize_count) << "Resizing should occur.";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#else
+ printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
+#endif
}
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Start at low target bitrate, raise the bitrate in the middle of the clip,
// scaling-up should occur after bitrate changed.
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 299);
+ 30, 1, 0, 359);
DefaultConfig();
+ cfg_.g_w = 352;
+ cfg_.g_h = 288;
change_bitrate_ = true;
+ mismatch_psnr_ = 0.0;
+ mismatch_nframes_ = 0;
// Disable dropped frames.
cfg_.rc_dropframe_thresh = 0;
// Starting bitrate low.
- cfg_.rc_target_bitrate = 100;
+ cfg_.rc_target_bitrate = 80;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;
@@ -410,8 +608,13 @@ TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
}
}
+#if CONFIG_VP9_DECODER
// Verify that we get 2 resize events in this test.
- ASSERT_EQ(2, resize_count) << "Resizing should occur twice.";
+ ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
+ EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#else
+ printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
+#endif
}
vpx_img_fmt_t CspForFrameNumber(int frame) {
@@ -524,7 +727,7 @@ VP9_INSTANTIATE_TEST_CASE(ResizeTest,
::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
::testing::Values(::libvpx_test::kOnePassBest));
-VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest,
+VP9_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
::testing::Values(::libvpx_test::kRealTime),
::testing::Range(5, 9));
VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
diff --git a/libvpx/test/sad_test.cc b/libvpx/test/sad_test.cc
index e6a5e0ba6..e6bd0d793 100644
--- a/libvpx/test/sad_test.cc
+++ b/libvpx/test/sad_test.cc
@@ -484,260 +484,176 @@ using std::tr1::make_tuple;
//------------------------------------------------------------------------------
// C functions
-const SadMxNFunc sad64x64_c = vpx_sad64x64_c;
-const SadMxNFunc sad64x32_c = vpx_sad64x32_c;
-const SadMxNFunc sad32x64_c = vpx_sad32x64_c;
-const SadMxNFunc sad32x32_c = vpx_sad32x32_c;
-const SadMxNFunc sad32x16_c = vpx_sad32x16_c;
-const SadMxNFunc sad16x32_c = vpx_sad16x32_c;
-const SadMxNFunc sad16x16_c = vpx_sad16x16_c;
-const SadMxNFunc sad16x8_c = vpx_sad16x8_c;
-const SadMxNFunc sad8x16_c = vpx_sad8x16_c;
-const SadMxNFunc sad8x8_c = vpx_sad8x8_c;
-const SadMxNFunc sad8x4_c = vpx_sad8x4_c;
-const SadMxNFunc sad4x8_c = vpx_sad4x8_c;
-const SadMxNFunc sad4x4_c = vpx_sad4x4_c;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNFunc highbd_sad64x64_c = vpx_highbd_sad64x64_c;
-const SadMxNFunc highbd_sad64x32_c = vpx_highbd_sad64x32_c;
-const SadMxNFunc highbd_sad32x64_c = vpx_highbd_sad32x64_c;
-const SadMxNFunc highbd_sad32x32_c = vpx_highbd_sad32x32_c;
-const SadMxNFunc highbd_sad32x16_c = vpx_highbd_sad32x16_c;
-const SadMxNFunc highbd_sad16x32_c = vpx_highbd_sad16x32_c;
-const SadMxNFunc highbd_sad16x16_c = vpx_highbd_sad16x16_c;
-const SadMxNFunc highbd_sad16x8_c = vpx_highbd_sad16x8_c;
-const SadMxNFunc highbd_sad8x16_c = vpx_highbd_sad8x16_c;
-const SadMxNFunc highbd_sad8x8_c = vpx_highbd_sad8x8_c;
-const SadMxNFunc highbd_sad8x4_c = vpx_highbd_sad8x4_c;
-const SadMxNFunc highbd_sad4x8_c = vpx_highbd_sad4x8_c;
-const SadMxNFunc highbd_sad4x4_c = vpx_highbd_sad4x4_c;
-#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNParam c_tests[] = {
- make_tuple(64, 64, sad64x64_c, -1),
- make_tuple(64, 32, sad64x32_c, -1),
- make_tuple(32, 64, sad32x64_c, -1),
- make_tuple(32, 32, sad32x32_c, -1),
- make_tuple(32, 16, sad32x16_c, -1),
- make_tuple(16, 32, sad16x32_c, -1),
- make_tuple(16, 16, sad16x16_c, -1),
- make_tuple(16, 8, sad16x8_c, -1),
- make_tuple(8, 16, sad8x16_c, -1),
- make_tuple(8, 8, sad8x8_c, -1),
- make_tuple(8, 4, sad8x4_c, -1),
- make_tuple(4, 8, sad4x8_c, -1),
- make_tuple(4, 4, sad4x4_c, -1),
+ make_tuple(64, 64, &vpx_sad64x64_c, -1),
+ make_tuple(64, 32, &vpx_sad64x32_c, -1),
+ make_tuple(32, 64, &vpx_sad32x64_c, -1),
+ make_tuple(32, 32, &vpx_sad32x32_c, -1),
+ make_tuple(32, 16, &vpx_sad32x16_c, -1),
+ make_tuple(16, 32, &vpx_sad16x32_c, -1),
+ make_tuple(16, 16, &vpx_sad16x16_c, -1),
+ make_tuple(16, 8, &vpx_sad16x8_c, -1),
+ make_tuple(8, 16, &vpx_sad8x16_c, -1),
+ make_tuple(8, 8, &vpx_sad8x8_c, -1),
+ make_tuple(8, 4, &vpx_sad8x4_c, -1),
+ make_tuple(4, 8, &vpx_sad4x8_c, -1),
+ make_tuple(4, 4, &vpx_sad4x4_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
- make_tuple(64, 64, highbd_sad64x64_c, 8),
- make_tuple(64, 32, highbd_sad64x32_c, 8),
- make_tuple(32, 64, highbd_sad32x64_c, 8),
- make_tuple(32, 32, highbd_sad32x32_c, 8),
- make_tuple(32, 16, highbd_sad32x16_c, 8),
- make_tuple(16, 32, highbd_sad16x32_c, 8),
- make_tuple(16, 16, highbd_sad16x16_c, 8),
- make_tuple(16, 8, highbd_sad16x8_c, 8),
- make_tuple(8, 16, highbd_sad8x16_c, 8),
- make_tuple(8, 8, highbd_sad8x8_c, 8),
- make_tuple(8, 4, highbd_sad8x4_c, 8),
- make_tuple(4, 8, highbd_sad4x8_c, 8),
- make_tuple(4, 4, highbd_sad4x4_c, 8),
- make_tuple(64, 64, highbd_sad64x64_c, 10),
- make_tuple(64, 32, highbd_sad64x32_c, 10),
- make_tuple(32, 64, highbd_sad32x64_c, 10),
- make_tuple(32, 32, highbd_sad32x32_c, 10),
- make_tuple(32, 16, highbd_sad32x16_c, 10),
- make_tuple(16, 32, highbd_sad16x32_c, 10),
- make_tuple(16, 16, highbd_sad16x16_c, 10),
- make_tuple(16, 8, highbd_sad16x8_c, 10),
- make_tuple(8, 16, highbd_sad8x16_c, 10),
- make_tuple(8, 8, highbd_sad8x8_c, 10),
- make_tuple(8, 4, highbd_sad8x4_c, 10),
- make_tuple(4, 8, highbd_sad4x8_c, 10),
- make_tuple(4, 4, highbd_sad4x4_c, 10),
- make_tuple(64, 64, highbd_sad64x64_c, 12),
- make_tuple(64, 32, highbd_sad64x32_c, 12),
- make_tuple(32, 64, highbd_sad32x64_c, 12),
- make_tuple(32, 32, highbd_sad32x32_c, 12),
- make_tuple(32, 16, highbd_sad32x16_c, 12),
- make_tuple(16, 32, highbd_sad16x32_c, 12),
- make_tuple(16, 16, highbd_sad16x16_c, 12),
- make_tuple(16, 8, highbd_sad16x8_c, 12),
- make_tuple(8, 16, highbd_sad8x16_c, 12),
- make_tuple(8, 8, highbd_sad8x8_c, 12),
- make_tuple(8, 4, highbd_sad8x4_c, 12),
- make_tuple(4, 8, highbd_sad4x8_c, 12),
- make_tuple(4, 4, highbd_sad4x4_c, 12),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_c, 8),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_c, 8),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_c, 8),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_c, 8),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_c, 8),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_c, 8),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_c, 8),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_c, 8),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_c, 8),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_c, 8),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_c, 8),
+ make_tuple(4, 8, &vpx_highbd_sad4x8_c, 8),
+ make_tuple(4, 4, &vpx_highbd_sad4x4_c, 8),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_c, 10),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_c, 10),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_c, 10),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_c, 10),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_c, 10),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_c, 10),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_c, 10),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_c, 10),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_c, 10),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_c, 10),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_c, 10),
+ make_tuple(4, 8, &vpx_highbd_sad4x8_c, 10),
+ make_tuple(4, 4, &vpx_highbd_sad4x4_c, 10),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_c, 12),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_c, 12),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_c, 12),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_c, 12),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_c, 12),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_c, 12),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_c, 12),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_c, 12),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_c, 12),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_c, 12),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_c, 12),
+ make_tuple(4, 8, &vpx_highbd_sad4x8_c, 12),
+ make_tuple(4, 4, &vpx_highbd_sad4x4_c, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));
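
Each tuple in these tables reads (width, height, function, bit depth), with -1 selecting the ordinary 8-bit path. All SAD functions share one signature; the typedef below is a sketch of what sad_test.cc defines earlier in the file, outside this hunk, together with an illustrative reference computation (the helper name is an assumption, not from the file):

#include <stdint.h>
#include <stdlib.h>

// Assumed shape of the function-pointer type used in the tuples above.
typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
                                   const uint8_t *ref_ptr, int ref_stride);

// Reference SAD over an MxN block: sum of absolute pixel differences.
static unsigned int sad_mxn_ref(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride,
                                int width, int height) {
  unsigned int sad = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      sad += abs(src[y * src_stride + x] - ref[y * ref_stride + x]);
    }
  }
  return sad;
}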
-const SadMxNAvgFunc sad64x64_avg_c = vpx_sad64x64_avg_c;
-const SadMxNAvgFunc sad64x32_avg_c = vpx_sad64x32_avg_c;
-const SadMxNAvgFunc sad32x64_avg_c = vpx_sad32x64_avg_c;
-const SadMxNAvgFunc sad32x32_avg_c = vpx_sad32x32_avg_c;
-const SadMxNAvgFunc sad32x16_avg_c = vpx_sad32x16_avg_c;
-const SadMxNAvgFunc sad16x32_avg_c = vpx_sad16x32_avg_c;
-const SadMxNAvgFunc sad16x16_avg_c = vpx_sad16x16_avg_c;
-const SadMxNAvgFunc sad16x8_avg_c = vpx_sad16x8_avg_c;
-const SadMxNAvgFunc sad8x16_avg_c = vpx_sad8x16_avg_c;
-const SadMxNAvgFunc sad8x8_avg_c = vpx_sad8x8_avg_c;
-const SadMxNAvgFunc sad8x4_avg_c = vpx_sad8x4_avg_c;
-const SadMxNAvgFunc sad4x8_avg_c = vpx_sad4x8_avg_c;
-const SadMxNAvgFunc sad4x4_avg_c = vpx_sad4x4_avg_c;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNAvgFunc highbd_sad64x64_avg_c = vpx_highbd_sad64x64_avg_c;
-const SadMxNAvgFunc highbd_sad64x32_avg_c = vpx_highbd_sad64x32_avg_c;
-const SadMxNAvgFunc highbd_sad32x64_avg_c = vpx_highbd_sad32x64_avg_c;
-const SadMxNAvgFunc highbd_sad32x32_avg_c = vpx_highbd_sad32x32_avg_c;
-const SadMxNAvgFunc highbd_sad32x16_avg_c = vpx_highbd_sad32x16_avg_c;
-const SadMxNAvgFunc highbd_sad16x32_avg_c = vpx_highbd_sad16x32_avg_c;
-const SadMxNAvgFunc highbd_sad16x16_avg_c = vpx_highbd_sad16x16_avg_c;
-const SadMxNAvgFunc highbd_sad16x8_avg_c = vpx_highbd_sad16x8_avg_c;
-const SadMxNAvgFunc highbd_sad8x16_avg_c = vpx_highbd_sad8x16_avg_c;
-const SadMxNAvgFunc highbd_sad8x8_avg_c = vpx_highbd_sad8x8_avg_c;
-const SadMxNAvgFunc highbd_sad8x4_avg_c = vpx_highbd_sad8x4_avg_c;
-const SadMxNAvgFunc highbd_sad4x8_avg_c = vpx_highbd_sad4x8_avg_c;
-const SadMxNAvgFunc highbd_sad4x4_avg_c = vpx_highbd_sad4x4_avg_c;
-#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNAvgParam avg_c_tests[] = {
- make_tuple(64, 64, sad64x64_avg_c, -1),
- make_tuple(64, 32, sad64x32_avg_c, -1),
- make_tuple(32, 64, sad32x64_avg_c, -1),
- make_tuple(32, 32, sad32x32_avg_c, -1),
- make_tuple(32, 16, sad32x16_avg_c, -1),
- make_tuple(16, 32, sad16x32_avg_c, -1),
- make_tuple(16, 16, sad16x16_avg_c, -1),
- make_tuple(16, 8, sad16x8_avg_c, -1),
- make_tuple(8, 16, sad8x16_avg_c, -1),
- make_tuple(8, 8, sad8x8_avg_c, -1),
- make_tuple(8, 4, sad8x4_avg_c, -1),
- make_tuple(4, 8, sad4x8_avg_c, -1),
- make_tuple(4, 4, sad4x4_avg_c, -1),
+ make_tuple(64, 64, &vpx_sad64x64_avg_c, -1),
+ make_tuple(64, 32, &vpx_sad64x32_avg_c, -1),
+ make_tuple(32, 64, &vpx_sad32x64_avg_c, -1),
+ make_tuple(32, 32, &vpx_sad32x32_avg_c, -1),
+ make_tuple(32, 16, &vpx_sad32x16_avg_c, -1),
+ make_tuple(16, 32, &vpx_sad16x32_avg_c, -1),
+ make_tuple(16, 16, &vpx_sad16x16_avg_c, -1),
+ make_tuple(16, 8, &vpx_sad16x8_avg_c, -1),
+ make_tuple(8, 16, &vpx_sad8x16_avg_c, -1),
+ make_tuple(8, 8, &vpx_sad8x8_avg_c, -1),
+ make_tuple(8, 4, &vpx_sad8x4_avg_c, -1),
+ make_tuple(4, 8, &vpx_sad4x8_avg_c, -1),
+ make_tuple(4, 4, &vpx_sad4x4_avg_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
- make_tuple(64, 64, highbd_sad64x64_avg_c, 8),
- make_tuple(64, 32, highbd_sad64x32_avg_c, 8),
- make_tuple(32, 64, highbd_sad32x64_avg_c, 8),
- make_tuple(32, 32, highbd_sad32x32_avg_c, 8),
- make_tuple(32, 16, highbd_sad32x16_avg_c, 8),
- make_tuple(16, 32, highbd_sad16x32_avg_c, 8),
- make_tuple(16, 16, highbd_sad16x16_avg_c, 8),
- make_tuple(16, 8, highbd_sad16x8_avg_c, 8),
- make_tuple(8, 16, highbd_sad8x16_avg_c, 8),
- make_tuple(8, 8, highbd_sad8x8_avg_c, 8),
- make_tuple(8, 4, highbd_sad8x4_avg_c, 8),
- make_tuple(4, 8, highbd_sad4x8_avg_c, 8),
- make_tuple(4, 4, highbd_sad4x4_avg_c, 8),
- make_tuple(64, 64, highbd_sad64x64_avg_c, 10),
- make_tuple(64, 32, highbd_sad64x32_avg_c, 10),
- make_tuple(32, 64, highbd_sad32x64_avg_c, 10),
- make_tuple(32, 32, highbd_sad32x32_avg_c, 10),
- make_tuple(32, 16, highbd_sad32x16_avg_c, 10),
- make_tuple(16, 32, highbd_sad16x32_avg_c, 10),
- make_tuple(16, 16, highbd_sad16x16_avg_c, 10),
- make_tuple(16, 8, highbd_sad16x8_avg_c, 10),
- make_tuple(8, 16, highbd_sad8x16_avg_c, 10),
- make_tuple(8, 8, highbd_sad8x8_avg_c, 10),
- make_tuple(8, 4, highbd_sad8x4_avg_c, 10),
- make_tuple(4, 8, highbd_sad4x8_avg_c, 10),
- make_tuple(4, 4, highbd_sad4x4_avg_c, 10),
- make_tuple(64, 64, highbd_sad64x64_avg_c, 12),
- make_tuple(64, 32, highbd_sad64x32_avg_c, 12),
- make_tuple(32, 64, highbd_sad32x64_avg_c, 12),
- make_tuple(32, 32, highbd_sad32x32_avg_c, 12),
- make_tuple(32, 16, highbd_sad32x16_avg_c, 12),
- make_tuple(16, 32, highbd_sad16x32_avg_c, 12),
- make_tuple(16, 16, highbd_sad16x16_avg_c, 12),
- make_tuple(16, 8, highbd_sad16x8_avg_c, 12),
- make_tuple(8, 16, highbd_sad8x16_avg_c, 12),
- make_tuple(8, 8, highbd_sad8x8_avg_c, 12),
- make_tuple(8, 4, highbd_sad8x4_avg_c, 12),
- make_tuple(4, 8, highbd_sad4x8_avg_c, 12),
- make_tuple(4, 4, highbd_sad4x4_avg_c, 12),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 8),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 8),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 8),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 8),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 8),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 8),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 8),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 8),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 8),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 8),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 8),
+ make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 8),
+ make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 8),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 10),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 10),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 10),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 10),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 10),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 10),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 10),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 10),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 10),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 10),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 10),
+ make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 10),
+ make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 10),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_avg_c, 12),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_avg_c, 12),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_avg_c, 12),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_avg_c, 12),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_avg_c, 12),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_avg_c, 12),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_avg_c, 12),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_avg_c, 12),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_avg_c, 12),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_avg_c, 12),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_avg_c, 12),
+ make_tuple(4, 8, &vpx_highbd_sad4x8_avg_c, 12),
+ make_tuple(4, 4, &vpx_highbd_sad4x4_avg_c, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests));
-const SadMxNx4Func sad64x64x4d_c = vpx_sad64x64x4d_c;
-const SadMxNx4Func sad64x32x4d_c = vpx_sad64x32x4d_c;
-const SadMxNx4Func sad32x64x4d_c = vpx_sad32x64x4d_c;
-const SadMxNx4Func sad32x32x4d_c = vpx_sad32x32x4d_c;
-const SadMxNx4Func sad32x16x4d_c = vpx_sad32x16x4d_c;
-const SadMxNx4Func sad16x32x4d_c = vpx_sad16x32x4d_c;
-const SadMxNx4Func sad16x16x4d_c = vpx_sad16x16x4d_c;
-const SadMxNx4Func sad16x8x4d_c = vpx_sad16x8x4d_c;
-const SadMxNx4Func sad8x16x4d_c = vpx_sad8x16x4d_c;
-const SadMxNx4Func sad8x8x4d_c = vpx_sad8x8x4d_c;
-const SadMxNx4Func sad8x4x4d_c = vpx_sad8x4x4d_c;
-const SadMxNx4Func sad4x8x4d_c = vpx_sad4x8x4d_c;
-const SadMxNx4Func sad4x4x4d_c = vpx_sad4x4x4d_c;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNx4Func highbd_sad64x64x4d_c = vpx_highbd_sad64x64x4d_c;
-const SadMxNx4Func highbd_sad64x32x4d_c = vpx_highbd_sad64x32x4d_c;
-const SadMxNx4Func highbd_sad32x64x4d_c = vpx_highbd_sad32x64x4d_c;
-const SadMxNx4Func highbd_sad32x32x4d_c = vpx_highbd_sad32x32x4d_c;
-const SadMxNx4Func highbd_sad32x16x4d_c = vpx_highbd_sad32x16x4d_c;
-const SadMxNx4Func highbd_sad16x32x4d_c = vpx_highbd_sad16x32x4d_c;
-const SadMxNx4Func highbd_sad16x16x4d_c = vpx_highbd_sad16x16x4d_c;
-const SadMxNx4Func highbd_sad16x8x4d_c = vpx_highbd_sad16x8x4d_c;
-const SadMxNx4Func highbd_sad8x16x4d_c = vpx_highbd_sad8x16x4d_c;
-const SadMxNx4Func highbd_sad8x8x4d_c = vpx_highbd_sad8x8x4d_c;
-const SadMxNx4Func highbd_sad8x4x4d_c = vpx_highbd_sad8x4x4d_c;
-const SadMxNx4Func highbd_sad4x8x4d_c = vpx_highbd_sad4x8x4d_c;
-const SadMxNx4Func highbd_sad4x4x4d_c = vpx_highbd_sad4x4x4d_c;
-#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNx4Param x4d_c_tests[] = {
- make_tuple(64, 64, sad64x64x4d_c, -1),
- make_tuple(64, 32, sad64x32x4d_c, -1),
- make_tuple(32, 64, sad32x64x4d_c, -1),
- make_tuple(32, 32, sad32x32x4d_c, -1),
- make_tuple(32, 16, sad32x16x4d_c, -1),
- make_tuple(16, 32, sad16x32x4d_c, -1),
- make_tuple(16, 16, sad16x16x4d_c, -1),
- make_tuple(16, 8, sad16x8x4d_c, -1),
- make_tuple(8, 16, sad8x16x4d_c, -1),
- make_tuple(8, 8, sad8x8x4d_c, -1),
- make_tuple(8, 4, sad8x4x4d_c, -1),
- make_tuple(4, 8, sad4x8x4d_c, -1),
- make_tuple(4, 4, sad4x4x4d_c, -1),
+ make_tuple(64, 64, &vpx_sad64x64x4d_c, -1),
+ make_tuple(64, 32, &vpx_sad64x32x4d_c, -1),
+ make_tuple(32, 64, &vpx_sad32x64x4d_c, -1),
+ make_tuple(32, 32, &vpx_sad32x32x4d_c, -1),
+ make_tuple(32, 16, &vpx_sad32x16x4d_c, -1),
+ make_tuple(16, 32, &vpx_sad16x32x4d_c, -1),
+ make_tuple(16, 16, &vpx_sad16x16x4d_c, -1),
+ make_tuple(16, 8, &vpx_sad16x8x4d_c, -1),
+ make_tuple(8, 16, &vpx_sad8x16x4d_c, -1),
+ make_tuple(8, 8, &vpx_sad8x8x4d_c, -1),
+ make_tuple(8, 4, &vpx_sad8x4x4d_c, -1),
+ make_tuple(4, 8, &vpx_sad4x8x4d_c, -1),
+ make_tuple(4, 4, &vpx_sad4x4x4d_c, -1),
#if CONFIG_VP9_HIGHBITDEPTH
- make_tuple(64, 64, highbd_sad64x64x4d_c, 8),
- make_tuple(64, 32, highbd_sad64x32x4d_c, 8),
- make_tuple(32, 64, highbd_sad32x64x4d_c, 8),
- make_tuple(32, 32, highbd_sad32x32x4d_c, 8),
- make_tuple(32, 16, highbd_sad32x16x4d_c, 8),
- make_tuple(16, 32, highbd_sad16x32x4d_c, 8),
- make_tuple(16, 16, highbd_sad16x16x4d_c, 8),
- make_tuple(16, 8, highbd_sad16x8x4d_c, 8),
- make_tuple(8, 16, highbd_sad8x16x4d_c, 8),
- make_tuple(8, 8, highbd_sad8x8x4d_c, 8),
- make_tuple(8, 4, highbd_sad8x4x4d_c, 8),
- make_tuple(4, 8, highbd_sad4x8x4d_c, 8),
- make_tuple(4, 4, highbd_sad4x4x4d_c, 8),
- make_tuple(64, 64, highbd_sad64x64x4d_c, 10),
- make_tuple(64, 32, highbd_sad64x32x4d_c, 10),
- make_tuple(32, 64, highbd_sad32x64x4d_c, 10),
- make_tuple(32, 32, highbd_sad32x32x4d_c, 10),
- make_tuple(32, 16, highbd_sad32x16x4d_c, 10),
- make_tuple(16, 32, highbd_sad16x32x4d_c, 10),
- make_tuple(16, 16, highbd_sad16x16x4d_c, 10),
- make_tuple(16, 8, highbd_sad16x8x4d_c, 10),
- make_tuple(8, 16, highbd_sad8x16x4d_c, 10),
- make_tuple(8, 8, highbd_sad8x8x4d_c, 10),
- make_tuple(8, 4, highbd_sad8x4x4d_c, 10),
- make_tuple(4, 8, highbd_sad4x8x4d_c, 10),
- make_tuple(4, 4, highbd_sad4x4x4d_c, 10),
- make_tuple(64, 64, highbd_sad64x64x4d_c, 12),
- make_tuple(64, 32, highbd_sad64x32x4d_c, 12),
- make_tuple(32, 64, highbd_sad32x64x4d_c, 12),
- make_tuple(32, 32, highbd_sad32x32x4d_c, 12),
- make_tuple(32, 16, highbd_sad32x16x4d_c, 12),
- make_tuple(16, 32, highbd_sad16x32x4d_c, 12),
- make_tuple(16, 16, highbd_sad16x16x4d_c, 12),
- make_tuple(16, 8, highbd_sad16x8x4d_c, 12),
- make_tuple(8, 16, highbd_sad8x16x4d_c, 12),
- make_tuple(8, 8, highbd_sad8x8x4d_c, 12),
- make_tuple(8, 4, highbd_sad8x4x4d_c, 12),
- make_tuple(4, 8, highbd_sad4x8x4d_c, 12),
- make_tuple(4, 4, highbd_sad4x4x4d_c, 12),
+ make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 8),
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 8),
+ make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 8),
+ make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 8),
+ make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 8),
+ make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 8),
+ make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 8),
+ make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 8),
+ make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 8),
+ make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 8),
+ make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 8),
+ make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 8),
+ make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 8),
+ make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 10),
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 10),
+ make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 10),
+ make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 10),
+ make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 10),
+ make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 10),
+ make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 10),
+ make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 10),
+ make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 10),
+ make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 10),
+ make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 10),
+ make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 10),
+ make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 10),
+ make_tuple(64, 64, &vpx_highbd_sad64x64x4d_c, 12),
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_c, 12),
+ make_tuple(32, 64, &vpx_highbd_sad32x64x4d_c, 12),
+ make_tuple(32, 32, &vpx_highbd_sad32x32x4d_c, 12),
+ make_tuple(32, 16, &vpx_highbd_sad32x16x4d_c, 12),
+ make_tuple(16, 32, &vpx_highbd_sad16x32x4d_c, 12),
+ make_tuple(16, 16, &vpx_highbd_sad16x16x4d_c, 12),
+ make_tuple(16, 8, &vpx_highbd_sad16x8x4d_c, 12),
+ make_tuple(8, 16, &vpx_highbd_sad8x16x4d_c, 12),
+ make_tuple(8, 8, &vpx_highbd_sad8x8x4d_c, 12),
+ make_tuple(8, 4, &vpx_highbd_sad8x4x4d_c, 12),
+ make_tuple(4, 8, &vpx_highbd_sad4x8x4d_c, 12),
+ make_tuple(4, 4, &vpx_highbd_sad4x4x4d_c, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
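
The x4d tables exercise the four-way variants: one call computes the SAD of a single source block against four candidate reference blocks, the shape motion search wants. A hedged sketch of the signature (the real typedef lives earlier in sad_test.cc, outside this hunk):

// Assumed shape of the four-way SAD type used in the tuples above.
typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *const ref_ptr[4], int ref_stride,
                             uint32_t *sad_array);  // receives 4 results

// Usage sketch: sads[i] ends up as SAD(src, refs[i]) for i in 0..3.
//   uint32_t sads[4];
//   vpx_sad16x16x4d_c(src, 16, refs, 16, sads);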
@@ -745,318 +661,194 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
//------------------------------------------------------------------------------
// ARM functions
#if HAVE_MEDIA
-const SadMxNFunc sad16x16_media = vpx_sad16x16_media;
const SadMxNParam media_tests[] = {
- make_tuple(16, 16, sad16x16_media, -1),
+ make_tuple(16, 16, &vpx_sad16x16_media, -1),
};
INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::ValuesIn(media_tests));
#endif // HAVE_MEDIA
#if HAVE_NEON
-const SadMxNFunc sad64x64_neon = vpx_sad64x64_neon;
-const SadMxNFunc sad32x32_neon = vpx_sad32x32_neon;
-const SadMxNFunc sad16x16_neon = vpx_sad16x16_neon;
-const SadMxNFunc sad16x8_neon = vpx_sad16x8_neon;
-const SadMxNFunc sad8x16_neon = vpx_sad8x16_neon;
-const SadMxNFunc sad8x8_neon = vpx_sad8x8_neon;
-const SadMxNFunc sad4x4_neon = vpx_sad4x4_neon;
-
const SadMxNParam neon_tests[] = {
- make_tuple(64, 64, sad64x64_neon, -1),
- make_tuple(32, 32, sad32x32_neon, -1),
- make_tuple(16, 16, sad16x16_neon, -1),
- make_tuple(16, 8, sad16x8_neon, -1),
- make_tuple(8, 16, sad8x16_neon, -1),
- make_tuple(8, 8, sad8x8_neon, -1),
- make_tuple(4, 4, sad4x4_neon, -1),
+ make_tuple(64, 64, &vpx_sad64x64_neon, -1),
+ make_tuple(32, 32, &vpx_sad32x32_neon, -1),
+ make_tuple(16, 16, &vpx_sad16x16_neon, -1),
+ make_tuple(16, 8, &vpx_sad16x8_neon, -1),
+ make_tuple(8, 16, &vpx_sad8x16_neon, -1),
+ make_tuple(8, 8, &vpx_sad8x8_neon, -1),
+ make_tuple(4, 4, &vpx_sad4x4_neon, -1),
};
INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
-const SadMxNx4Func sad64x64x4d_neon = vpx_sad64x64x4d_neon;
-const SadMxNx4Func sad32x32x4d_neon = vpx_sad32x32x4d_neon;
-const SadMxNx4Func sad16x16x4d_neon = vpx_sad16x16x4d_neon;
const SadMxNx4Param x4d_neon_tests[] = {
- make_tuple(64, 64, sad64x64x4d_neon, -1),
- make_tuple(32, 32, sad32x32x4d_neon, -1),
- make_tuple(16, 16, sad16x16x4d_neon, -1),
+ make_tuple(64, 64, &vpx_sad64x64x4d_neon, -1),
+ make_tuple(32, 32, &vpx_sad32x32x4d_neon, -1),
+ make_tuple(16, 16, &vpx_sad16x16x4d_neon, -1),
};
INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
#endif // HAVE_NEON
//------------------------------------------------------------------------------
// x86 functions
-#if HAVE_MMX
-const SadMxNFunc sad16x16_mmx = vpx_sad16x16_mmx;
-const SadMxNFunc sad16x8_mmx = vpx_sad16x8_mmx;
-const SadMxNFunc sad8x16_mmx = vpx_sad8x16_mmx;
-const SadMxNFunc sad8x8_mmx = vpx_sad8x8_mmx;
-const SadMxNFunc sad4x4_mmx = vpx_sad4x4_mmx;
-const SadMxNParam mmx_tests[] = {
- make_tuple(16, 16, sad16x16_mmx, -1),
- make_tuple(16, 8, sad16x8_mmx, -1),
- make_tuple(8, 16, sad8x16_mmx, -1),
- make_tuple(8, 8, sad8x8_mmx, -1),
- make_tuple(4, 4, sad4x4_mmx, -1),
-};
-INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
-#endif // HAVE_MMX
-
-#if HAVE_SSE
-#if CONFIG_USE_X86INC
-const SadMxNFunc sad4x8_sse = vpx_sad4x8_sse;
-const SadMxNFunc sad4x4_sse = vpx_sad4x4_sse;
-const SadMxNParam sse_tests[] = {
- make_tuple(4, 8, sad4x8_sse, -1),
- make_tuple(4, 4, sad4x4_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::ValuesIn(sse_tests));
-
-const SadMxNAvgFunc sad4x8_avg_sse = vpx_sad4x8_avg_sse;
-const SadMxNAvgFunc sad4x4_avg_sse = vpx_sad4x4_avg_sse;
-const SadMxNAvgParam avg_sse_tests[] = {
- make_tuple(4, 8, sad4x8_avg_sse, -1),
- make_tuple(4, 4, sad4x4_avg_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADavgTest, ::testing::ValuesIn(avg_sse_tests));
-
-const SadMxNx4Func sad4x8x4d_sse = vpx_sad4x8x4d_sse;
-const SadMxNx4Func sad4x4x4d_sse = vpx_sad4x4x4d_sse;
-const SadMxNx4Param x4d_sse_tests[] = {
- make_tuple(4, 8, sad4x8x4d_sse, -1),
- make_tuple(4, 4, sad4x4x4d_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
-#endif // CONFIG_USE_X86INC
-#endif // HAVE_SSE
-
#if HAVE_SSE2
#if CONFIG_USE_X86INC
-const SadMxNFunc sad64x64_sse2 = vpx_sad64x64_sse2;
-const SadMxNFunc sad64x32_sse2 = vpx_sad64x32_sse2;
-const SadMxNFunc sad32x64_sse2 = vpx_sad32x64_sse2;
-const SadMxNFunc sad32x32_sse2 = vpx_sad32x32_sse2;
-const SadMxNFunc sad32x16_sse2 = vpx_sad32x16_sse2;
-const SadMxNFunc sad16x32_sse2 = vpx_sad16x32_sse2;
-const SadMxNFunc sad16x16_sse2 = vpx_sad16x16_sse2;
-const SadMxNFunc sad16x8_sse2 = vpx_sad16x8_sse2;
-const SadMxNFunc sad8x16_sse2 = vpx_sad8x16_sse2;
-const SadMxNFunc sad8x8_sse2 = vpx_sad8x8_sse2;
-const SadMxNFunc sad8x4_sse2 = vpx_sad8x4_sse2;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNFunc highbd_sad64x64_sse2 = vpx_highbd_sad64x64_sse2;
-const SadMxNFunc highbd_sad64x32_sse2 = vpx_highbd_sad64x32_sse2;
-const SadMxNFunc highbd_sad32x64_sse2 = vpx_highbd_sad32x64_sse2;
-const SadMxNFunc highbd_sad32x32_sse2 = vpx_highbd_sad32x32_sse2;
-const SadMxNFunc highbd_sad32x16_sse2 = vpx_highbd_sad32x16_sse2;
-const SadMxNFunc highbd_sad16x32_sse2 = vpx_highbd_sad16x32_sse2;
-const SadMxNFunc highbd_sad16x16_sse2 = vpx_highbd_sad16x16_sse2;
-const SadMxNFunc highbd_sad16x8_sse2 = vpx_highbd_sad16x8_sse2;
-const SadMxNFunc highbd_sad8x16_sse2 = vpx_highbd_sad8x16_sse2;
-const SadMxNFunc highbd_sad8x8_sse2 = vpx_highbd_sad8x8_sse2;
-const SadMxNFunc highbd_sad8x4_sse2 = vpx_highbd_sad8x4_sse2;
-#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNParam sse2_tests[] = {
- make_tuple(64, 64, sad64x64_sse2, -1),
- make_tuple(64, 32, sad64x32_sse2, -1),
- make_tuple(32, 64, sad32x64_sse2, -1),
- make_tuple(32, 32, sad32x32_sse2, -1),
- make_tuple(32, 16, sad32x16_sse2, -1),
- make_tuple(16, 32, sad16x32_sse2, -1),
- make_tuple(16, 16, sad16x16_sse2, -1),
- make_tuple(16, 8, sad16x8_sse2, -1),
- make_tuple(8, 16, sad8x16_sse2, -1),
- make_tuple(8, 8, sad8x8_sse2, -1),
- make_tuple(8, 4, sad8x4_sse2, -1),
+ make_tuple(64, 64, &vpx_sad64x64_sse2, -1),
+ make_tuple(64, 32, &vpx_sad64x32_sse2, -1),
+ make_tuple(32, 64, &vpx_sad32x64_sse2, -1),
+ make_tuple(32, 32, &vpx_sad32x32_sse2, -1),
+ make_tuple(32, 16, &vpx_sad32x16_sse2, -1),
+ make_tuple(16, 32, &vpx_sad16x32_sse2, -1),
+ make_tuple(16, 16, &vpx_sad16x16_sse2, -1),
+ make_tuple(16, 8, &vpx_sad16x8_sse2, -1),
+ make_tuple(8, 16, &vpx_sad8x16_sse2, -1),
+ make_tuple(8, 8, &vpx_sad8x8_sse2, -1),
+ make_tuple(8, 4, &vpx_sad8x4_sse2, -1),
+ make_tuple(4, 8, &vpx_sad4x8_sse2, -1),
+ make_tuple(4, 4, &vpx_sad4x4_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
- make_tuple(64, 64, highbd_sad64x64_sse2, 8),
- make_tuple(64, 32, highbd_sad64x32_sse2, 8),
- make_tuple(32, 64, highbd_sad32x64_sse2, 8),
- make_tuple(32, 32, highbd_sad32x32_sse2, 8),
- make_tuple(32, 16, highbd_sad32x16_sse2, 8),
- make_tuple(16, 32, highbd_sad16x32_sse2, 8),
- make_tuple(16, 16, highbd_sad16x16_sse2, 8),
- make_tuple(16, 8, highbd_sad16x8_sse2, 8),
- make_tuple(8, 16, highbd_sad8x16_sse2, 8),
- make_tuple(8, 8, highbd_sad8x8_sse2, 8),
- make_tuple(8, 4, highbd_sad8x4_sse2, 8),
- make_tuple(64, 64, highbd_sad64x64_sse2, 10),
- make_tuple(64, 32, highbd_sad64x32_sse2, 10),
- make_tuple(32, 64, highbd_sad32x64_sse2, 10),
- make_tuple(32, 32, highbd_sad32x32_sse2, 10),
- make_tuple(32, 16, highbd_sad32x16_sse2, 10),
- make_tuple(16, 32, highbd_sad16x32_sse2, 10),
- make_tuple(16, 16, highbd_sad16x16_sse2, 10),
- make_tuple(16, 8, highbd_sad16x8_sse2, 10),
- make_tuple(8, 16, highbd_sad8x16_sse2, 10),
- make_tuple(8, 8, highbd_sad8x8_sse2, 10),
- make_tuple(8, 4, highbd_sad8x4_sse2, 10),
- make_tuple(64, 64, highbd_sad64x64_sse2, 12),
- make_tuple(64, 32, highbd_sad64x32_sse2, 12),
- make_tuple(32, 64, highbd_sad32x64_sse2, 12),
- make_tuple(32, 32, highbd_sad32x32_sse2, 12),
- make_tuple(32, 16, highbd_sad32x16_sse2, 12),
- make_tuple(16, 32, highbd_sad16x32_sse2, 12),
- make_tuple(16, 16, highbd_sad16x16_sse2, 12),
- make_tuple(16, 8, highbd_sad16x8_sse2, 12),
- make_tuple(8, 16, highbd_sad8x16_sse2, 12),
- make_tuple(8, 8, highbd_sad8x8_sse2, 12),
- make_tuple(8, 4, highbd_sad8x4_sse2, 12),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 8),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 8),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 8),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 8),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 8),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 8),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 8),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 8),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 8),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 8),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 8),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 10),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 10),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 10),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 10),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 10),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 10),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 10),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 10),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 10),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 10),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 10),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_sse2, 12),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_sse2, 12),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_sse2, 12),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_sse2, 12),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_sse2, 12),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_sse2, 12),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_sse2, 12),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_sse2, 12),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_sse2, 12),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_sse2, 12),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_sse2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
-const SadMxNAvgFunc sad64x64_avg_sse2 = vpx_sad64x64_avg_sse2;
-const SadMxNAvgFunc sad64x32_avg_sse2 = vpx_sad64x32_avg_sse2;
-const SadMxNAvgFunc sad32x64_avg_sse2 = vpx_sad32x64_avg_sse2;
-const SadMxNAvgFunc sad32x32_avg_sse2 = vpx_sad32x32_avg_sse2;
-const SadMxNAvgFunc sad32x16_avg_sse2 = vpx_sad32x16_avg_sse2;
-const SadMxNAvgFunc sad16x32_avg_sse2 = vpx_sad16x32_avg_sse2;
-const SadMxNAvgFunc sad16x16_avg_sse2 = vpx_sad16x16_avg_sse2;
-const SadMxNAvgFunc sad16x8_avg_sse2 = vpx_sad16x8_avg_sse2;
-const SadMxNAvgFunc sad8x16_avg_sse2 = vpx_sad8x16_avg_sse2;
-const SadMxNAvgFunc sad8x8_avg_sse2 = vpx_sad8x8_avg_sse2;
-const SadMxNAvgFunc sad8x4_avg_sse2 = vpx_sad8x4_avg_sse2;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNAvgFunc highbd_sad64x64_avg_sse2 = vpx_highbd_sad64x64_avg_sse2;
-const SadMxNAvgFunc highbd_sad64x32_avg_sse2 = vpx_highbd_sad64x32_avg_sse2;
-const SadMxNAvgFunc highbd_sad32x64_avg_sse2 = vpx_highbd_sad32x64_avg_sse2;
-const SadMxNAvgFunc highbd_sad32x32_avg_sse2 = vpx_highbd_sad32x32_avg_sse2;
-const SadMxNAvgFunc highbd_sad32x16_avg_sse2 = vpx_highbd_sad32x16_avg_sse2;
-const SadMxNAvgFunc highbd_sad16x32_avg_sse2 = vpx_highbd_sad16x32_avg_sse2;
-const SadMxNAvgFunc highbd_sad16x16_avg_sse2 = vpx_highbd_sad16x16_avg_sse2;
-const SadMxNAvgFunc highbd_sad16x8_avg_sse2 = vpx_highbd_sad16x8_avg_sse2;
-const SadMxNAvgFunc highbd_sad8x16_avg_sse2 = vpx_highbd_sad8x16_avg_sse2;
-const SadMxNAvgFunc highbd_sad8x8_avg_sse2 = vpx_highbd_sad8x8_avg_sse2;
-const SadMxNAvgFunc highbd_sad8x4_avg_sse2 = vpx_highbd_sad8x4_avg_sse2;
-#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNAvgParam avg_sse2_tests[] = {
- make_tuple(64, 64, sad64x64_avg_sse2, -1),
- make_tuple(64, 32, sad64x32_avg_sse2, -1),
- make_tuple(32, 64, sad32x64_avg_sse2, -1),
- make_tuple(32, 32, sad32x32_avg_sse2, -1),
- make_tuple(32, 16, sad32x16_avg_sse2, -1),
- make_tuple(16, 32, sad16x32_avg_sse2, -1),
- make_tuple(16, 16, sad16x16_avg_sse2, -1),
- make_tuple(16, 8, sad16x8_avg_sse2, -1),
- make_tuple(8, 16, sad8x16_avg_sse2, -1),
- make_tuple(8, 8, sad8x8_avg_sse2, -1),
- make_tuple(8, 4, sad8x4_avg_sse2, -1),
+ make_tuple(64, 64, &vpx_sad64x64_avg_sse2, -1),
+ make_tuple(64, 32, &vpx_sad64x32_avg_sse2, -1),
+ make_tuple(32, 64, &vpx_sad32x64_avg_sse2, -1),
+ make_tuple(32, 32, &vpx_sad32x32_avg_sse2, -1),
+ make_tuple(32, 16, &vpx_sad32x16_avg_sse2, -1),
+ make_tuple(16, 32, &vpx_sad16x32_avg_sse2, -1),
+ make_tuple(16, 16, &vpx_sad16x16_avg_sse2, -1),
+ make_tuple(16, 8, &vpx_sad16x8_avg_sse2, -1),
+ make_tuple(8, 16, &vpx_sad8x16_avg_sse2, -1),
+ make_tuple(8, 8, &vpx_sad8x8_avg_sse2, -1),
+ make_tuple(8, 4, &vpx_sad8x4_avg_sse2, -1),
+ make_tuple(4, 8, &vpx_sad4x8_avg_sse2, -1),
+ make_tuple(4, 4, &vpx_sad4x4_avg_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
- make_tuple(64, 64, highbd_sad64x64_avg_sse2, 8),
- make_tuple(64, 32, highbd_sad64x32_avg_sse2, 8),
- make_tuple(32, 64, highbd_sad32x64_avg_sse2, 8),
- make_tuple(32, 32, highbd_sad32x32_avg_sse2, 8),
- make_tuple(32, 16, highbd_sad32x16_avg_sse2, 8),
- make_tuple(16, 32, highbd_sad16x32_avg_sse2, 8),
- make_tuple(16, 16, highbd_sad16x16_avg_sse2, 8),
- make_tuple(16, 8, highbd_sad16x8_avg_sse2, 8),
- make_tuple(8, 16, highbd_sad8x16_avg_sse2, 8),
- make_tuple(8, 8, highbd_sad8x8_avg_sse2, 8),
- make_tuple(8, 4, highbd_sad8x4_avg_sse2, 8),
- make_tuple(64, 64, highbd_sad64x64_avg_sse2, 10),
- make_tuple(64, 32, highbd_sad64x32_avg_sse2, 10),
- make_tuple(32, 64, highbd_sad32x64_avg_sse2, 10),
- make_tuple(32, 32, highbd_sad32x32_avg_sse2, 10),
- make_tuple(32, 16, highbd_sad32x16_avg_sse2, 10),
- make_tuple(16, 32, highbd_sad16x32_avg_sse2, 10),
- make_tuple(16, 16, highbd_sad16x16_avg_sse2, 10),
- make_tuple(16, 8, highbd_sad16x8_avg_sse2, 10),
- make_tuple(8, 16, highbd_sad8x16_avg_sse2, 10),
- make_tuple(8, 8, highbd_sad8x8_avg_sse2, 10),
- make_tuple(8, 4, highbd_sad8x4_avg_sse2, 10),
- make_tuple(64, 64, highbd_sad64x64_avg_sse2, 12),
- make_tuple(64, 32, highbd_sad64x32_avg_sse2, 12),
- make_tuple(32, 64, highbd_sad32x64_avg_sse2, 12),
- make_tuple(32, 32, highbd_sad32x32_avg_sse2, 12),
- make_tuple(32, 16, highbd_sad32x16_avg_sse2, 12),
- make_tuple(16, 32, highbd_sad16x32_avg_sse2, 12),
- make_tuple(16, 16, highbd_sad16x16_avg_sse2, 12),
- make_tuple(16, 8, highbd_sad16x8_avg_sse2, 12),
- make_tuple(8, 16, highbd_sad8x16_avg_sse2, 12),
- make_tuple(8, 8, highbd_sad8x8_avg_sse2, 12),
- make_tuple(8, 4, highbd_sad8x4_avg_sse2, 12),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 8),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 8),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 8),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 8),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 8),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 8),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 8),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 8),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 8),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 8),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 8),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 10),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 10),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 10),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 10),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 10),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 10),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 10),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 10),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 10),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 10),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 10),
+ make_tuple(64, 64, &vpx_highbd_sad64x64_avg_sse2, 12),
+ make_tuple(64, 32, &vpx_highbd_sad64x32_avg_sse2, 12),
+ make_tuple(32, 64, &vpx_highbd_sad32x64_avg_sse2, 12),
+ make_tuple(32, 32, &vpx_highbd_sad32x32_avg_sse2, 12),
+ make_tuple(32, 16, &vpx_highbd_sad32x16_avg_sse2, 12),
+ make_tuple(16, 32, &vpx_highbd_sad16x32_avg_sse2, 12),
+ make_tuple(16, 16, &vpx_highbd_sad16x16_avg_sse2, 12),
+ make_tuple(16, 8, &vpx_highbd_sad16x8_avg_sse2, 12),
+ make_tuple(8, 16, &vpx_highbd_sad8x16_avg_sse2, 12),
+ make_tuple(8, 8, &vpx_highbd_sad8x8_avg_sse2, 12),
+ make_tuple(8, 4, &vpx_highbd_sad8x4_avg_sse2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests));
-const SadMxNx4Func sad64x64x4d_sse2 = vpx_sad64x64x4d_sse2;
-const SadMxNx4Func sad64x32x4d_sse2 = vpx_sad64x32x4d_sse2;
-const SadMxNx4Func sad32x64x4d_sse2 = vpx_sad32x64x4d_sse2;
-const SadMxNx4Func sad32x32x4d_sse2 = vpx_sad32x32x4d_sse2;
-const SadMxNx4Func sad32x16x4d_sse2 = vpx_sad32x16x4d_sse2;
-const SadMxNx4Func sad16x32x4d_sse2 = vpx_sad16x32x4d_sse2;
-const SadMxNx4Func sad16x16x4d_sse2 = vpx_sad16x16x4d_sse2;
-const SadMxNx4Func sad16x8x4d_sse2 = vpx_sad16x8x4d_sse2;
-const SadMxNx4Func sad8x16x4d_sse2 = vpx_sad8x16x4d_sse2;
-const SadMxNx4Func sad8x8x4d_sse2 = vpx_sad8x8x4d_sse2;
-const SadMxNx4Func sad8x4x4d_sse2 = vpx_sad8x4x4d_sse2;
-#if CONFIG_VP9_HIGHBITDEPTH
-const SadMxNx4Func highbd_sad64x64x4d_sse2 = vpx_highbd_sad64x64x4d_sse2;
-const SadMxNx4Func highbd_sad64x32x4d_sse2 = vpx_highbd_sad64x32x4d_sse2;
-const SadMxNx4Func highbd_sad32x64x4d_sse2 = vpx_highbd_sad32x64x4d_sse2;
-const SadMxNx4Func highbd_sad32x32x4d_sse2 = vpx_highbd_sad32x32x4d_sse2;
-const SadMxNx4Func highbd_sad32x16x4d_sse2 = vpx_highbd_sad32x16x4d_sse2;
-const SadMxNx4Func highbd_sad16x32x4d_sse2 = vpx_highbd_sad16x32x4d_sse2;
-const SadMxNx4Func highbd_sad16x16x4d_sse2 = vpx_highbd_sad16x16x4d_sse2;
-const SadMxNx4Func highbd_sad16x8x4d_sse2 = vpx_highbd_sad16x8x4d_sse2;
-const SadMxNx4Func highbd_sad8x16x4d_sse2 = vpx_highbd_sad8x16x4d_sse2;
-const SadMxNx4Func highbd_sad8x8x4d_sse2 = vpx_highbd_sad8x8x4d_sse2;
-const SadMxNx4Func highbd_sad8x4x4d_sse2 = vpx_highbd_sad8x4x4d_sse2;
-const SadMxNx4Func highbd_sad4x8x4d_sse2 = vpx_highbd_sad4x8x4d_sse2;
-const SadMxNx4Func highbd_sad4x4x4d_sse2 = vpx_highbd_sad4x4x4d_sse2;
-#endif // CONFIG_VP9_HIGHBITDEPTH
const SadMxNx4Param x4d_sse2_tests[] = {
- make_tuple(64, 64, sad64x64x4d_sse2, -1),
- make_tuple(64, 32, sad64x32x4d_sse2, -1),
- make_tuple(32, 64, sad32x64x4d_sse2, -1),
- make_tuple(32, 32, sad32x32x4d_sse2, -1),
- make_tuple(32, 16, sad32x16x4d_sse2, -1),
- make_tuple(16, 32, sad16x32x4d_sse2, -1),
- make_tuple(16, 16, sad16x16x4d_sse2, -1),
- make_tuple(16, 8, sad16x8x4d_sse2, -1),
- make_tuple(8, 16, sad8x16x4d_sse2, -1),
- make_tuple(8, 8, sad8x8x4d_sse2, -1),
- make_tuple(8, 4, sad8x4x4d_sse2, -1),
+ make_tuple(64, 64, &vpx_sad64x64x4d_sse2, -1),
+ make_tuple(64, 32, &vpx_sad64x32x4d_sse2, -1),
+ make_tuple(32, 64, &vpx_sad32x64x4d_sse2, -1),
+ make_tuple(32, 32, &vpx_sad32x32x4d_sse2, -1),
+ make_tuple(32, 16, &vpx_sad32x16x4d_sse2, -1),
+ make_tuple(16, 32, &vpx_sad16x32x4d_sse2, -1),
+ make_tuple(16, 16, &vpx_sad16x16x4d_sse2, -1),
+ make_tuple(16, 8, &vpx_sad16x8x4d_sse2, -1),
+ make_tuple(8, 16, &vpx_sad8x16x4d_sse2, -1),
+ make_tuple(8, 8, &vpx_sad8x8x4d_sse2, -1),
+ make_tuple(8, 4, &vpx_sad8x4x4d_sse2, -1),
+ make_tuple(4, 8, &vpx_sad4x8x4d_sse2, -1),
+ make_tuple(4, 4, &vpx_sad4x4x4d_sse2, -1),
#if CONFIG_VP9_HIGHBITDEPTH
- make_tuple(64, 64, highbd_sad64x64x4d_sse2, 8),
- make_tuple(64, 32, highbd_sad64x32x4d_sse2, 8),
- make_tuple(32, 64, highbd_sad32x64x4d_sse2, 8),
- make_tuple(32, 32, highbd_sad32x32x4d_sse2, 8),
- make_tuple(32, 16, highbd_sad32x16x4d_sse2, 8),
- make_tuple(16, 32, highbd_sad16x32x4d_sse2, 8),
- make_tuple(16, 16, highbd_sad16x16x4d_sse2, 8),
- make_tuple(16, 8, highbd_sad16x8x4d_sse2, 8),
- make_tuple(8, 16, highbd_sad8x16x4d_sse2, 8),
- make_tuple(8, 8, highbd_sad8x8x4d_sse2, 8),
- make_tuple(8, 4, highbd_sad8x4x4d_sse2, 8),
- make_tuple(4, 8, highbd_sad4x8x4d_sse2, 8),
- make_tuple(4, 4, highbd_sad4x4x4d_sse2, 8),
- make_tuple(64, 64, highbd_sad64x64x4d_sse2, 10),
- make_tuple(64, 32, highbd_sad64x32x4d_sse2, 10),
- make_tuple(32, 64, highbd_sad32x64x4d_sse2, 10),
- make_tuple(32, 32, highbd_sad32x32x4d_sse2, 10),
- make_tuple(32, 16, highbd_sad32x16x4d_sse2, 10),
- make_tuple(16, 32, highbd_sad16x32x4d_sse2, 10),
- make_tuple(16, 16, highbd_sad16x16x4d_sse2, 10),
- make_tuple(16, 8, highbd_sad16x8x4d_sse2, 10),
- make_tuple(8, 16, highbd_sad8x16x4d_sse2, 10),
- make_tuple(8, 8, highbd_sad8x8x4d_sse2, 10),
- make_tuple(8, 4, highbd_sad8x4x4d_sse2, 10),
- make_tuple(4, 8, highbd_sad4x8x4d_sse2, 10),
- make_tuple(4, 4, highbd_sad4x4x4d_sse2, 10),
- make_tuple(64, 64, highbd_sad64x64x4d_sse2, 12),
- make_tuple(64, 32, highbd_sad64x32x4d_sse2, 12),
- make_tuple(32, 64, highbd_sad32x64x4d_sse2, 12),
- make_tuple(32, 32, highbd_sad32x32x4d_sse2, 12),
- make_tuple(32, 16, highbd_sad32x16x4d_sse2, 12),
- make_tuple(16, 32, highbd_sad16x32x4d_sse2, 12),
- make_tuple(16, 16, highbd_sad16x16x4d_sse2, 12),
- make_tuple(16, 8, highbd_sad16x8x4d_sse2, 12),
- make_tuple(8, 16, highbd_sad8x16x4d_sse2, 12),
- make_tuple(8, 8, highbd_sad8x8x4d_sse2, 12),
- make_tuple(8, 4, highbd_sad8x4x4d_sse2, 12),
- make_tuple(4, 8, highbd_sad4x8x4d_sse2, 12),
- make_tuple(4, 4, highbd_sad4x4x4d_sse2, 12),
+ make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 8),
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 8),
+ make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 8),
+ make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 8),
+ make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 8),
+ make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 8),
+ make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 8),
+ make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 8),
+ make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 8),
+ make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 8),
+ make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 8),
+ make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 8),
+ make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 8),
+ make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 10),
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 10),
+ make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 10),
+ make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 10),
+ make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 10),
+ make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 10),
+ make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 10),
+ make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 10),
+ make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 10),
+ make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 10),
+ make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 10),
+ make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 10),
+ make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 10),
+ make_tuple(64, 64, &vpx_highbd_sad64x64x4d_sse2, 12),
+ make_tuple(64, 32, &vpx_highbd_sad64x32x4d_sse2, 12),
+ make_tuple(32, 64, &vpx_highbd_sad32x64x4d_sse2, 12),
+ make_tuple(32, 32, &vpx_highbd_sad32x32x4d_sse2, 12),
+ make_tuple(32, 16, &vpx_highbd_sad32x16x4d_sse2, 12),
+ make_tuple(16, 32, &vpx_highbd_sad16x32x4d_sse2, 12),
+ make_tuple(16, 16, &vpx_highbd_sad16x16x4d_sse2, 12),
+ make_tuple(16, 8, &vpx_highbd_sad16x8x4d_sse2, 12),
+ make_tuple(8, 16, &vpx_highbd_sad8x16x4d_sse2, 12),
+ make_tuple(8, 8, &vpx_highbd_sad8x8x4d_sse2, 12),
+ make_tuple(8, 4, &vpx_highbd_sad8x4x4d_sse2, 12),
+ make_tuple(4, 8, &vpx_highbd_sad4x8x4d_sse2, 12),
+ make_tuple(4, 4, &vpx_highbd_sad4x4x4d_sse2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
@@ -1076,39 +868,27 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
#endif // HAVE_SSE4_1
#if HAVE_AVX2
-const SadMxNFunc sad64x64_avx2 = vpx_sad64x64_avx2;
-const SadMxNFunc sad64x32_avx2 = vpx_sad64x32_avx2;
-const SadMxNFunc sad32x64_avx2 = vpx_sad32x64_avx2;
-const SadMxNFunc sad32x32_avx2 = vpx_sad32x32_avx2;
-const SadMxNFunc sad32x16_avx2 = vpx_sad32x16_avx2;
const SadMxNParam avx2_tests[] = {
- make_tuple(64, 64, sad64x64_avx2, -1),
- make_tuple(64, 32, sad64x32_avx2, -1),
- make_tuple(32, 64, sad32x64_avx2, -1),
- make_tuple(32, 32, sad32x32_avx2, -1),
- make_tuple(32, 16, sad32x16_avx2, -1),
+ make_tuple(64, 64, &vpx_sad64x64_avx2, -1),
+ make_tuple(64, 32, &vpx_sad64x32_avx2, -1),
+ make_tuple(32, 64, &vpx_sad32x64_avx2, -1),
+ make_tuple(32, 32, &vpx_sad32x32_avx2, -1),
+ make_tuple(32, 16, &vpx_sad32x16_avx2, -1),
};
INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
-const SadMxNAvgFunc sad64x64_avg_avx2 = vpx_sad64x64_avg_avx2;
-const SadMxNAvgFunc sad64x32_avg_avx2 = vpx_sad64x32_avg_avx2;
-const SadMxNAvgFunc sad32x64_avg_avx2 = vpx_sad32x64_avg_avx2;
-const SadMxNAvgFunc sad32x32_avg_avx2 = vpx_sad32x32_avg_avx2;
-const SadMxNAvgFunc sad32x16_avg_avx2 = vpx_sad32x16_avg_avx2;
const SadMxNAvgParam avg_avx2_tests[] = {
- make_tuple(64, 64, sad64x64_avg_avx2, -1),
- make_tuple(64, 32, sad64x32_avg_avx2, -1),
- make_tuple(32, 64, sad32x64_avg_avx2, -1),
- make_tuple(32, 32, sad32x32_avg_avx2, -1),
- make_tuple(32, 16, sad32x16_avg_avx2, -1),
+ make_tuple(64, 64, &vpx_sad64x64_avg_avx2, -1),
+ make_tuple(64, 32, &vpx_sad64x32_avg_avx2, -1),
+ make_tuple(32, 64, &vpx_sad32x64_avg_avx2, -1),
+ make_tuple(32, 32, &vpx_sad32x32_avg_avx2, -1),
+ make_tuple(32, 16, &vpx_sad32x16_avg_avx2, -1),
};
INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests));
-const SadMxNx4Func sad64x64x4d_avx2 = vpx_sad64x64x4d_avx2;
-const SadMxNx4Func sad32x32x4d_avx2 = vpx_sad32x32x4d_avx2;
const SadMxNx4Param x4d_avx2_tests[] = {
- make_tuple(64, 64, sad64x64x4d_avx2, -1),
- make_tuple(32, 32, sad32x32x4d_avx2, -1),
+ make_tuple(64, 64, &vpx_sad64x64x4d_avx2, -1),
+ make_tuple(32, 32, &vpx_sad32x32x4d_avx2, -1),
};
INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
#endif // HAVE_AVX2
@@ -1116,93 +896,54 @@ INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
//------------------------------------------------------------------------------
// MIPS functions
#if HAVE_MSA
-const SadMxNFunc sad64x64_msa = vpx_sad64x64_msa;
-const SadMxNFunc sad64x32_msa = vpx_sad64x32_msa;
-const SadMxNFunc sad32x64_msa = vpx_sad32x64_msa;
-const SadMxNFunc sad32x32_msa = vpx_sad32x32_msa;
-const SadMxNFunc sad32x16_msa = vpx_sad32x16_msa;
-const SadMxNFunc sad16x32_msa = vpx_sad16x32_msa;
-const SadMxNFunc sad16x16_msa = vpx_sad16x16_msa;
-const SadMxNFunc sad16x8_msa = vpx_sad16x8_msa;
-const SadMxNFunc sad8x16_msa = vpx_sad8x16_msa;
-const SadMxNFunc sad8x8_msa = vpx_sad8x8_msa;
-const SadMxNFunc sad8x4_msa = vpx_sad8x4_msa;
-const SadMxNFunc sad4x8_msa = vpx_sad4x8_msa;
-const SadMxNFunc sad4x4_msa = vpx_sad4x4_msa;
const SadMxNParam msa_tests[] = {
- make_tuple(64, 64, sad64x64_msa, -1),
- make_tuple(64, 32, sad64x32_msa, -1),
- make_tuple(32, 64, sad32x64_msa, -1),
- make_tuple(32, 32, sad32x32_msa, -1),
- make_tuple(32, 16, sad32x16_msa, -1),
- make_tuple(16, 32, sad16x32_msa, -1),
- make_tuple(16, 16, sad16x16_msa, -1),
- make_tuple(16, 8, sad16x8_msa, -1),
- make_tuple(8, 16, sad8x16_msa, -1),
- make_tuple(8, 8, sad8x8_msa, -1),
- make_tuple(8, 4, sad8x4_msa, -1),
- make_tuple(4, 8, sad4x8_msa, -1),
- make_tuple(4, 4, sad4x4_msa, -1),
+ make_tuple(64, 64, &vpx_sad64x64_msa, -1),
+ make_tuple(64, 32, &vpx_sad64x32_msa, -1),
+ make_tuple(32, 64, &vpx_sad32x64_msa, -1),
+ make_tuple(32, 32, &vpx_sad32x32_msa, -1),
+ make_tuple(32, 16, &vpx_sad32x16_msa, -1),
+ make_tuple(16, 32, &vpx_sad16x32_msa, -1),
+ make_tuple(16, 16, &vpx_sad16x16_msa, -1),
+ make_tuple(16, 8, &vpx_sad16x8_msa, -1),
+ make_tuple(8, 16, &vpx_sad8x16_msa, -1),
+ make_tuple(8, 8, &vpx_sad8x8_msa, -1),
+ make_tuple(8, 4, &vpx_sad8x4_msa, -1),
+ make_tuple(4, 8, &vpx_sad4x8_msa, -1),
+ make_tuple(4, 4, &vpx_sad4x4_msa, -1),
};
INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests));
-const SadMxNAvgFunc sad64x64_avg_msa = vpx_sad64x64_avg_msa;
-const SadMxNAvgFunc sad64x32_avg_msa = vpx_sad64x32_avg_msa;
-const SadMxNAvgFunc sad32x64_avg_msa = vpx_sad32x64_avg_msa;
-const SadMxNAvgFunc sad32x32_avg_msa = vpx_sad32x32_avg_msa;
-const SadMxNAvgFunc sad32x16_avg_msa = vpx_sad32x16_avg_msa;
-const SadMxNAvgFunc sad16x32_avg_msa = vpx_sad16x32_avg_msa;
-const SadMxNAvgFunc sad16x16_avg_msa = vpx_sad16x16_avg_msa;
-const SadMxNAvgFunc sad16x8_avg_msa = vpx_sad16x8_avg_msa;
-const SadMxNAvgFunc sad8x16_avg_msa = vpx_sad8x16_avg_msa;
-const SadMxNAvgFunc sad8x8_avg_msa = vpx_sad8x8_avg_msa;
-const SadMxNAvgFunc sad8x4_avg_msa = vpx_sad8x4_avg_msa;
-const SadMxNAvgFunc sad4x8_avg_msa = vpx_sad4x8_avg_msa;
-const SadMxNAvgFunc sad4x4_avg_msa = vpx_sad4x4_avg_msa;
const SadMxNAvgParam avg_msa_tests[] = {
- make_tuple(64, 64, sad64x64_avg_msa, -1),
- make_tuple(64, 32, sad64x32_avg_msa, -1),
- make_tuple(32, 64, sad32x64_avg_msa, -1),
- make_tuple(32, 32, sad32x32_avg_msa, -1),
- make_tuple(32, 16, sad32x16_avg_msa, -1),
- make_tuple(16, 32, sad16x32_avg_msa, -1),
- make_tuple(16, 16, sad16x16_avg_msa, -1),
- make_tuple(16, 8, sad16x8_avg_msa, -1),
- make_tuple(8, 16, sad8x16_avg_msa, -1),
- make_tuple(8, 8, sad8x8_avg_msa, -1),
- make_tuple(8, 4, sad8x4_avg_msa, -1),
- make_tuple(4, 8, sad4x8_avg_msa, -1),
- make_tuple(4, 4, sad4x4_avg_msa, -1),
+ make_tuple(64, 64, &vpx_sad64x64_avg_msa, -1),
+ make_tuple(64, 32, &vpx_sad64x32_avg_msa, -1),
+ make_tuple(32, 64, &vpx_sad32x64_avg_msa, -1),
+ make_tuple(32, 32, &vpx_sad32x32_avg_msa, -1),
+ make_tuple(32, 16, &vpx_sad32x16_avg_msa, -1),
+ make_tuple(16, 32, &vpx_sad16x32_avg_msa, -1),
+ make_tuple(16, 16, &vpx_sad16x16_avg_msa, -1),
+ make_tuple(16, 8, &vpx_sad16x8_avg_msa, -1),
+ make_tuple(8, 16, &vpx_sad8x16_avg_msa, -1),
+ make_tuple(8, 8, &vpx_sad8x8_avg_msa, -1),
+ make_tuple(8, 4, &vpx_sad8x4_avg_msa, -1),
+ make_tuple(4, 8, &vpx_sad4x8_avg_msa, -1),
+ make_tuple(4, 4, &vpx_sad4x4_avg_msa, -1),
};
INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests));
-const SadMxNx4Func sad64x64x4d_msa = vpx_sad64x64x4d_msa;
-const SadMxNx4Func sad64x32x4d_msa = vpx_sad64x32x4d_msa;
-const SadMxNx4Func sad32x64x4d_msa = vpx_sad32x64x4d_msa;
-const SadMxNx4Func sad32x32x4d_msa = vpx_sad32x32x4d_msa;
-const SadMxNx4Func sad32x16x4d_msa = vpx_sad32x16x4d_msa;
-const SadMxNx4Func sad16x32x4d_msa = vpx_sad16x32x4d_msa;
-const SadMxNx4Func sad16x16x4d_msa = vpx_sad16x16x4d_msa;
-const SadMxNx4Func sad16x8x4d_msa = vpx_sad16x8x4d_msa;
-const SadMxNx4Func sad8x16x4d_msa = vpx_sad8x16x4d_msa;
-const SadMxNx4Func sad8x8x4d_msa = vpx_sad8x8x4d_msa;
-const SadMxNx4Func sad8x4x4d_msa = vpx_sad8x4x4d_msa;
-const SadMxNx4Func sad4x8x4d_msa = vpx_sad4x8x4d_msa;
-const SadMxNx4Func sad4x4x4d_msa = vpx_sad4x4x4d_msa;
const SadMxNx4Param x4d_msa_tests[] = {
- make_tuple(64, 64, sad64x64x4d_msa, -1),
- make_tuple(64, 32, sad64x32x4d_msa, -1),
- make_tuple(32, 64, sad32x64x4d_msa, -1),
- make_tuple(32, 32, sad32x32x4d_msa, -1),
- make_tuple(32, 16, sad32x16x4d_msa, -1),
- make_tuple(16, 32, sad16x32x4d_msa, -1),
- make_tuple(16, 16, sad16x16x4d_msa, -1),
- make_tuple(16, 8, sad16x8x4d_msa, -1),
- make_tuple(8, 16, sad8x16x4d_msa, -1),
- make_tuple(8, 8, sad8x8x4d_msa, -1),
- make_tuple(8, 4, sad8x4x4d_msa, -1),
- make_tuple(4, 8, sad4x8x4d_msa, -1),
- make_tuple(4, 4, sad4x4x4d_msa, -1),
+ make_tuple(64, 64, &vpx_sad64x64x4d_msa, -1),
+ make_tuple(64, 32, &vpx_sad64x32x4d_msa, -1),
+ make_tuple(32, 64, &vpx_sad32x64x4d_msa, -1),
+ make_tuple(32, 32, &vpx_sad32x32x4d_msa, -1),
+ make_tuple(32, 16, &vpx_sad32x16x4d_msa, -1),
+ make_tuple(16, 32, &vpx_sad16x32x4d_msa, -1),
+ make_tuple(16, 16, &vpx_sad16x16x4d_msa, -1),
+ make_tuple(16, 8, &vpx_sad16x8x4d_msa, -1),
+ make_tuple(8, 16, &vpx_sad8x16x4d_msa, -1),
+ make_tuple(8, 8, &vpx_sad8x8x4d_msa, -1),
+ make_tuple(8, 4, &vpx_sad8x4x4d_msa, -1),
+ make_tuple(4, 8, &vpx_sad4x8x4d_msa, -1),
+ make_tuple(4, 4, &vpx_sad4x4x4d_msa, -1),
};
INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
#endif // HAVE_MSA
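
The sad_test.cc hunks above replace the per-size `const SadMxNFunc` aliases with address-of expressions written directly into `make_tuple`, and extend the SSE2 tables with the 4x8/4x4 sizes plus the 8/10/12 high-bit-depth rows (a final parameter of -1 selects the plain 8-bit path). A minimal sketch of the pattern, with a hypothetical C-reference kernel standing in for the real `vpx_sad*` functions and std::tuple standing in for the harness's std::tr1::tuple:

    #include <cstdint>
    #include <cstdlib>
    #include <tuple>

    // Same shape as the vpx_sad* kernels: sum of absolute differences
    // between an MxN source block and a reference block.
    typedef unsigned int (*SadMxNFunc)(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride);

    // width, height, kernel, bit depth (-1 = low-bit-depth path).
    typedef std::tuple<int, int, SadMxNFunc, int> SadMxNParam;

    static unsigned int sad4x4_ref(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride) {
      unsigned int sad = 0;
      for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c)
          sad += std::abs(src[r * src_stride + c] - ref[r * ref_stride + c]);
      return sad;
    }

    // Taking the address inline removes the named alias per size/ISA
    // combination that each deleted line above declared.
    static const SadMxNParam kParams[] = {
      std::make_tuple(4, 4, &sad4x4_ref, -1),
    };
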
diff --git a/libvpx/test/simple_encoder.sh b/libvpx/test/simple_encoder.sh
index c4a628030..ee633ae99 100755
--- a/libvpx/test/simple_encoder.sh
+++ b/libvpx/test/simple_encoder.sh
@@ -23,7 +23,7 @@ simple_encoder_verify_environment() {
fi
}
-# Runs simple_encoder using the codec specified by $1.
+# Runs simple_encoder using the codec specified by $1 with a frame limit of 100.
simple_encoder() {
local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
local codec="$1"
@@ -35,7 +35,7 @@ simple_encoder() {
fi
eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
- "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
+ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 100 \
${devnull}
[ -e "${output_file}" ] || return 1
@@ -47,16 +47,13 @@ simple_encoder_vp8() {
fi
}
-# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
-# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
-# machine.
-DISABLED_simple_encoder_vp9() {
+simple_encoder_vp9() {
if [ "$(vp9_encode_available)" = "yes" ]; then
simple_encoder vp9 || return 1
fi
}
simple_encoder_tests="simple_encoder_vp8
- DISABLED_simple_encoder_vp9"
+ simple_encoder_vp9"
run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
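
The removed TODO confirms what changed underneath: simple_encoder grew a frame-limit parameter, and capping the run at 100 frames makes the VP9 case fast enough to re-enable. Of the two new positional arguments, `100` is that frame limit; the leading `0` is presumably the example's error-resilience flag, though that reading is an assumption here. A rough sketch of a frame-limited encode loop under those assumptions (the helpers are stand-ins, not the example's real functions):

    #include <cstdio>

    // Stand-ins for the example's raw-frame reader and codec calls.
    static bool read_frame(std::FILE *infile) {
      return std::fgetc(infile) != EOF;
    }
    static void encode_frame(int index) { std::printf("frame %d\n", index); }

    // Encode until EOF, or until max_frames frames when max_frames > 0.
    static void encode_loop(std::FILE *infile, int max_frames) {
      int frame_count = 0;
      while ((max_frames <= 0 || frame_count < max_frames) &&
             read_frame(infile)) {
        encode_frame(frame_count++);
      }
    }

    int main() {
      encode_loop(stdin, 100);  // mirrors the 100-frame limit passed above
      return 0;
    }
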
diff --git a/libvpx/test/sixtap_predict_test.cc b/libvpx/test/sixtap_predict_test.cc
index 1e682e7bd..304a1484a 100644
--- a/libvpx/test/sixtap_predict_test.cc
+++ b/libvpx/test/sixtap_predict_test.cc
@@ -186,70 +186,48 @@ TEST_P(SixtapPredictTest, TestWithRandomData) {
using std::tr1::make_tuple;
-const SixtapPredictFunc sixtap_16x16_c = vp8_sixtap_predict16x16_c;
-const SixtapPredictFunc sixtap_8x8_c = vp8_sixtap_predict8x8_c;
-const SixtapPredictFunc sixtap_8x4_c = vp8_sixtap_predict8x4_c;
-const SixtapPredictFunc sixtap_4x4_c = vp8_sixtap_predict4x4_c;
INSTANTIATE_TEST_CASE_P(
C, SixtapPredictTest, ::testing::Values(
- make_tuple(16, 16, sixtap_16x16_c),
- make_tuple(8, 8, sixtap_8x8_c),
- make_tuple(8, 4, sixtap_8x4_c),
- make_tuple(4, 4, sixtap_4x4_c)));
+ make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
+ make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
+ make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
+ make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
#if HAVE_NEON
-const SixtapPredictFunc sixtap_16x16_neon = vp8_sixtap_predict16x16_neon;
-const SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon;
-const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon;
INSTANTIATE_TEST_CASE_P(
NEON, SixtapPredictTest, ::testing::Values(
- make_tuple(16, 16, sixtap_16x16_neon),
- make_tuple(8, 8, sixtap_8x8_neon),
- make_tuple(8, 4, sixtap_8x4_neon)));
+ make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
+ make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
+ make_tuple(8, 4, &vp8_sixtap_predict8x4_neon)));
#endif
#if HAVE_MMX
-const SixtapPredictFunc sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx;
-const SixtapPredictFunc sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx;
-const SixtapPredictFunc sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx;
-const SixtapPredictFunc sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx;
INSTANTIATE_TEST_CASE_P(
MMX, SixtapPredictTest, ::testing::Values(
- make_tuple(16, 16, sixtap_16x16_mmx),
- make_tuple(8, 8, sixtap_8x8_mmx),
- make_tuple(8, 4, sixtap_8x4_mmx),
- make_tuple(4, 4, sixtap_4x4_mmx)));
+ make_tuple(16, 16, &vp8_sixtap_predict16x16_mmx),
+ make_tuple(8, 8, &vp8_sixtap_predict8x8_mmx),
+ make_tuple(8, 4, &vp8_sixtap_predict8x4_mmx),
+ make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
#endif
#if HAVE_SSE2
-const SixtapPredictFunc sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2;
-const SixtapPredictFunc sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2;
-const SixtapPredictFunc sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2;
INSTANTIATE_TEST_CASE_P(
SSE2, SixtapPredictTest, ::testing::Values(
- make_tuple(16, 16, sixtap_16x16_sse2),
- make_tuple(8, 8, sixtap_8x8_sse2),
- make_tuple(8, 4, sixtap_8x4_sse2)));
+ make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
+ make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
+ make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
#endif
#if HAVE_SSSE3
-const SixtapPredictFunc sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3;
-const SixtapPredictFunc sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3;
-const SixtapPredictFunc sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3;
-const SixtapPredictFunc sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3;
INSTANTIATE_TEST_CASE_P(
SSSE3, SixtapPredictTest, ::testing::Values(
- make_tuple(16, 16, sixtap_16x16_ssse3),
- make_tuple(8, 8, sixtap_8x8_ssse3),
- make_tuple(8, 4, sixtap_8x4_ssse3),
- make_tuple(4, 4, sixtap_4x4_ssse3)));
+ make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
+ make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
+ make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
+ make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
#endif
#if HAVE_MSA
-const SixtapPredictFunc sixtap_16x16_msa = vp8_sixtap_predict16x16_msa;
-const SixtapPredictFunc sixtap_8x8_msa = vp8_sixtap_predict8x8_msa;
-const SixtapPredictFunc sixtap_8x4_msa = vp8_sixtap_predict8x4_msa;
-const SixtapPredictFunc sixtap_4x4_msa = vp8_sixtap_predict4x4_msa;
INSTANTIATE_TEST_CASE_P(
MSA, SixtapPredictTest, ::testing::Values(
- make_tuple(16, 16, sixtap_16x16_msa),
- make_tuple(8, 8, sixtap_8x8_msa),
- make_tuple(8, 4, sixtap_8x4_msa),
- make_tuple(4, 4, sixtap_4x4_msa)));
+ make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
+ make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
+ make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
+ make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
#endif
} // namespace
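
sixtap_predict_test.cc gets the same alias-removal treatment as sad_test.cc. For context on what these kernels do: VP8's six-tap filters interpolate sub-pixel motion by weighting six neighbouring pixels. A hedged one-row sketch using the half-pel taps (which sum to 128 in the VP8 spec); the caller must supply two pixels of left border and three of right:

    #include <cstdint>

    // Horizontal six-tap pass over one row. Taps {3,-16,77,77,-16,3} are the
    // VP8 half-pel filter; other sub-pel offsets use different spec tables.
    static void sixtap_row(const uint8_t *src, uint8_t *dst, int width) {
      static const int taps[6] = { 3, -16, 77, 77, -16, 3 };
      for (int x = 0; x < width; ++x) {
        int sum = 64;  // rounding term: half of the 128 tap sum
        for (int k = 0; k < 6; ++k) sum += taps[k] * src[x + k - 2];
        sum >>= 7;  // scale back by 128
        dst[x] = static_cast<uint8_t>(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
      }
    }
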
diff --git a/libvpx/test/superframe_test.cc b/libvpx/test/superframe_test.cc
index 90aa75b41..b07bcb284 100644
--- a/libvpx/test/superframe_test.cc
+++ b/libvpx/test/superframe_test.cc
@@ -17,7 +17,6 @@
namespace {
const int kTestMode = 0;
-const int kSuperframeSyntax = 1;
typedef std::tr1::tuple<libvpx_test::TestMode,int> SuperframeTestParam;
@@ -32,11 +31,9 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
InitializeConfig();
const SuperframeTestParam input = GET_PARAM(1);
const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input);
- const int syntax = std::tr1::get<kSuperframeSyntax>(input);
SetMode(mode);
sf_count_ = 0;
sf_count_max_ = INT_MAX;
- is_vp10_style_superframe_ = syntax;
}
virtual void TearDown() {
@@ -59,8 +56,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
const uint8_t marker = buffer[pkt->data.frame.sz - 1];
const int frames = (marker & 0x7) + 1;
const int mag = ((marker >> 3) & 3) + 1;
- const unsigned int index_sz =
- 2 + mag * (frames - is_vp10_style_superframe_);
+ const unsigned int index_sz = 2 + mag * frames;
if ((marker & 0xe0) == 0xc0 &&
pkt->data.frame.sz >= index_sz &&
buffer[pkt->data.frame.sz - index_sz] == marker) {
@@ -85,7 +81,6 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
return pkt;
}
- int is_vp10_style_superframe_;
int sf_count_;
int sf_count_max_;
vpx_codec_cx_pkt_t modified_pkt_;
@@ -106,8 +101,4 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
::testing::Values(::libvpx_test::kTwoPassGood),
::testing::Values(0)));
-
-VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
- ::testing::Values(::libvpx_test::kTwoPassGood),
- ::testing::Values(CONFIG_MISC_FIXES)));
} // namespace
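
With the VP10 syntax variant gone, the check above reduces to the plain VP9 superframe index rule: the packet's last byte is a marker whose top three bits are 110, whose low three bits give the frame count minus one, and whose bits 3-4 give the per-frame size-field width minus one; the index occupies 2 + mag * frames bytes, bracketed by the marker at both ends. The same logic as a standalone sketch:

    #include <cstddef>
    #include <cstdint>

    // Returns true if the buffer ends in a well-formed VP9 superframe index.
    static bool has_superframe_index(const uint8_t *buf, size_t sz) {
      if (sz == 0) return false;
      const uint8_t marker = buf[sz - 1];
      if ((marker & 0xe0) != 0xc0) return false;  // top bits must be 110
      const int frames = (marker & 0x7) + 1;       // frames in the superframe
      const int mag = ((marker >> 3) & 0x3) + 1;   // bytes per size field
      const size_t index_sz = 2 + static_cast<size_t>(mag) * frames;
      return sz >= index_sz && buf[sz - index_sz] == marker;
    }
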
diff --git a/libvpx/test/test-data.mk b/libvpx/test/test-data.mk
index 4280b35f8..05a0885ed 100644
--- a/libvpx/test/test-data.mk
+++ b/libvpx/test/test-data.mk
@@ -418,6 +418,18 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
@@ -550,6 +562,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
@@ -642,6 +656,34 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
@@ -769,3 +811,53 @@ endif # CONFIG_ENCODE_PERF_TESTS
# sort and remove duplicates
LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes))
+
+# VP9 dynamic resizing test (decoder)
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
diff --git a/libvpx/test/test-data.sha1 b/libvpx/test/test-data.sha1
index 4e4ac6237..a4ed1742f 100644
--- a/libvpx/test/test-data.sha1
+++ b/libvpx/test/test-data.sha1
@@ -550,6 +550,8 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f *vp90-2-03-size-226x202.webm
83c6d8f2969b759e10e5c6542baca1265c874c29 *vp90-2-03-size-226x224.webm.md5
fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce *vp90-2-03-size-226x226.webm
94ad19b8b699cea105e2ff18f0df2afd7242bcf7 *vp90-2-03-size-226x226.webm.md5
+52bc1dfd3a97b24d922eb8a31d07527891561f2a *vp90-2-03-size-352x288.webm
+3084d6d0a1eec22e85a394422fbc8faae58930a5 *vp90-2-03-size-352x288.webm.md5
b6524e4084d15b5d0caaa3d3d1368db30cbee69c *vp90-2-03-deltaq.webm
65f45ec9a55537aac76104818278e0978f94a678 *vp90-2-03-deltaq.webm.md5
4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba *vp90-2-05-resize.ivf
@@ -744,3 +746,91 @@ e60d859b0ef2b331b21740cf6cb83fabe469b079 *invalid-vp90-2-03-size-202x210.webm.iv
0ae808dca4d3c1152a9576e14830b6faa39f1b4a *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
+85771f6ab44e4a0226e206c0cde8351dd5918953 *vp90-2-02-size-130x132.webm
+512dad5eabbed37b4bbbc64ce153f1a5484427b8 *vp90-2-02-size-130x132.webm.md5
+01f7127d40360289db63b27f61cb9afcda350e95 *vp90-2-02-size-132x130.webm
+4a94275328ae076cf60f966c097a8721010fbf5a *vp90-2-02-size-132x130.webm.md5
+f41c0400b5716b4b70552c40dd03d44be131e1cc *vp90-2-02-size-132x132.webm
+1a69e989f697e424bfe3e3e8a77bb0c0992c8e47 *vp90-2-02-size-132x132.webm.md5
+94a5cbfacacba100e0c5f7861c72a1b417feca0f *vp90-2-02-size-178x180.webm
+dedfecf1d784bcf70629592fa5e6f01d5441ccc9 *vp90-2-02-size-178x180.webm.md5
+4828b62478c04014bba3095a83106911a71cf387 *vp90-2-02-size-180x178.webm
+423da2b861050c969d78ed8e8f8f14045d1d8199 *vp90-2-02-size-180x178.webm.md5
+338f7c9282f43e29940f5391118aadd17e4f9234 *vp90-2-02-size-180x180.webm
+6c2ef013392310778dca5dd5351160eca66b0a60 *vp90-2-02-size-180x180.webm.md5
+679fa7d6807e936ff937d7b282e7dbd8ac76447e *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm
+fc7267ab8fc2bf5d6c234e34ee6c078a967b4888 *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5
+9d33a137c819792209c5ce4e4e1ee5da73d574fe *vp90-2-14-resize-10frames-fp-tiles-1-2.webm
+0c78a154956a8605d050bdd75e0dcc4d39c040a6 *vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5
+d6a8d8c57f66a91d23e8e7df480f9ae841e56c37 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm
+e9b4e8c7b33b5fda745d340c3f47e6623ae40cf2 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5
+aa6fe043a0c4a42b49c87ebbe812d4afd9945bec *vp90-2-14-resize-10frames-fp-tiles-1-8.webm
+028520578994c2d013d4c0129033d4f2ff31bbe0 *vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5
+d1d5463c9ea7b5cc5f609ddedccddf656f348d1a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm
+92d5872f5bdffbed721703b7e959b4f885e3d77a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5
+677cb29de1215d97346015af5807a9b1faad54cf *vp90-2-14-resize-10frames-fp-tiles-2-4.webm
+a5db19f977094ec3fd60b4f7671b3e6740225e12 *vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5
+cdd3c52ba21067efdbb2de917fe2a965bf27332e *vp90-2-14-resize-10frames-fp-tiles-2-8.webm
+db17ec5d894ea8b8d0b7f32206d0dd3d46dcfa6d *vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5
+0f6093c472125d05b764d7d1965c1d56771c0ea2 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm
+bc7c79e1bee07926dd970462ce6f64fc30eec3e1 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5
+c5142e2bff4091338196c8ea8bc9266e64f548bc *vp90-2-14-resize-10frames-fp-tiles-4-2.webm
+22aa3dd430b69fd3d92f6561bac86deeed90486d *vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5
+ede8b1466d2f26e1b1bd9602addb9cd1017e1d8c *vp90-2-14-resize-10frames-fp-tiles-4-8.webm
+508d5ebb9c0eac2a4100281a3ee052ec2fc19217 *vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5
+2b292e3392854cd1d76ae597a6f53656cf741cfa *vp90-2-14-resize-10frames-fp-tiles-8-1.webm
+1c24e54fa19e94e1722f24676404444e941c3d31 *vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5
+61beda21064e09634564caa6697ab90bd53c9af7 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm
+9c0657b4d9e1d0e4c9d28a90e5a8630a65519124 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5
+1758c50a11a7c92522749b4a251664705f1f0d4b *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm
+4f454a06750614314ae15a44087b79016fe2db97 *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5
+3920c95ba94f1f048a731d9d9b416043b44aa4bd *vp90-2-14-resize-10frames-fp-tiles-8-4.webm
+4eb347a0456d2c49a1e1d8de5aa1c51acc39887e *vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5
+4b95a74c032a473b6683d7ad5754db1b0ec378e9 *vp90-2-21-resize_inter_1280x720_5_1-2.webm
+a7826dd386bedfe69d02736969bfb47fb6a40a5e *vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5
+5cfff79e82c4d69964ccb8e75b4f0c53b9295167 *vp90-2-21-resize_inter_1280x720_5_3-4.webm
+a18f57db4a25e1f543a99f2ceb182e00db0ee22f *vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5
+d26db0811bf30eb4131d928669713e2485f8e833 *vp90-2-21-resize_inter_1280x720_7_1-2.webm
+fd6f9f332cd5bea4c0f0d57be4297bea493cc5a1 *vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5
+5c7d73d4d268e2ba9593b31cb091fd339505c7fd *vp90-2-21-resize_inter_1280x720_7_3-4.webm
+7bbb949cabc1e70dadcc74582739f63b833034e0 *vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5
+f2d2a41a60eb894aff0c5854afca15931f1445a8 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm
+66d7789992613ac9d678ff905ff1059daa1b89e4 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5
+764edb75fe7dd64e73a1b4f3b4b2b1bf237a4dea *vp90-2-21-resize_inter_1920x1080_5_3-4.webm
+f78bea1075983fd990e7f25d4f31438f9b5efa34 *vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5
+96496f2ade764a5de9f0c27917c7df1f120fb2ef *vp90-2-21-resize_inter_1920x1080_7_1-2.webm
+2632b635135ed5ecd67fd22dec7990d29c4f4cb5 *vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
+74889ea42001bf41428cb742ca74e65129c886dc *vp90-2-21-resize_inter_1920x1080_7_3-4.webm
+d2cf3b25956415bb579d368e7098097e482dd73a *vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
+4658986a8ce36ebfcc80a1903e446eaab3985336 *vp90-2-21-resize_inter_320x180_5_1-2.webm
+8a3d8cf325109ffa913cc9426c32eea8c202a09a *vp90-2-21-resize_inter_320x180_5_1-2.webm.md5
+16303aa45176520ee42c2c425247aadc1506b881 *vp90-2-21-resize_inter_320x180_5_3-4.webm
+41cab1ddf7715b680a4dbce42faa9bcd72af4e5c *vp90-2-21-resize_inter_320x180_5_3-4.webm.md5
+56648adcee66dd0e5cb6ac947f5ee1b9cc8ba129 *vp90-2-21-resize_inter_320x180_7_1-2.webm
+70047377787003cc03dda7b2394e6d7eaa666d9e *vp90-2-21-resize_inter_320x180_7_1-2.webm.md5
+d2ff99165488499cc55f75929f1ce5ca9c9e359b *vp90-2-21-resize_inter_320x180_7_3-4.webm
+e69019e378114a4643db283b66d1a7e304761a56 *vp90-2-21-resize_inter_320x180_7_3-4.webm.md5
+4834d129bed0f4289d3a88f2ae3a1736f77621b0 *vp90-2-21-resize_inter_320x240_5_1-2.webm
+a75653c53d22b623c1927fc0088da21dafef21f4 *vp90-2-21-resize_inter_320x240_5_1-2.webm.md5
+19818e1b7fd1c1e63d8873c31b0babe29dd33ba6 *vp90-2-21-resize_inter_320x240_5_3-4.webm
+8d89814ff469a186312111651b16601dfbce4336 *vp90-2-21-resize_inter_320x240_5_3-4.webm.md5
+ac8057bae52498f324ce92a074d5f8207cc4a4a7 *vp90-2-21-resize_inter_320x240_7_1-2.webm
+2643440898c83c08cc47bc744245af696b877c24 *vp90-2-21-resize_inter_320x240_7_1-2.webm.md5
+cf4a4cd38ac8b18c42d8c25a3daafdb39132256b *vp90-2-21-resize_inter_320x240_7_3-4.webm
+70ba8ec9120b26e9b0ffa2c79b432f16cbcb50ec *vp90-2-21-resize_inter_320x240_7_3-4.webm.md5
+669f10409fe1c4a054010162ca47773ea1fdbead *vp90-2-21-resize_inter_640x360_5_1-2.webm
+6355a04249004a35fb386dd1024214234f044383 *vp90-2-21-resize_inter_640x360_5_1-2.webm.md5
+c23763b950b8247c1775d1f8158d93716197676c *vp90-2-21-resize_inter_640x360_5_3-4.webm
+59e6fc381e3ec3b7bdaac586334e0bc944d18fb6 *vp90-2-21-resize_inter_640x360_5_3-4.webm.md5
+71b45cbfdd068baa1f679a69e5e6f421d256a85f *vp90-2-21-resize_inter_640x360_7_1-2.webm
+1416fc761b690c54a955c4cf017fa078520e8c18 *vp90-2-21-resize_inter_640x360_7_1-2.webm.md5
+6c409903279448a697e4db63bab1061784bcd8d2 *vp90-2-21-resize_inter_640x360_7_3-4.webm
+60de1299793433a630b71130cf76c9f5965758e2 *vp90-2-21-resize_inter_640x360_7_3-4.webm.md5
+852b597b8af096d90c80bf0ed6ed3b336b851f19 *vp90-2-21-resize_inter_640x480_5_1-2.webm
+f6856f19236ee46ed462bd0a2e7e72b9c3b9cea6 *vp90-2-21-resize_inter_640x480_5_1-2.webm.md5
+792a16c6f60043bd8dceb515f0b95b8891647858 *vp90-2-21-resize_inter_640x480_5_3-4.webm
+68ffe59877e9a7863805e1c0a3ce18ce037d7c9d *vp90-2-21-resize_inter_640x480_5_3-4.webm.md5
+61e044c4759972a35ea3db8c1478a988910a4ef4 *vp90-2-21-resize_inter_640x480_7_1-2.webm
+7739bfca167b1b43fea72f807f01e097b7cb98d8 *vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
+7291af354b4418917eee00e3a7e366086a0b7a10 *vp90-2-21-resize_inter_640x480_7_3-4.webm
+4a18b09ccb36564193f0215f599d745d95bb558c *vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk
index 8d662448a..2d50ce813 100644
--- a/libvpx/test/test.mk
+++ b/libvpx/test/test.mk
@@ -18,15 +18,17 @@ LIBVPX_TEST_SRCS-yes += video_source.h
LIBVPX_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += altref_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
@@ -44,6 +46,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
LIBVPX_TEST_SRCS-yes += decode_test_driver.h
@@ -58,10 +61,10 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_test.cc ../y4menc.c ../y4menc.h
## WebM Parsing
ifeq ($(CONFIG_WEBM_IO), yes)
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.cpp
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.hpp
-LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvreader.hpp
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h
+LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS)
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc
@@ -92,10 +95,9 @@ endif
## shared library builds don't make these functions accessible.
##
ifeq ($(CONFIG_SHARED),)
-LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc
## VP8
-ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+ifeq ($(CONFIG_VP8),yes)
# These tests require both the encoder and decoder to be built.
ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
@@ -103,12 +105,13 @@ LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += vp8_fragments_test.cc
endif
+LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += add_noise_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
LIBVPX_TEST_SRCS-yes += idct_test.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
@@ -121,7 +124,7 @@ endif
endif # VP8
## VP9
-ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+ifeq ($(CONFIG_VP9),yes)
# These tests require both the encoder and decoder to be built.
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes)
@@ -134,25 +137,27 @@ LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += vp9_encoder_parms_get_to_decoder.cc
endif
-LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
+LIBVPX_TEST_SRCS-yes += convolve_test.cc
+LIBVPX_TEST_SRCS-yes += lpf_8_test.cc
+LIBVPX_TEST_SRCS-yes += vp9_intrapred_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)
LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
-
endif
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
@@ -162,14 +167,12 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc
endif # VP9
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
+## Multi-codec / unconditional whitebox tests.
-TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
-TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
-## VP10
-LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
+TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
+TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
endif # CONFIG_SHARED
diff --git a/libvpx/test/test_intra_pred_speed.cc b/libvpx/test/test_intra_pred_speed.cc
index 5d59e83f7..2acf744d5 100644
--- a/libvpx/test/test_intra_pred_speed.cc
+++ b/libvpx/test/test_intra_pred_speed.cc
@@ -187,18 +187,20 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c,
vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c)
-#if HAVE_SSE && CONFIG_USE_X86INC
-INTRA_PRED_TEST(SSE, TestIntraPred4, vpx_dc_predictor_4x4_sse,
- vpx_dc_left_predictor_4x4_sse, vpx_dc_top_predictor_4x4_sse,
- vpx_dc_128_predictor_4x4_sse, vpx_v_predictor_4x4_sse, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_sse)
-#endif // HAVE_SSE && CONFIG_USE_X86INC
+#if HAVE_SSE2 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
+ vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
+ vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
+ vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
+ NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
+ vpx_tm_predictor_4x4_sse2)
+#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
- vpx_h_predictor_4x4_ssse3, vpx_d45_predictor_4x4_ssse3, NULL,
- NULL, vpx_d153_predictor_4x4_ssse3,
- vpx_d207_predictor_4x4_ssse3, vpx_d63_predictor_4x4_ssse3, NULL)
+ NULL, NULL, NULL, NULL,
+ vpx_d153_predictor_4x4_ssse3, NULL,
+ vpx_d63_predictor_4x4_ssse3, NULL)
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_DSPR2
@@ -235,23 +237,19 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c,
vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c)
-#if HAVE_SSE && CONFIG_USE_X86INC
-INTRA_PRED_TEST(SSE, TestIntraPred8, vpx_dc_predictor_8x8_sse,
- vpx_dc_left_predictor_8x8_sse, vpx_dc_top_predictor_8x8_sse,
- vpx_dc_128_predictor_8x8_sse, vpx_v_predictor_8x8_sse, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-#endif // HAVE_SSE && CONFIG_USE_X86INC
-
#if HAVE_SSE2 && CONFIG_USE_X86INC
-INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
+INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
+ vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
+ vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
+ vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
+ NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
- vpx_h_predictor_8x8_ssse3, vpx_d45_predictor_8x8_ssse3, NULL,
- NULL, vpx_d153_predictor_8x8_ssse3,
- vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
+ NULL, NULL, NULL, NULL,
+ vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3,
+ vpx_d63_predictor_8x8_ssse3, NULL)
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_DSPR2
@@ -293,13 +291,13 @@ INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
vpx_dc_left_predictor_16x16_sse2,
vpx_dc_top_predictor_16x16_sse2,
vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
vpx_tm_predictor_16x16_sse2)
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
- vpx_h_predictor_16x16_ssse3, vpx_d45_predictor_16x16_ssse3,
+ NULL, vpx_d45_predictor_16x16_ssse3,
NULL, NULL, vpx_d153_predictor_16x16_ssse3,
vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3,
NULL)
@@ -340,28 +338,19 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)
#if HAVE_SSE2 && CONFIG_USE_X86INC
-#if ARCH_X86_64
-INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
- vpx_dc_left_predictor_32x32_sse2,
- vpx_dc_top_predictor_32x32_sse2,
- vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- vpx_tm_predictor_32x32_sse2)
-#else
INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
vpx_dc_left_predictor_32x32_sse2,
vpx_dc_top_predictor_32x32_sse2,
vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
-#endif // ARCH_X86_64
+ vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL,
+ NULL, vpx_tm_predictor_32x32_sse2)
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
- vpx_h_predictor_32x32_ssse3, vpx_d45_predictor_32x32_ssse3,
- NULL, NULL, vpx_d153_predictor_32x32_ssse3,
- vpx_d207_predictor_32x32_ssse3, vpx_d63_predictor_32x32_ssse3,
- NULL)
+ NULL, vpx_d45_predictor_32x32_ssse3, NULL, NULL,
+ vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3,
+ vpx_d63_predictor_32x32_ssse3, NULL)
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
#if HAVE_NEON
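The test_intra_pred_speed.cc hunks above shuffle predictors between ISA tables as implementations moved from SSE/SSSE3 to SSE2. Each INTRA_PRED_TEST argument slot is a function pointer for one prediction mode, and NULL marks a mode with no optimized version for that instruction set, so one macro invocation can describe a partially optimized ISA. A minimal sketch of the pattern, assuming libvpx's usual predictor signature; RunAll and fns are hypothetical names, not the harness's actual code:

    #include <cstddef>
    #include <cstdint>

    typedef void (*IntraPredFn)(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left);

    // NULL entries are skipped rather than failed, so a single table can
    // cover an ISA that implements only some of the prediction modes.
    static void RunAll(const IntraPredFn fns[], int n, uint8_t *dst,
                       ptrdiff_t stride, const uint8_t *above,
                       const uint8_t *left) {
      for (int i = 0; i < n; ++i) {
        if (fns[i] == NULL) continue;  // no optimized version in this slot
        fns[i](dst, stride, above, left);
      }
    }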
diff --git a/libvpx/test/test_vector_test.cc b/libvpx/test/test_vector_test.cc
index 437ce44b6..f1aa4d7f7 100644
--- a/libvpx/test/test_vector_test.cc
+++ b/libvpx/test/test_vector_test.cc
@@ -10,6 +10,7 @@
#include <cstdio>
#include <cstdlib>
+#include <set>
#include <string>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "../tools_common.h"
@@ -44,6 +45,12 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
TestVectorTest()
: DecoderTest(GET_PARAM(0)),
md5_file_(NULL) {
+#if CONFIG_VP9_DECODER
+ resize_clips_.insert(
+ ::libvpx_test::kVP9TestVectorsResize,
+ ::libvpx_test::kVP9TestVectorsResize +
+ ::libvpx_test::kNumVP9TestVectorsResize);
+#endif
}
virtual ~TestVectorTest() {
@@ -77,6 +84,10 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
<< "Md5 checksums don't match: frame number = " << frame_number;
}
+#if CONFIG_VP9_DECODER
+ std::set<std::string> resize_clips_;
+#endif
+
private:
FILE *md5_file_;
};
@@ -92,11 +103,19 @@ TEST_P(TestVectorTest, MD5Match) {
const int mode = std::tr1::get<kDecodeMode>(input);
libvpx_test::CompressedVideoSource *video = NULL;
vpx_codec_flags_t flags = 0;
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
char str[256];
if (mode == kFrameParallelMode) {
flags |= VPX_CODEC_USE_FRAME_THREADING;
+#if CONFIG_VP9_DECODER
+ // TODO(hkuang): Fix frame parallel decode bug. See issue 1086.
+ if (resize_clips_.find(filename) != resize_clips_.end()) {
+ printf("Skipping the test file: %s, due to frame parallel decode bug.\n",
+ filename.c_str());
+ return;
+ }
+#endif
}
cfg.threads = threads;
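The test_vector_test.cc change seeds a std::set<std::string> from a static array with the iterator-range insert(), then consults it with find() to skip known-problematic clips in frame-parallel mode. It also swaps the `= {0}` aggregate initializer for value-initialization, vpx_codec_dec_cfg_t(), which zeroes every member of the POD config without provoking missing-field-initializer warnings. A self-contained sketch of both idioms; DecCfg and the clip names are hypothetical stand-ins:

    #include <set>
    #include <string>

    struct DecCfg { unsigned threads, w, h; };  // stand-in for the codec config

    static const char *const kResize[] = { "a_1-2.webm", "a_3-4.webm" };

    int main() {
      std::set<std::string> resize_clips;
      resize_clips.insert(kResize, kResize + 2);  // range insert, as above
      const bool skip =
          resize_clips.find("a_1-2.webm") != resize_clips.end();
      DecCfg cfg = DecCfg();  // value-initialization: all members zeroed
      cfg.threads = 1;
      return (skip && cfg.w == 0) ? 0 : 1;
    }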
diff --git a/libvpx/test/test_vectors.cc b/libvpx/test/test_vectors.cc
index 434a38251..c82247966 100644
--- a/libvpx/test/test_vectors.cc
+++ b/libvpx/test/test_vectors.cc
@@ -52,6 +52,31 @@ const char *const kVP8TestVectors[] = {
const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors);
#endif // CONFIG_VP8_DECODER
#if CONFIG_VP9_DECODER
+#define RESIZE_TEST_VECTORS "vp90-2-21-resize_inter_320x180_5_1-2.webm", \
+ "vp90-2-21-resize_inter_320x180_5_3-4.webm", \
+ "vp90-2-21-resize_inter_320x180_7_1-2.webm", \
+ "vp90-2-21-resize_inter_320x180_7_3-4.webm", \
+ "vp90-2-21-resize_inter_320x240_5_1-2.webm", \
+ "vp90-2-21-resize_inter_320x240_5_3-4.webm", \
+ "vp90-2-21-resize_inter_320x240_7_1-2.webm", \
+ "vp90-2-21-resize_inter_320x240_7_3-4.webm", \
+ "vp90-2-21-resize_inter_640x360_5_1-2.webm", \
+ "vp90-2-21-resize_inter_640x360_5_3-4.webm", \
+ "vp90-2-21-resize_inter_640x360_7_1-2.webm", \
+ "vp90-2-21-resize_inter_640x360_7_3-4.webm", \
+ "vp90-2-21-resize_inter_640x480_5_1-2.webm", \
+ "vp90-2-21-resize_inter_640x480_5_3-4.webm", \
+ "vp90-2-21-resize_inter_640x480_7_1-2.webm", \
+ "vp90-2-21-resize_inter_640x480_7_3-4.webm", \
+ "vp90-2-21-resize_inter_1280x720_5_1-2.webm", \
+ "vp90-2-21-resize_inter_1280x720_5_3-4.webm", \
+ "vp90-2-21-resize_inter_1280x720_7_1-2.webm", \
+ "vp90-2-21-resize_inter_1280x720_7_3-4.webm", \
+ "vp90-2-21-resize_inter_1920x1080_5_1-2.webm", \
+ "vp90-2-21-resize_inter_1920x1080_5_3-4.webm", \
+ "vp90-2-21-resize_inter_1920x1080_7_1-2.webm", \
+ "vp90-2-21-resize_inter_1920x1080_7_3-4.webm",
+
const char *const kVP9TestVectors[] = {
"vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm",
"vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm",
@@ -120,7 +145,10 @@ const char *const kVP9TestVectors[] = {
"vp90-2-02-size-66x10.webm", "vp90-2-02-size-66x16.webm",
"vp90-2-02-size-66x18.webm", "vp90-2-02-size-66x32.webm",
"vp90-2-02-size-66x34.webm", "vp90-2-02-size-66x64.webm",
- "vp90-2-02-size-66x66.webm", "vp90-2-03-size-196x196.webm",
+ "vp90-2-02-size-66x66.webm", "vp90-2-02-size-130x132.webm",
+ "vp90-2-02-size-132x130.webm", "vp90-2-02-size-132x132.webm",
+ "vp90-2-02-size-178x180.webm", "vp90-2-02-size-180x178.webm",
+ "vp90-2-02-size-180x180.webm", "vp90-2-03-size-196x196.webm",
"vp90-2-03-size-196x198.webm", "vp90-2-03-size-196x200.webm",
"vp90-2-03-size-196x202.webm", "vp90-2-03-size-196x208.webm",
"vp90-2-03-size-196x210.webm", "vp90-2-03-size-196x224.webm",
@@ -152,7 +180,8 @@ const char *const kVP9TestVectors[] = {
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
"vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
"vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
- "vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
+ "vp90-2-03-size-226x226.webm", "vp90-2-03-size-352x288.webm",
+ "vp90-2-03-deltaq.webm",
"vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm",
"vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm",
"vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm",
@@ -182,6 +211,20 @@ const char *const kVP9TestVectors[] = {
"vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
"vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
"vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-1-2.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-1-4.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-1-8.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-2-1.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-2-4.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-2-8.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-4-1.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-4-2.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-4-8.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-8-1.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-8-2.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm",
+ "vp90-2-14-resize-10frames-fp-tiles-8-4.webm",
"vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
"vp90-2-16-intra-only.webm", "vp90-2-17-show-existing-frame.webm",
"vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
@@ -193,10 +236,16 @@ const char *const kVP9TestVectors[] = {
"vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm",
"vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm",
"vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm",
-#endif // CONFIG_VP9_HIGHBITDEPTH`
+#endif // CONFIG_VP9_HIGHBITDEPTH
"vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm",
+ RESIZE_TEST_VECTORS
};
const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
+const char *const kVP9TestVectorsResize[] = {
+ RESIZE_TEST_VECTORS
+};
+const int kNumVP9TestVectorsResize = NELEMENTS(kVP9TestVectorsResize);
+#undef RESIZE_TEST_VECTORS
#endif // CONFIG_VP9_DECODER
} // namespace libvpx_test
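RESIZE_TEST_VECTORS keeps the resize clip names in one place: the macro expands once inside kVP9TestVectors, so the clips still run through the normal MD5 pass, and once into the new kVP9TestVectorsResize array consumed by the frame-parallel skip logic, then is #undef'd to stay file-local. NELEMENTS is libvpx's compile-time array-length macro. The same trick in miniature, with hypothetical clip names:

    #define NELEMENTS(x) (int)(sizeof(x) / sizeof((x)[0]))

    #define SHARED_CLIPS "one.webm", "two.webm",  /* hypothetical list */

    static const char *const kAllClips[]   = { "base.webm", SHARED_CLIPS };
    static const char *const kSharedOnly[] = { SHARED_CLIPS };
    static const int kNumAllClips   = NELEMENTS(kAllClips);
    static const int kNumSharedOnly = NELEMENTS(kSharedOnly);
    #undef SHARED_CLIPS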
diff --git a/libvpx/test/test_vectors.h b/libvpx/test/test_vectors.h
index 8e1aabb32..2c6918abd 100644
--- a/libvpx/test/test_vectors.h
+++ b/libvpx/test/test_vectors.h
@@ -23,6 +23,8 @@ extern const char *const kVP8TestVectors[];
#if CONFIG_VP9_DECODER
extern const int kNumVP9TestVectors;
extern const char *const kVP9TestVectors[];
+extern const int kNumVP9TestVectorsResize;
+extern const char *const kVP9TestVectorsResize[];
#endif // CONFIG_VP9_DECODER
} // namespace libvpx_test
diff --git a/libvpx/test/tile_independence_test.cc b/libvpx/test/tile_independence_test.cc
index 193bd4598..f15d94a0a 100644
--- a/libvpx/test/tile_independence_test.cc
+++ b/libvpx/test/tile_independence_test.cc
@@ -103,6 +103,4 @@ TEST_P(TileIndependenceTest, MD5Match) {
}
VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
-
-VP10_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
} // namespace
diff --git a/libvpx/test/twopass_encoder.sh b/libvpx/test/twopass_encoder.sh
index 1189e5131..7a223f2af 100755
--- a/libvpx/test/twopass_encoder.sh
+++ b/libvpx/test/twopass_encoder.sh
@@ -23,7 +23,8 @@ twopass_encoder_verify_environment() {
fi
}
-# Runs twopass_encoder using the codec specified by $1.
+# Runs twopass_encoder using the codec specified by $1 with a frame limit of
+# 100.
twopass_encoder() {
local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}"
local codec="$1"
@@ -35,7 +36,7 @@ twopass_encoder() {
fi
eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
- "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 100 \
${devnull}
[ -e "${output_file}" ] || return 1
@@ -47,16 +48,13 @@ twopass_encoder_vp8() {
fi
}
-# TODO(tomfinegan): Add a frame limit param to twopass_encoder and enable this
-# test. VP9 is just too slow right now: This test takes 31m16s+ on a fast
-# machine.
-DISABLED_twopass_encoder_vp9() {
+twopass_encoder_vp9() {
if [ "$(vp9_encode_available)" = "yes" ]; then
twopass_encoder vp9 || return 1
fi
}
twopass_encoder_tests="twopass_encoder_vp8
- DISABLED_twopass_encoder_vp9"
+ twopass_encoder_vp9"
run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
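The DISABLED_ prefix comes off the VP9 two-pass test because the example binary now takes a frame-limit argument, the trailing 100 passed above; capping the encode at 100 frames is what makes a run that previously took over half an hour tractable. A self-contained sketch of a frame-limited loop in that spirit, where read_frame() and encode_frame() are hypothetical stand-ins for the example's helpers:

    #include <stdio.h>
    #include <stdlib.h>

    static int read_frame(FILE *f) { return fgetc(f) != EOF; }    /* stub */
    static void encode_frame(int n) { printf("frame %d\n", n); }  /* stub */

    int main(int argc, char **argv) {
      const int limit = (argc > 1) ? atoi(argv[1]) : 0;  /* 0 = no limit */
      int frames = 0;
      while (read_frame(stdin) && (limit == 0 || frames < limit)) {
        encode_frame(frames);
        ++frames;
      }
      return 0;
    }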
diff --git a/libvpx/test/variance_test.cc b/libvpx/test/variance_test.cc
index 7a34db6b3..cb6339041 100644
--- a/libvpx/test/variance_test.cc
+++ b/libvpx/test/variance_test.cc
@@ -74,6 +74,10 @@ static unsigned int mb_ss_ref(const int16_t *src) {
return res;
}
+/* Note:
+ * Our codebase calculates the "diff" value in the variance algorithm by
+ * (src - ref).
+ */
static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref,
int l2w, int l2h, int src_stride_coeff,
int ref_stride_coeff, uint32_t *sse_ptr,
@@ -87,14 +91,14 @@ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref,
for (int x = 0; x < w; x++) {
int diff;
if (!use_high_bit_depth_) {
- diff = ref[w * y * ref_stride_coeff + x] -
- src[w * y * src_stride_coeff + x];
+ diff = src[w * y * src_stride_coeff + x] -
+ ref[w * y * ref_stride_coeff + x];
se += diff;
sse += diff * diff;
#if CONFIG_VP9_HIGHBITDEPTH
} else {
- diff = CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x] -
- CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x];
+ diff = CONVERT_TO_SHORTPTR(src)[w * y * src_stride_coeff + x] -
+ CONVERT_TO_SHORTPTR(ref)[w * y * ref_stride_coeff + x];
se += diff;
sse += diff * diff;
#endif // CONFIG_VP9_HIGHBITDEPTH
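The variance_ref() fix flips the per-pixel difference to (src - ref), matching the convention the new comment documents. Only the sign of the accumulated error se changes, and sse is built from squared terms, so the function's result is identical; the point is that the intermediate signed error now agrees with what the optimized kernels compute. The final reduction is variance = sse - se^2 / N with N = 2^(l2w + l2h), done with a shift. A sketch of that reduction, assuming wide accumulators like the test's:

    #include <cstdint>

    // Fold the signed error out of the raw SSE: var = sse - se*se / N,
    // where N = 1 << (l2w + l2h) is the block's pixel count.
    static uint32_t variance_from_sums(int64_t se, int64_t sse,
                                       int l2w, int l2h) {
      return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
    }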
@@ -309,15 +313,15 @@ template<typename VarianceFunctionType>
void VarianceTest<VarianceFunctionType>::RefTest() {
for (int i = 0; i < 10; ++i) {
for (int j = 0; j < block_size_; j++) {
- if (!use_high_bit_depth_) {
- src_[j] = rnd_.Rand8();
- ref_[j] = rnd_.Rand8();
+ if (!use_high_bit_depth_) {
+ src_[j] = rnd_.Rand8();
+ ref_[j] = rnd_.Rand8();
#if CONFIG_VP9_HIGHBITDEPTH
- } else {
- CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() && mask_;
- CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() && mask_;
+ } else {
+ CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask_;
+ CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask_;
#endif // CONFIG_VP9_HIGHBITDEPTH
- }
+ }
}
unsigned int sse1, sse2;
unsigned int var1;
@@ -328,8 +332,10 @@ void VarianceTest<VarianceFunctionType>::RefTest() {
log2height_, stride_coeff,
stride_coeff, &sse2,
use_high_bit_depth_, bit_depth_);
- EXPECT_EQ(sse1, sse2);
- EXPECT_EQ(var1, var2);
+ EXPECT_EQ(sse1, sse2)
+ << "Error at test index: " << i;
+ EXPECT_EQ(var1, var2)
+ << "Error at test index: " << i;
}
}
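Beyond the indentation cleanup, this hunk fixes a real bug: rnd_.Rand16() && mask_ is a logical AND, which collapses any two nonzero operands to 1, so the high-bit-depth buffers were being filled with zeros and ones rather than random samples clipped to the bit depth. The bitwise & keeps the low bits. A two-assert illustration:

    #include <cassert>

    int main() {
      const int v = 0x1234, mask = 0x3FF;  // 10-bit mask
      assert((v && mask) == 1);     // logical AND: "both nonzero", yields 1
      assert((v & mask) == 0x234);  // bitwise AND: value clipped to 10 bits
      return 0;
    }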
@@ -346,8 +352,8 @@ void VarianceTest<VarianceFunctionType>::RefStrideTest() {
ref_[ref_ind] = rnd_.Rand8();
#if CONFIG_VP9_HIGHBITDEPTH
} else {
- CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() && mask_;
- CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() && mask_;
+ CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask_;
+ CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask_;
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
@@ -361,8 +367,10 @@ void VarianceTest<VarianceFunctionType>::RefStrideTest() {
log2height_, src_stride_coeff,
ref_stride_coeff, &sse2,
use_high_bit_depth_, bit_depth_);
- EXPECT_EQ(sse1, sse2);
- EXPECT_EQ(var1, var2);
+ EXPECT_EQ(sse1, sse2)
+ << "Error at test index: " << i;
+ EXPECT_EQ(var1, var2)
+ << "Error at test index: " << i;
}
}
@@ -747,115 +755,63 @@ TEST_P(VpxSubpelAvgVarianceTest, Ref) { RefTest(); }
INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest,
::testing::Values(vpx_get_mb_ss_c));
-const Get4x4SseFunc get4x4sse_cs_c = vpx_get4x4sse_cs_c;
INSTANTIATE_TEST_CASE_P(C, VpxSseTest,
- ::testing::Values(make_tuple(2, 2, get4x4sse_cs_c)));
+ ::testing::Values(make_tuple(2, 2,
+ &vpx_get4x4sse_cs_c)));
-const VarianceMxNFunc mse16x16_c = vpx_mse16x16_c;
-const VarianceMxNFunc mse16x8_c = vpx_mse16x8_c;
-const VarianceMxNFunc mse8x16_c = vpx_mse8x16_c;
-const VarianceMxNFunc mse8x8_c = vpx_mse8x8_c;
INSTANTIATE_TEST_CASE_P(C, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_c),
- make_tuple(4, 3, mse16x8_c),
- make_tuple(3, 4, mse8x16_c),
- make_tuple(3, 3, mse8x8_c)));
-
-const VarianceMxNFunc variance64x64_c = vpx_variance64x64_c;
-const VarianceMxNFunc variance64x32_c = vpx_variance64x32_c;
-const VarianceMxNFunc variance32x64_c = vpx_variance32x64_c;
-const VarianceMxNFunc variance32x32_c = vpx_variance32x32_c;
-const VarianceMxNFunc variance32x16_c = vpx_variance32x16_c;
-const VarianceMxNFunc variance16x32_c = vpx_variance16x32_c;
-const VarianceMxNFunc variance16x16_c = vpx_variance16x16_c;
-const VarianceMxNFunc variance16x8_c = vpx_variance16x8_c;
-const VarianceMxNFunc variance8x16_c = vpx_variance8x16_c;
-const VarianceMxNFunc variance8x8_c = vpx_variance8x8_c;
-const VarianceMxNFunc variance8x4_c = vpx_variance8x4_c;
-const VarianceMxNFunc variance4x8_c = vpx_variance4x8_c;
-const VarianceMxNFunc variance4x4_c = vpx_variance4x4_c;
+ ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_c),
+ make_tuple(4, 3, &vpx_mse16x8_c),
+ make_tuple(3, 4, &vpx_mse8x16_c),
+ make_tuple(3, 3, &vpx_mse8x8_c)));
+
INSTANTIATE_TEST_CASE_P(
C, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, variance64x64_c, 0),
- make_tuple(6, 5, variance64x32_c, 0),
- make_tuple(5, 6, variance32x64_c, 0),
- make_tuple(5, 5, variance32x32_c, 0),
- make_tuple(5, 4, variance32x16_c, 0),
- make_tuple(4, 5, variance16x32_c, 0),
- make_tuple(4, 4, variance16x16_c, 0),
- make_tuple(4, 3, variance16x8_c, 0),
- make_tuple(3, 4, variance8x16_c, 0),
- make_tuple(3, 3, variance8x8_c, 0),
- make_tuple(3, 2, variance8x4_c, 0),
- make_tuple(2, 3, variance4x8_c, 0),
- make_tuple(2, 2, variance4x4_c, 0)));
-
-const SubpixVarMxNFunc subpel_var64x64_c = vpx_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc subpel_var64x32_c = vpx_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc subpel_var32x64_c = vpx_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc subpel_var32x32_c = vpx_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc subpel_var32x16_c = vpx_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc subpel_var16x32_c = vpx_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc subpel_var16x16_c = vpx_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc subpel_var16x8_c = vpx_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc subpel_var8x16_c = vpx_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc subpel_var8x8_c = vpx_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc subpel_var8x4_c = vpx_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc subpel_var4x8_c = vpx_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc subpel_var4x4_c = vpx_sub_pixel_variance4x4_c;
+ ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_c, 0),
+ make_tuple(6, 5, &vpx_variance64x32_c, 0),
+ make_tuple(5, 6, &vpx_variance32x64_c, 0),
+ make_tuple(5, 5, &vpx_variance32x32_c, 0),
+ make_tuple(5, 4, &vpx_variance32x16_c, 0),
+ make_tuple(4, 5, &vpx_variance16x32_c, 0),
+ make_tuple(4, 4, &vpx_variance16x16_c, 0),
+ make_tuple(4, 3, &vpx_variance16x8_c, 0),
+ make_tuple(3, 4, &vpx_variance8x16_c, 0),
+ make_tuple(3, 3, &vpx_variance8x8_c, 0),
+ make_tuple(3, 2, &vpx_variance8x4_c, 0),
+ make_tuple(2, 3, &vpx_variance4x8_c, 0),
+ make_tuple(2, 2, &vpx_variance4x4_c, 0)));
+
INSTANTIATE_TEST_CASE_P(
C, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_var64x64_c, 0),
- make_tuple(6, 5, subpel_var64x32_c, 0),
- make_tuple(5, 6, subpel_var32x64_c, 0),
- make_tuple(5, 5, subpel_var32x32_c, 0),
- make_tuple(5, 4, subpel_var32x16_c, 0),
- make_tuple(4, 5, subpel_var16x32_c, 0),
- make_tuple(4, 4, subpel_var16x16_c, 0),
- make_tuple(4, 3, subpel_var16x8_c, 0),
- make_tuple(3, 4, subpel_var8x16_c, 0),
- make_tuple(3, 3, subpel_var8x8_c, 0),
- make_tuple(3, 2, subpel_var8x4_c, 0),
- make_tuple(2, 3, subpel_var4x8_c, 0),
- make_tuple(2, 2, subpel_var4x4_c, 0)));
-
-const SubpixAvgVarMxNFunc subpel_avg_var64x64_c =
- vpx_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc subpel_avg_var64x32_c =
- vpx_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc subpel_avg_var32x64_c =
- vpx_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc subpel_avg_var32x32_c =
- vpx_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc subpel_avg_var32x16_c =
- vpx_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc subpel_avg_var16x32_c =
- vpx_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc subpel_avg_var16x16_c =
- vpx_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc subpel_avg_var16x8_c =
- vpx_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc subpel_avg_var8x16_c =
- vpx_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc subpel_avg_var8x8_c = vpx_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc subpel_avg_var8x4_c = vpx_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc subpel_avg_var4x8_c = vpx_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc subpel_avg_var4x4_c = vpx_sub_pixel_avg_variance4x4_c;
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_variance64x32_c, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_variance32x64_c, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_variance32x32_c, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_variance32x16_c, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_variance16x32_c, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_variance16x16_c, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_variance16x8_c, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_variance8x16_c, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_variance8x8_c, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_variance8x4_c, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_variance4x8_c, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_variance4x4_c, 0)));
+
INSTANTIATE_TEST_CASE_P(
C, VpxSubpelAvgVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_avg_var64x64_c, 0),
- make_tuple(6, 5, subpel_avg_var64x32_c, 0),
- make_tuple(5, 6, subpel_avg_var32x64_c, 0),
- make_tuple(5, 5, subpel_avg_var32x32_c, 0),
- make_tuple(5, 4, subpel_avg_var32x16_c, 0),
- make_tuple(4, 5, subpel_avg_var16x32_c, 0),
- make_tuple(4, 4, subpel_avg_var16x16_c, 0),
- make_tuple(4, 3, subpel_avg_var16x8_c, 0),
- make_tuple(3, 4, subpel_avg_var8x16_c, 0),
- make_tuple(3, 3, subpel_avg_var8x8_c, 0),
- make_tuple(3, 2, subpel_avg_var8x4_c, 0),
- make_tuple(2, 3, subpel_avg_var4x8_c, 0),
- make_tuple(2, 2, subpel_avg_var4x4_c, 0)));
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_c, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0)));
#if CONFIG_VP9_HIGHBITDEPTH
typedef MseTest<VarianceMxNFunc> VpxHBDMseTest;
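The long run of deletions in this hunk drops the one-line const aliases and passes &vpx_... directly to make_tuple; a function name and its address-of form yield the same pointer, so the instantiated cases are unchanged while roughly a hundred lines of boilerplate disappear. Each tuple reads (log2 width, log2 height, kernel, last element), where the last element is 0 for the 8-bit tests and the bit depth (8, 10, or 12) for the high-bit-depth ones. A sketch of unpacking such a parameter, using std::tuple where the test itself goes through gtest's std::tr1::tuple; the kernel signature is an assumption shaped like libvpx's variance functions:

    #include <tuple>

    typedef unsigned int (*VarFn)(const unsigned char *src, int src_stride,
                                  const unsigned char *ref, int ref_stride,
                                  unsigned int *sse);

    static void Unpack(const std::tuple<int, int, VarFn, int> &p) {
      const int log2w = std::get<0>(p);  // block width  = 1 << log2w
      const int log2h = std::get<1>(p);  // block height = 1 << log2h
      VarFn fn = std::get<2>(p);         // kernel under test
      const int bd = std::get<3>(p);     // 0, or bit depth for HBD tests
      (void)log2w; (void)log2h; (void)fn; (void)bd;
    }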
@@ -875,1166 +831,507 @@ TEST_P(VpxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(VpxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
/* TODO(debargha): This test does not support the highbd version
-const VarianceMxNFunc highbd_12_mse16x16_c = vpx_highbd_12_mse16x16_c;
-const VarianceMxNFunc highbd_12_mse16x8_c = vpx_highbd_12_mse16x8_c;
-const VarianceMxNFunc highbd_12_mse8x16_c = vpx_highbd_12_mse8x16_c;
-const VarianceMxNFunc highbd_12_mse8x8_c = vpx_highbd_12_mse8x8_c;
-
-const VarianceMxNFunc highbd_10_mse16x16_c = vpx_highbd_10_mse16x16_c;
-const VarianceMxNFunc highbd_10_mse16x8_c = vpx_highbd_10_mse16x8_c;
-const VarianceMxNFunc highbd_10_mse8x16_c = vpx_highbd_10_mse8x16_c;
-const VarianceMxNFunc highbd_10_mse8x8_c = vpx_highbd_10_mse8x8_c;
-
-const VarianceMxNFunc highbd_8_mse16x16_c = vpx_highbd_8_mse16x16_c;
-const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
-const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
-const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
INSTANTIATE_TEST_CASE_P(
- C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
- make_tuple(4, 4, highbd_12_mse16x8_c),
- make_tuple(4, 4, highbd_12_mse8x16_c),
- make_tuple(4, 4, highbd_12_mse8x8_c),
- make_tuple(4, 4, highbd_10_mse16x16_c),
- make_tuple(4, 4, highbd_10_mse16x8_c),
- make_tuple(4, 4, highbd_10_mse8x16_c),
- make_tuple(4, 4, highbd_10_mse8x8_c),
- make_tuple(4, 4, highbd_8_mse16x16_c),
- make_tuple(4, 4, highbd_8_mse16x8_c),
- make_tuple(4, 4, highbd_8_mse8x16_c),
- make_tuple(4, 4, highbd_8_mse8x8_c)));
+ C, VpxHBDMseTest,
+ ::testing::Values(make_tuple(4, 4, &vpx_highbd_12_mse16x16_c),
+ make_tuple(4, 4, &vpx_highbd_12_mse16x8_c),
+ make_tuple(4, 4, &vpx_highbd_12_mse8x16_c),
+ make_tuple(4, 4, &vpx_highbd_12_mse8x8_c),
+ make_tuple(4, 4, &vpx_highbd_10_mse16x16_c),
+ make_tuple(4, 4, &vpx_highbd_10_mse16x8_c),
+ make_tuple(4, 4, &vpx_highbd_10_mse8x16_c),
+ make_tuple(4, 4, &vpx_highbd_10_mse8x8_c),
+ make_tuple(4, 4, &vpx_highbd_8_mse16x16_c),
+ make_tuple(4, 4, &vpx_highbd_8_mse16x8_c),
+ make_tuple(4, 4, &vpx_highbd_8_mse8x16_c),
+ make_tuple(4, 4, &vpx_highbd_8_mse8x8_c)));
*/
-const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
-const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
-const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
-const VarianceMxNFunc highbd_12_variance32x32_c = vpx_highbd_12_variance32x32_c;
-const VarianceMxNFunc highbd_12_variance32x16_c = vpx_highbd_12_variance32x16_c;
-const VarianceMxNFunc highbd_12_variance16x32_c = vpx_highbd_12_variance16x32_c;
-const VarianceMxNFunc highbd_12_variance16x16_c = vpx_highbd_12_variance16x16_c;
-const VarianceMxNFunc highbd_12_variance16x8_c = vpx_highbd_12_variance16x8_c;
-const VarianceMxNFunc highbd_12_variance8x16_c = vpx_highbd_12_variance8x16_c;
-const VarianceMxNFunc highbd_12_variance8x8_c = vpx_highbd_12_variance8x8_c;
-const VarianceMxNFunc highbd_12_variance8x4_c = vpx_highbd_12_variance8x4_c;
-const VarianceMxNFunc highbd_12_variance4x8_c = vpx_highbd_12_variance4x8_c;
-const VarianceMxNFunc highbd_12_variance4x4_c = vpx_highbd_12_variance4x4_c;
-const VarianceMxNFunc highbd_10_variance64x64_c = vpx_highbd_10_variance64x64_c;
-const VarianceMxNFunc highbd_10_variance64x32_c = vpx_highbd_10_variance64x32_c;
-const VarianceMxNFunc highbd_10_variance32x64_c = vpx_highbd_10_variance32x64_c;
-const VarianceMxNFunc highbd_10_variance32x32_c = vpx_highbd_10_variance32x32_c;
-const VarianceMxNFunc highbd_10_variance32x16_c = vpx_highbd_10_variance32x16_c;
-const VarianceMxNFunc highbd_10_variance16x32_c = vpx_highbd_10_variance16x32_c;
-const VarianceMxNFunc highbd_10_variance16x16_c = vpx_highbd_10_variance16x16_c;
-const VarianceMxNFunc highbd_10_variance16x8_c = vpx_highbd_10_variance16x8_c;
-const VarianceMxNFunc highbd_10_variance8x16_c = vpx_highbd_10_variance8x16_c;
-const VarianceMxNFunc highbd_10_variance8x8_c = vpx_highbd_10_variance8x8_c;
-const VarianceMxNFunc highbd_10_variance8x4_c = vpx_highbd_10_variance8x4_c;
-const VarianceMxNFunc highbd_10_variance4x8_c = vpx_highbd_10_variance4x8_c;
-const VarianceMxNFunc highbd_10_variance4x4_c = vpx_highbd_10_variance4x4_c;
-const VarianceMxNFunc highbd_8_variance64x64_c = vpx_highbd_8_variance64x64_c;
-const VarianceMxNFunc highbd_8_variance64x32_c = vpx_highbd_8_variance64x32_c;
-const VarianceMxNFunc highbd_8_variance32x64_c = vpx_highbd_8_variance32x64_c;
-const VarianceMxNFunc highbd_8_variance32x32_c = vpx_highbd_8_variance32x32_c;
-const VarianceMxNFunc highbd_8_variance32x16_c = vpx_highbd_8_variance32x16_c;
-const VarianceMxNFunc highbd_8_variance16x32_c = vpx_highbd_8_variance16x32_c;
-const VarianceMxNFunc highbd_8_variance16x16_c = vpx_highbd_8_variance16x16_c;
-const VarianceMxNFunc highbd_8_variance16x8_c = vpx_highbd_8_variance16x8_c;
-const VarianceMxNFunc highbd_8_variance8x16_c = vpx_highbd_8_variance8x16_c;
-const VarianceMxNFunc highbd_8_variance8x8_c = vpx_highbd_8_variance8x8_c;
-const VarianceMxNFunc highbd_8_variance8x4_c = vpx_highbd_8_variance8x4_c;
-const VarianceMxNFunc highbd_8_variance4x8_c = vpx_highbd_8_variance4x8_c;
-const VarianceMxNFunc highbd_8_variance4x4_c = vpx_highbd_8_variance4x4_c;
INSTANTIATE_TEST_CASE_P(
C, VpxHBDVarianceTest,
- ::testing::Values(make_tuple(6, 6, highbd_12_variance64x64_c, 12),
- make_tuple(6, 5, highbd_12_variance64x32_c, 12),
- make_tuple(5, 6, highbd_12_variance32x64_c, 12),
- make_tuple(5, 5, highbd_12_variance32x32_c, 12),
- make_tuple(5, 4, highbd_12_variance32x16_c, 12),
- make_tuple(4, 5, highbd_12_variance16x32_c, 12),
- make_tuple(4, 4, highbd_12_variance16x16_c, 12),
- make_tuple(4, 3, highbd_12_variance16x8_c, 12),
- make_tuple(3, 4, highbd_12_variance8x16_c, 12),
- make_tuple(3, 3, highbd_12_variance8x8_c, 12),
- make_tuple(3, 2, highbd_12_variance8x4_c, 12),
- make_tuple(2, 3, highbd_12_variance4x8_c, 12),
- make_tuple(2, 2, highbd_12_variance4x4_c, 12),
- make_tuple(6, 6, highbd_10_variance64x64_c, 10),
- make_tuple(6, 5, highbd_10_variance64x32_c, 10),
- make_tuple(5, 6, highbd_10_variance32x64_c, 10),
- make_tuple(5, 5, highbd_10_variance32x32_c, 10),
- make_tuple(5, 4, highbd_10_variance32x16_c, 10),
- make_tuple(4, 5, highbd_10_variance16x32_c, 10),
- make_tuple(4, 4, highbd_10_variance16x16_c, 10),
- make_tuple(4, 3, highbd_10_variance16x8_c, 10),
- make_tuple(3, 4, highbd_10_variance8x16_c, 10),
- make_tuple(3, 3, highbd_10_variance8x8_c, 10),
- make_tuple(3, 2, highbd_10_variance8x4_c, 10),
- make_tuple(2, 3, highbd_10_variance4x8_c, 10),
- make_tuple(2, 2, highbd_10_variance4x4_c, 10),
- make_tuple(6, 6, highbd_8_variance64x64_c, 8),
- make_tuple(6, 5, highbd_8_variance64x32_c, 8),
- make_tuple(5, 6, highbd_8_variance32x64_c, 8),
- make_tuple(5, 5, highbd_8_variance32x32_c, 8),
- make_tuple(5, 4, highbd_8_variance32x16_c, 8),
- make_tuple(4, 5, highbd_8_variance16x32_c, 8),
- make_tuple(4, 4, highbd_8_variance16x16_c, 8),
- make_tuple(4, 3, highbd_8_variance16x8_c, 8),
- make_tuple(3, 4, highbd_8_variance8x16_c, 8),
- make_tuple(3, 3, highbd_8_variance8x8_c, 8),
- make_tuple(3, 2, highbd_8_variance8x4_c, 8),
- make_tuple(2, 3, highbd_8_variance4x8_c, 8),
- make_tuple(2, 2, highbd_8_variance4x4_c, 8)));
-
-const SubpixVarMxNFunc highbd_8_subpel_var64x64_c =
- vpx_highbd_8_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc highbd_8_subpel_var64x32_c =
- vpx_highbd_8_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc highbd_8_subpel_var32x64_c =
- vpx_highbd_8_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc highbd_8_subpel_var32x32_c =
- vpx_highbd_8_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc highbd_8_subpel_var32x16_c =
- vpx_highbd_8_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc highbd_8_subpel_var16x32_c =
- vpx_highbd_8_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc highbd_8_subpel_var16x16_c =
- vpx_highbd_8_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc highbd_8_subpel_var16x8_c =
- vpx_highbd_8_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc highbd_8_subpel_var8x16_c =
- vpx_highbd_8_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc highbd_8_subpel_var8x8_c =
- vpx_highbd_8_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc highbd_8_subpel_var8x4_c =
- vpx_highbd_8_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc highbd_8_subpel_var4x8_c =
- vpx_highbd_8_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc highbd_8_subpel_var4x4_c =
- vpx_highbd_8_sub_pixel_variance4x4_c;
-const SubpixVarMxNFunc highbd_10_subpel_var64x64_c =
- vpx_highbd_10_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc highbd_10_subpel_var64x32_c =
- vpx_highbd_10_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc highbd_10_subpel_var32x64_c =
- vpx_highbd_10_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc highbd_10_subpel_var32x32_c =
- vpx_highbd_10_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc highbd_10_subpel_var32x16_c =
- vpx_highbd_10_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc highbd_10_subpel_var16x32_c =
- vpx_highbd_10_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc highbd_10_subpel_var16x16_c =
- vpx_highbd_10_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc highbd_10_subpel_var16x8_c =
- vpx_highbd_10_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc highbd_10_subpel_var8x16_c =
- vpx_highbd_10_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc highbd_10_subpel_var8x8_c =
- vpx_highbd_10_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc highbd_10_subpel_var8x4_c =
- vpx_highbd_10_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc highbd_10_subpel_var4x8_c =
- vpx_highbd_10_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc highbd_10_subpel_var4x4_c =
- vpx_highbd_10_sub_pixel_variance4x4_c;
-const SubpixVarMxNFunc highbd_12_subpel_var64x64_c =
- vpx_highbd_12_sub_pixel_variance64x64_c;
-const SubpixVarMxNFunc highbd_12_subpel_var64x32_c =
- vpx_highbd_12_sub_pixel_variance64x32_c;
-const SubpixVarMxNFunc highbd_12_subpel_var32x64_c =
- vpx_highbd_12_sub_pixel_variance32x64_c;
-const SubpixVarMxNFunc highbd_12_subpel_var32x32_c =
- vpx_highbd_12_sub_pixel_variance32x32_c;
-const SubpixVarMxNFunc highbd_12_subpel_var32x16_c =
- vpx_highbd_12_sub_pixel_variance32x16_c;
-const SubpixVarMxNFunc highbd_12_subpel_var16x32_c =
- vpx_highbd_12_sub_pixel_variance16x32_c;
-const SubpixVarMxNFunc highbd_12_subpel_var16x16_c =
- vpx_highbd_12_sub_pixel_variance16x16_c;
-const SubpixVarMxNFunc highbd_12_subpel_var16x8_c =
- vpx_highbd_12_sub_pixel_variance16x8_c;
-const SubpixVarMxNFunc highbd_12_subpel_var8x16_c =
- vpx_highbd_12_sub_pixel_variance8x16_c;
-const SubpixVarMxNFunc highbd_12_subpel_var8x8_c =
- vpx_highbd_12_sub_pixel_variance8x8_c;
-const SubpixVarMxNFunc highbd_12_subpel_var8x4_c =
- vpx_highbd_12_sub_pixel_variance8x4_c;
-const SubpixVarMxNFunc highbd_12_subpel_var4x8_c =
- vpx_highbd_12_sub_pixel_variance4x8_c;
-const SubpixVarMxNFunc highbd_12_subpel_var4x4_c =
- vpx_highbd_12_sub_pixel_variance4x4_c;
+ ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_c, 12),
+ make_tuple(6, 5, &vpx_highbd_12_variance64x32_c, 12),
+ make_tuple(5, 6, &vpx_highbd_12_variance32x64_c, 12),
+ make_tuple(5, 5, &vpx_highbd_12_variance32x32_c, 12),
+ make_tuple(5, 4, &vpx_highbd_12_variance32x16_c, 12),
+ make_tuple(4, 5, &vpx_highbd_12_variance16x32_c, 12),
+ make_tuple(4, 4, &vpx_highbd_12_variance16x16_c, 12),
+ make_tuple(4, 3, &vpx_highbd_12_variance16x8_c, 12),
+ make_tuple(3, 4, &vpx_highbd_12_variance8x16_c, 12),
+ make_tuple(3, 3, &vpx_highbd_12_variance8x8_c, 12),
+ make_tuple(3, 2, &vpx_highbd_12_variance8x4_c, 12),
+ make_tuple(2, 3, &vpx_highbd_12_variance4x8_c, 12),
+ make_tuple(2, 2, &vpx_highbd_12_variance4x4_c, 12),
+ make_tuple(6, 6, &vpx_highbd_10_variance64x64_c, 10),
+ make_tuple(6, 5, &vpx_highbd_10_variance64x32_c, 10),
+ make_tuple(5, 6, &vpx_highbd_10_variance32x64_c, 10),
+ make_tuple(5, 5, &vpx_highbd_10_variance32x32_c, 10),
+ make_tuple(5, 4, &vpx_highbd_10_variance32x16_c, 10),
+ make_tuple(4, 5, &vpx_highbd_10_variance16x32_c, 10),
+ make_tuple(4, 4, &vpx_highbd_10_variance16x16_c, 10),
+ make_tuple(4, 3, &vpx_highbd_10_variance16x8_c, 10),
+ make_tuple(3, 4, &vpx_highbd_10_variance8x16_c, 10),
+ make_tuple(3, 3, &vpx_highbd_10_variance8x8_c, 10),
+ make_tuple(3, 2, &vpx_highbd_10_variance8x4_c, 10),
+ make_tuple(2, 3, &vpx_highbd_10_variance4x8_c, 10),
+ make_tuple(2, 2, &vpx_highbd_10_variance4x4_c, 10),
+ make_tuple(6, 6, &vpx_highbd_8_variance64x64_c, 8),
+ make_tuple(6, 5, &vpx_highbd_8_variance64x32_c, 8),
+ make_tuple(5, 6, &vpx_highbd_8_variance32x64_c, 8),
+ make_tuple(5, 5, &vpx_highbd_8_variance32x32_c, 8),
+ make_tuple(5, 4, &vpx_highbd_8_variance32x16_c, 8),
+ make_tuple(4, 5, &vpx_highbd_8_variance16x32_c, 8),
+ make_tuple(4, 4, &vpx_highbd_8_variance16x16_c, 8),
+ make_tuple(4, 3, &vpx_highbd_8_variance16x8_c, 8),
+ make_tuple(3, 4, &vpx_highbd_8_variance8x16_c, 8),
+ make_tuple(3, 3, &vpx_highbd_8_variance8x8_c, 8),
+ make_tuple(3, 2, &vpx_highbd_8_variance8x4_c, 8),
+ make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8),
+ make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8)));
+
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, highbd_8_subpel_var64x64_c, 8),
- make_tuple(6, 5, highbd_8_subpel_var64x32_c, 8),
- make_tuple(5, 6, highbd_8_subpel_var32x64_c, 8),
- make_tuple(5, 5, highbd_8_subpel_var32x32_c, 8),
- make_tuple(5, 4, highbd_8_subpel_var32x16_c, 8),
- make_tuple(4, 5, highbd_8_subpel_var16x32_c, 8),
- make_tuple(4, 4, highbd_8_subpel_var16x16_c, 8),
- make_tuple(4, 3, highbd_8_subpel_var16x8_c, 8),
- make_tuple(3, 4, highbd_8_subpel_var8x16_c, 8),
- make_tuple(3, 3, highbd_8_subpel_var8x8_c, 8),
- make_tuple(3, 2, highbd_8_subpel_var8x4_c, 8),
- make_tuple(2, 3, highbd_8_subpel_var4x8_c, 8),
- make_tuple(2, 2, highbd_8_subpel_var4x4_c, 8),
- make_tuple(6, 6, highbd_10_subpel_var64x64_c, 10),
- make_tuple(6, 5, highbd_10_subpel_var64x32_c, 10),
- make_tuple(5, 6, highbd_10_subpel_var32x64_c, 10),
- make_tuple(5, 5, highbd_10_subpel_var32x32_c, 10),
- make_tuple(5, 4, highbd_10_subpel_var32x16_c, 10),
- make_tuple(4, 5, highbd_10_subpel_var16x32_c, 10),
- make_tuple(4, 4, highbd_10_subpel_var16x16_c, 10),
- make_tuple(4, 3, highbd_10_subpel_var16x8_c, 10),
- make_tuple(3, 4, highbd_10_subpel_var8x16_c, 10),
- make_tuple(3, 3, highbd_10_subpel_var8x8_c, 10),
- make_tuple(3, 2, highbd_10_subpel_var8x4_c, 10),
- make_tuple(2, 3, highbd_10_subpel_var4x8_c, 10),
- make_tuple(2, 2, highbd_10_subpel_var4x4_c, 10),
- make_tuple(6, 6, highbd_12_subpel_var64x64_c, 12),
- make_tuple(6, 5, highbd_12_subpel_var64x32_c, 12),
- make_tuple(5, 6, highbd_12_subpel_var32x64_c, 12),
- make_tuple(5, 5, highbd_12_subpel_var32x32_c, 12),
- make_tuple(5, 4, highbd_12_subpel_var32x16_c, 12),
- make_tuple(4, 5, highbd_12_subpel_var16x32_c, 12),
- make_tuple(4, 4, highbd_12_subpel_var16x16_c, 12),
- make_tuple(4, 3, highbd_12_subpel_var16x8_c, 12),
- make_tuple(3, 4, highbd_12_subpel_var8x16_c, 12),
- make_tuple(3, 3, highbd_12_subpel_var8x8_c, 12),
- make_tuple(3, 2, highbd_12_subpel_var8x4_c, 12),
- make_tuple(2, 3, highbd_12_subpel_var4x8_c, 12),
- make_tuple(2, 2, highbd_12_subpel_var4x4_c, 12)));
-
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var64x64_c =
- vpx_highbd_8_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var64x32_c =
- vpx_highbd_8_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x64_c =
- vpx_highbd_8_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x32_c =
- vpx_highbd_8_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var32x16_c =
- vpx_highbd_8_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x32_c =
- vpx_highbd_8_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x16_c =
- vpx_highbd_8_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var16x8_c =
- vpx_highbd_8_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x16_c =
- vpx_highbd_8_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x8_c =
- vpx_highbd_8_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var8x4_c =
- vpx_highbd_8_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var4x8_c =
- vpx_highbd_8_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_var4x4_c =
- vpx_highbd_8_sub_pixel_avg_variance4x4_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var64x64_c =
- vpx_highbd_10_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var64x32_c =
- vpx_highbd_10_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x64_c =
- vpx_highbd_10_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x32_c =
- vpx_highbd_10_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var32x16_c =
- vpx_highbd_10_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x32_c =
- vpx_highbd_10_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x16_c =
- vpx_highbd_10_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var16x8_c =
- vpx_highbd_10_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x16_c =
- vpx_highbd_10_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x8_c =
- vpx_highbd_10_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var8x4_c =
- vpx_highbd_10_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var4x8_c =
- vpx_highbd_10_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_var4x4_c =
- vpx_highbd_10_sub_pixel_avg_variance4x4_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var64x64_c =
- vpx_highbd_12_sub_pixel_avg_variance64x64_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var64x32_c =
- vpx_highbd_12_sub_pixel_avg_variance64x32_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x64_c =
- vpx_highbd_12_sub_pixel_avg_variance32x64_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x32_c =
- vpx_highbd_12_sub_pixel_avg_variance32x32_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var32x16_c =
- vpx_highbd_12_sub_pixel_avg_variance32x16_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x32_c =
- vpx_highbd_12_sub_pixel_avg_variance16x32_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x16_c =
- vpx_highbd_12_sub_pixel_avg_variance16x16_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var16x8_c =
- vpx_highbd_12_sub_pixel_avg_variance16x8_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x16_c =
- vpx_highbd_12_sub_pixel_avg_variance8x16_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x8_c =
- vpx_highbd_12_sub_pixel_avg_variance8x8_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var8x4_c =
- vpx_highbd_12_sub_pixel_avg_variance8x4_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var4x8_c =
- vpx_highbd_12_sub_pixel_avg_variance4x8_c;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_var4x4_c =
- vpx_highbd_12_sub_pixel_avg_variance4x4_c;
+ ::testing::Values(
+ make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8),
+ make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8),
+ make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8),
+ make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_c, 8),
+ make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_c, 8),
+ make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_c, 8),
+ make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_c, 8),
+ make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_c, 8),
+ make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_c, 8),
+ make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_c, 8),
+ make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8),
+ make_tuple(2, 3, &vpx_highbd_8_sub_pixel_variance4x8_c, 8),
+ make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8),
+ make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10),
+ make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10),
+ make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10),
+ make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_c, 10),
+ make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_c, 10),
+ make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_c, 10),
+ make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_c, 10),
+ make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_c, 10),
+ make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_c, 10),
+ make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_c, 10),
+ make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10),
+ make_tuple(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10),
+ make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10),
+ make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12),
+ make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12),
+ make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12),
+ make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_c, 12),
+ make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_c, 12),
+ make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_c, 12),
+ make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_c, 12),
+ make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_c, 12),
+ make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_c, 12),
+ make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_c, 12),
+ make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_c, 12),
+ make_tuple(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_c, 12),
+ make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_c, 12)));
+
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelAvgVarianceTest,
::testing::Values(
- make_tuple(6, 6, highbd_8_subpel_avg_var64x64_c, 8),
- make_tuple(6, 5, highbd_8_subpel_avg_var64x32_c, 8),
- make_tuple(5, 6, highbd_8_subpel_avg_var32x64_c, 8),
- make_tuple(5, 5, highbd_8_subpel_avg_var32x32_c, 8),
- make_tuple(5, 4, highbd_8_subpel_avg_var32x16_c, 8),
- make_tuple(4, 5, highbd_8_subpel_avg_var16x32_c, 8),
- make_tuple(4, 4, highbd_8_subpel_avg_var16x16_c, 8),
- make_tuple(4, 3, highbd_8_subpel_avg_var16x8_c, 8),
- make_tuple(3, 4, highbd_8_subpel_avg_var8x16_c, 8),
- make_tuple(3, 3, highbd_8_subpel_avg_var8x8_c, 8),
- make_tuple(3, 2, highbd_8_subpel_avg_var8x4_c, 8),
- make_tuple(2, 3, highbd_8_subpel_avg_var4x8_c, 8),
- make_tuple(2, 2, highbd_8_subpel_avg_var4x4_c, 8),
- make_tuple(6, 6, highbd_10_subpel_avg_var64x64_c, 10),
- make_tuple(6, 5, highbd_10_subpel_avg_var64x32_c, 10),
- make_tuple(5, 6, highbd_10_subpel_avg_var32x64_c, 10),
- make_tuple(5, 5, highbd_10_subpel_avg_var32x32_c, 10),
- make_tuple(5, 4, highbd_10_subpel_avg_var32x16_c, 10),
- make_tuple(4, 5, highbd_10_subpel_avg_var16x32_c, 10),
- make_tuple(4, 4, highbd_10_subpel_avg_var16x16_c, 10),
- make_tuple(4, 3, highbd_10_subpel_avg_var16x8_c, 10),
- make_tuple(3, 4, highbd_10_subpel_avg_var8x16_c, 10),
- make_tuple(3, 3, highbd_10_subpel_avg_var8x8_c, 10),
- make_tuple(3, 2, highbd_10_subpel_avg_var8x4_c, 10),
- make_tuple(2, 3, highbd_10_subpel_avg_var4x8_c, 10),
- make_tuple(2, 2, highbd_10_subpel_avg_var4x4_c, 10),
- make_tuple(6, 6, highbd_12_subpel_avg_var64x64_c, 12),
- make_tuple(6, 5, highbd_12_subpel_avg_var64x32_c, 12),
- make_tuple(5, 6, highbd_12_subpel_avg_var32x64_c, 12),
- make_tuple(5, 5, highbd_12_subpel_avg_var32x32_c, 12),
- make_tuple(5, 4, highbd_12_subpel_avg_var32x16_c, 12),
- make_tuple(4, 5, highbd_12_subpel_avg_var16x32_c, 12),
- make_tuple(4, 4, highbd_12_subpel_avg_var16x16_c, 12),
- make_tuple(4, 3, highbd_12_subpel_avg_var16x8_c, 12),
- make_tuple(3, 4, highbd_12_subpel_avg_var8x16_c, 12),
- make_tuple(3, 3, highbd_12_subpel_avg_var8x8_c, 12),
- make_tuple(3, 2, highbd_12_subpel_avg_var8x4_c, 12),
- make_tuple(2, 3, highbd_12_subpel_avg_var4x8_c, 12),
- make_tuple(2, 2, highbd_12_subpel_avg_var4x4_c, 12)));
+ make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8),
+ make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8),
+ make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8),
+ make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_c, 8),
+ make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_c, 8),
+ make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_c, 8),
+ make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_c, 8),
+ make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_c, 8),
+ make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_c, 8),
+ make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_c, 8),
+ make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8),
+ make_tuple(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8),
+ make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8),
+ make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10),
+ make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10),
+ make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10),
+ make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_c, 10),
+ make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_c, 10),
+ make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_c, 10),
+ make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_c, 10),
+ make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_c, 10),
+ make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_c, 10),
+ make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_c, 10),
+ make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10),
+ make_tuple(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10),
+ make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10),
+ make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12),
+ make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12),
+ make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12),
+ make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_c, 12),
+ make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_c, 12),
+ make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_c, 12),
+ make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_c, 12),
+ make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_c, 12),
+ make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_c, 12),
+ make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_c, 12),
+ make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_c, 12),
+ make_tuple(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_c, 12),
+ make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_c, 12)));
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_MMX
-const VarianceMxNFunc mse16x16_mmx = vpx_mse16x16_mmx;
-INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_mmx)));
-
-INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest,
- ::testing::Values(vpx_get_mb_ss_mmx));
-
-const VarianceMxNFunc variance16x16_mmx = vpx_variance16x16_mmx;
-const VarianceMxNFunc variance16x8_mmx = vpx_variance16x8_mmx;
-const VarianceMxNFunc variance8x16_mmx = vpx_variance8x16_mmx;
-const VarianceMxNFunc variance8x8_mmx = vpx_variance8x8_mmx;
-const VarianceMxNFunc variance4x4_mmx = vpx_variance4x4_mmx;
-INSTANTIATE_TEST_CASE_P(
- MMX, VpxVarianceTest,
- ::testing::Values(make_tuple(4, 4, variance16x16_mmx, 0),
- make_tuple(4, 3, variance16x8_mmx, 0),
- make_tuple(3, 4, variance8x16_mmx, 0),
- make_tuple(3, 3, variance8x8_mmx, 0),
- make_tuple(2, 2, variance4x4_mmx, 0)));
-
-const SubpixVarMxNFunc subpel_var16x16_mmx = vpx_sub_pixel_variance16x16_mmx;
-const SubpixVarMxNFunc subpel_var16x8_mmx = vpx_sub_pixel_variance16x8_mmx;
-const SubpixVarMxNFunc subpel_var8x16_mmx = vpx_sub_pixel_variance8x16_mmx;
-const SubpixVarMxNFunc subpel_var8x8_mmx = vpx_sub_pixel_variance8x8_mmx;
-const SubpixVarMxNFunc subpel_var4x4_mmx = vpx_sub_pixel_variance4x4_mmx;
-INSTANTIATE_TEST_CASE_P(
- MMX, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(4, 4, subpel_var16x16_mmx, 0),
- make_tuple(4, 3, subpel_var16x8_mmx, 0),
- make_tuple(3, 4, subpel_var8x16_mmx, 0),
- make_tuple(3, 3, subpel_var8x8_mmx, 0),
- make_tuple(2, 2, subpel_var4x4_mmx, 0)));
-#endif // HAVE_MMX
-
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
::testing::Values(vpx_get_mb_ss_sse2));
-const VarianceMxNFunc mse16x16_sse2 = vpx_mse16x16_sse2;
-const VarianceMxNFunc mse16x8_sse2 = vpx_mse16x8_sse2;
-const VarianceMxNFunc mse8x16_sse2 = vpx_mse8x16_sse2;
-const VarianceMxNFunc mse8x8_sse2 = vpx_mse8x8_sse2;
INSTANTIATE_TEST_CASE_P(SSE2, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_sse2),
- make_tuple(4, 3, mse16x8_sse2),
- make_tuple(3, 4, mse8x16_sse2),
- make_tuple(3, 3, mse8x8_sse2)));
-
-const VarianceMxNFunc variance64x64_sse2 = vpx_variance64x64_sse2;
-const VarianceMxNFunc variance64x32_sse2 = vpx_variance64x32_sse2;
-const VarianceMxNFunc variance32x64_sse2 = vpx_variance32x64_sse2;
-const VarianceMxNFunc variance32x32_sse2 = vpx_variance32x32_sse2;
-const VarianceMxNFunc variance32x16_sse2 = vpx_variance32x16_sse2;
-const VarianceMxNFunc variance16x32_sse2 = vpx_variance16x32_sse2;
-const VarianceMxNFunc variance16x16_sse2 = vpx_variance16x16_sse2;
-const VarianceMxNFunc variance16x8_sse2 = vpx_variance16x8_sse2;
-const VarianceMxNFunc variance8x16_sse2 = vpx_variance8x16_sse2;
-const VarianceMxNFunc variance8x8_sse2 = vpx_variance8x8_sse2;
-const VarianceMxNFunc variance8x4_sse2 = vpx_variance8x4_sse2;
-const VarianceMxNFunc variance4x8_sse2 = vpx_variance4x8_sse2;
-const VarianceMxNFunc variance4x4_sse2 = vpx_variance4x4_sse2;
+ ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_sse2),
+ make_tuple(4, 3, &vpx_mse16x8_sse2),
+ make_tuple(3, 4, &vpx_mse8x16_sse2),
+ make_tuple(3, 3, &vpx_mse8x8_sse2)));
+
INSTANTIATE_TEST_CASE_P(
SSE2, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, variance64x64_sse2, 0),
- make_tuple(6, 5, variance64x32_sse2, 0),
- make_tuple(5, 6, variance32x64_sse2, 0),
- make_tuple(5, 5, variance32x32_sse2, 0),
- make_tuple(5, 4, variance32x16_sse2, 0),
- make_tuple(4, 5, variance16x32_sse2, 0),
- make_tuple(4, 4, variance16x16_sse2, 0),
- make_tuple(4, 3, variance16x8_sse2, 0),
- make_tuple(3, 4, variance8x16_sse2, 0),
- make_tuple(3, 3, variance8x8_sse2, 0),
- make_tuple(3, 2, variance8x4_sse2, 0),
- make_tuple(2, 3, variance4x8_sse2, 0),
- make_tuple(2, 2, variance4x4_sse2, 0)));
+ ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_sse2, 0),
+ make_tuple(6, 5, &vpx_variance64x32_sse2, 0),
+ make_tuple(5, 6, &vpx_variance32x64_sse2, 0),
+ make_tuple(5, 5, &vpx_variance32x32_sse2, 0),
+ make_tuple(5, 4, &vpx_variance32x16_sse2, 0),
+ make_tuple(4, 5, &vpx_variance16x32_sse2, 0),
+ make_tuple(4, 4, &vpx_variance16x16_sse2, 0),
+ make_tuple(4, 3, &vpx_variance16x8_sse2, 0),
+ make_tuple(3, 4, &vpx_variance8x16_sse2, 0),
+ make_tuple(3, 3, &vpx_variance8x8_sse2, 0),
+ make_tuple(3, 2, &vpx_variance8x4_sse2, 0),
+ make_tuple(2, 3, &vpx_variance4x8_sse2, 0),
+ make_tuple(2, 2, &vpx_variance4x4_sse2, 0)));
#if CONFIG_USE_X86INC
-const SubpixVarMxNFunc subpel_variance64x64_sse2 =
- vpx_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc subpel_variance64x32_sse2 =
- vpx_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc subpel_variance32x64_sse2 =
- vpx_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc subpel_variance32x32_sse2 =
- vpx_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc subpel_variance32x16_sse2 =
- vpx_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc subpel_variance16x32_sse2 =
- vpx_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc subpel_variance16x16_sse2 =
- vpx_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc subpel_variance16x8_sse2 =
- vpx_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc subpel_variance8x16_sse2 =
- vpx_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc subpel_variance8x8_sse2 = vpx_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc subpel_variance8x4_sse2 = vpx_sub_pixel_variance8x4_sse2;
-const SubpixVarMxNFunc subpel_variance4x8_sse = vpx_sub_pixel_variance4x8_sse;
-const SubpixVarMxNFunc subpel_variance4x4_sse = vpx_sub_pixel_variance4x4_sse;
INSTANTIATE_TEST_CASE_P(
SSE2, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_variance64x64_sse2, 0),
- make_tuple(6, 5, subpel_variance64x32_sse2, 0),
- make_tuple(5, 6, subpel_variance32x64_sse2, 0),
- make_tuple(5, 5, subpel_variance32x32_sse2, 0),
- make_tuple(5, 4, subpel_variance32x16_sse2, 0),
- make_tuple(4, 5, subpel_variance16x32_sse2, 0),
- make_tuple(4, 4, subpel_variance16x16_sse2, 0),
- make_tuple(4, 3, subpel_variance16x8_sse2, 0),
- make_tuple(3, 4, subpel_variance8x16_sse2, 0),
- make_tuple(3, 3, subpel_variance8x8_sse2, 0),
- make_tuple(3, 2, subpel_variance8x4_sse2, 0),
- make_tuple(2, 3, subpel_variance4x8_sse, 0),
- make_tuple(2, 2, subpel_variance4x4_sse, 0)));
-
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_sse2 =
- vpx_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance64x32_sse2 =
- vpx_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_sse2 =
- vpx_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_sse2 =
- vpx_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x16_sse2 =
- vpx_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x32_sse2 =
- vpx_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x16_sse2 =
- vpx_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x8_sse2 =
- vpx_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x16_sse2 =
- vpx_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x8_sse2 =
- vpx_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x4_sse2 =
- vpx_sub_pixel_avg_variance8x4_sse2;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x8_sse =
- vpx_sub_pixel_avg_variance4x8_sse;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x4_sse =
- vpx_sub_pixel_avg_variance4x4_sse;
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_sse2, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_variance64x32_sse2, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_variance32x64_sse2, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_variance32x32_sse2, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_variance32x16_sse2, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_variance16x32_sse2, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_variance16x16_sse2, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_variance16x8_sse2, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_variance8x16_sse2, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_variance8x8_sse2, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_variance8x4_sse2, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse2, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse2, 0)));
+
INSTANTIATE_TEST_CASE_P(
SSE2, VpxSubpelAvgVarianceTest,
::testing::Values(
- make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0),
- make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
- make_tuple(5, 6, subpel_avg_variance32x64_sse2, 0),
- make_tuple(5, 5, subpel_avg_variance32x32_sse2, 0),
- make_tuple(5, 4, subpel_avg_variance32x16_sse2, 0),
- make_tuple(4, 5, subpel_avg_variance16x32_sse2, 0),
- make_tuple(4, 4, subpel_avg_variance16x16_sse2, 0),
- make_tuple(4, 3, subpel_avg_variance16x8_sse2, 0),
- make_tuple(3, 4, subpel_avg_variance8x16_sse2, 0),
- make_tuple(3, 3, subpel_avg_variance8x8_sse2, 0),
- make_tuple(3, 2, subpel_avg_variance8x4_sse2, 0),
- make_tuple(2, 3, subpel_avg_variance4x8_sse, 0),
- make_tuple(2, 2, subpel_avg_variance4x4_sse, 0)));
+ make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_sse2, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_sse2, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_sse2, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_sse2, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_sse2, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_sse2, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_sse2, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_sse2, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0)));
#endif // CONFIG_USE_X86INC
#if CONFIG_VP9_HIGHBITDEPTH
/* TODO(debargha): This test does not support the highbd version
-const VarianceMxNFunc highbd_12_mse16x16_sse2 = vpx_highbd_12_mse16x16_sse2;
-const VarianceMxNFunc highbd_12_mse16x8_sse2 = vpx_highbd_12_mse16x8_sse2;
-const VarianceMxNFunc highbd_12_mse8x16_sse2 = vpx_highbd_12_mse8x16_sse2;
-const VarianceMxNFunc highbd_12_mse8x8_sse2 = vpx_highbd_12_mse8x8_sse2;
-
-const VarianceMxNFunc highbd_10_mse16x16_sse2 = vpx_highbd_10_mse16x16_sse2;
-const VarianceMxNFunc highbd_10_mse16x8_sse2 = vpx_highbd_10_mse16x8_sse2;
-const VarianceMxNFunc highbd_10_mse8x16_sse2 = vpx_highbd_10_mse8x16_sse2;
-const VarianceMxNFunc highbd_10_mse8x8_sse2 = vpx_highbd_10_mse8x8_sse2;
-
-const VarianceMxNFunc highbd_8_mse16x16_sse2 = vpx_highbd_8_mse16x16_sse2;
-const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
-const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
-const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
INSTANTIATE_TEST_CASE_P(
- SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
- make_tuple(4, 3, highbd_12_mse16x8_sse2),
- make_tuple(3, 4, highbd_12_mse8x16_sse2),
- make_tuple(3, 3, highbd_12_mse8x8_sse2),
- make_tuple(4, 4, highbd_10_mse16x16_sse2),
- make_tuple(4, 3, highbd_10_mse16x8_sse2),
- make_tuple(3, 4, highbd_10_mse8x16_sse2),
- make_tuple(3, 3, highbd_10_mse8x8_sse2),
- make_tuple(4, 4, highbd_8_mse16x16_sse2),
- make_tuple(4, 3, highbd_8_mse16x8_sse2),
- make_tuple(3, 4, highbd_8_mse8x16_sse2),
- make_tuple(3, 3, highbd_8_mse8x8_sse2)));
+ SSE2, VpxHBDMseTest,
+ ::testing::Values(make_tuple(4, 4, &vpx_highbd_12_mse16x16_sse2),
+ make_tuple(4, 3, &vpx_highbd_12_mse16x8_sse2),
+ make_tuple(3, 4, &vpx_highbd_12_mse8x16_sse2),
+ make_tuple(3, 3, &vpx_highbd_12_mse8x8_sse2),
+ make_tuple(4, 4, &vpx_highbd_10_mse16x16_sse2),
+ make_tuple(4, 3, &vpx_highbd_10_mse16x8_sse2),
+ make_tuple(3, 4, &vpx_highbd_10_mse8x16_sse2),
+ make_tuple(3, 3, &vpx_highbd_10_mse8x8_sse2),
+ make_tuple(4, 4, &vpx_highbd_8_mse16x16_sse2),
+ make_tuple(4, 3, &vpx_highbd_8_mse16x8_sse2),
+ make_tuple(3, 4, &vpx_highbd_8_mse8x16_sse2),
+ make_tuple(3, 3, &vpx_highbd_8_mse8x8_sse2)));
*/
-const VarianceMxNFunc highbd_12_variance64x64_sse2 =
- vpx_highbd_12_variance64x64_sse2;
-const VarianceMxNFunc highbd_12_variance64x32_sse2 =
- vpx_highbd_12_variance64x32_sse2;
-const VarianceMxNFunc highbd_12_variance32x64_sse2 =
- vpx_highbd_12_variance32x64_sse2;
-const VarianceMxNFunc highbd_12_variance32x32_sse2 =
- vpx_highbd_12_variance32x32_sse2;
-const VarianceMxNFunc highbd_12_variance32x16_sse2 =
- vpx_highbd_12_variance32x16_sse2;
-const VarianceMxNFunc highbd_12_variance16x32_sse2 =
- vpx_highbd_12_variance16x32_sse2;
-const VarianceMxNFunc highbd_12_variance16x16_sse2 =
- vpx_highbd_12_variance16x16_sse2;
-const VarianceMxNFunc highbd_12_variance16x8_sse2 =
- vpx_highbd_12_variance16x8_sse2;
-const VarianceMxNFunc highbd_12_variance8x16_sse2 =
- vpx_highbd_12_variance8x16_sse2;
-const VarianceMxNFunc highbd_12_variance8x8_sse2 =
- vpx_highbd_12_variance8x8_sse2;
-const VarianceMxNFunc highbd_10_variance64x64_sse2 =
- vpx_highbd_10_variance64x64_sse2;
-const VarianceMxNFunc highbd_10_variance64x32_sse2 =
- vpx_highbd_10_variance64x32_sse2;
-const VarianceMxNFunc highbd_10_variance32x64_sse2 =
- vpx_highbd_10_variance32x64_sse2;
-const VarianceMxNFunc highbd_10_variance32x32_sse2 =
- vpx_highbd_10_variance32x32_sse2;
-const VarianceMxNFunc highbd_10_variance32x16_sse2 =
- vpx_highbd_10_variance32x16_sse2;
-const VarianceMxNFunc highbd_10_variance16x32_sse2 =
- vpx_highbd_10_variance16x32_sse2;
-const VarianceMxNFunc highbd_10_variance16x16_sse2 =
- vpx_highbd_10_variance16x16_sse2;
-const VarianceMxNFunc highbd_10_variance16x8_sse2 =
- vpx_highbd_10_variance16x8_sse2;
-const VarianceMxNFunc highbd_10_variance8x16_sse2 =
- vpx_highbd_10_variance8x16_sse2;
-const VarianceMxNFunc highbd_10_variance8x8_sse2 =
- vpx_highbd_10_variance8x8_sse2;
-const VarianceMxNFunc highbd_8_variance64x64_sse2 =
- vpx_highbd_8_variance64x64_sse2;
-const VarianceMxNFunc highbd_8_variance64x32_sse2 =
- vpx_highbd_8_variance64x32_sse2;
-const VarianceMxNFunc highbd_8_variance32x64_sse2 =
- vpx_highbd_8_variance32x64_sse2;
-const VarianceMxNFunc highbd_8_variance32x32_sse2 =
- vpx_highbd_8_variance32x32_sse2;
-const VarianceMxNFunc highbd_8_variance32x16_sse2 =
- vpx_highbd_8_variance32x16_sse2;
-const VarianceMxNFunc highbd_8_variance16x32_sse2 =
- vpx_highbd_8_variance16x32_sse2;
-const VarianceMxNFunc highbd_8_variance16x16_sse2 =
- vpx_highbd_8_variance16x16_sse2;
-const VarianceMxNFunc highbd_8_variance16x8_sse2 =
- vpx_highbd_8_variance16x8_sse2;
-const VarianceMxNFunc highbd_8_variance8x16_sse2 =
- vpx_highbd_8_variance8x16_sse2;
-const VarianceMxNFunc highbd_8_variance8x8_sse2 =
- vpx_highbd_8_variance8x8_sse2;
-
INSTANTIATE_TEST_CASE_P(
SSE2, VpxHBDVarianceTest,
- ::testing::Values(make_tuple(6, 6, highbd_12_variance64x64_sse2, 12),
- make_tuple(6, 5, highbd_12_variance64x32_sse2, 12),
- make_tuple(5, 6, highbd_12_variance32x64_sse2, 12),
- make_tuple(5, 5, highbd_12_variance32x32_sse2, 12),
- make_tuple(5, 4, highbd_12_variance32x16_sse2, 12),
- make_tuple(4, 5, highbd_12_variance16x32_sse2, 12),
- make_tuple(4, 4, highbd_12_variance16x16_sse2, 12),
- make_tuple(4, 3, highbd_12_variance16x8_sse2, 12),
- make_tuple(3, 4, highbd_12_variance8x16_sse2, 12),
- make_tuple(3, 3, highbd_12_variance8x8_sse2, 12),
- make_tuple(6, 6, highbd_10_variance64x64_sse2, 10),
- make_tuple(6, 5, highbd_10_variance64x32_sse2, 10),
- make_tuple(5, 6, highbd_10_variance32x64_sse2, 10),
- make_tuple(5, 5, highbd_10_variance32x32_sse2, 10),
- make_tuple(5, 4, highbd_10_variance32x16_sse2, 10),
- make_tuple(4, 5, highbd_10_variance16x32_sse2, 10),
- make_tuple(4, 4, highbd_10_variance16x16_sse2, 10),
- make_tuple(4, 3, highbd_10_variance16x8_sse2, 10),
- make_tuple(3, 4, highbd_10_variance8x16_sse2, 10),
- make_tuple(3, 3, highbd_10_variance8x8_sse2, 10),
- make_tuple(6, 6, highbd_8_variance64x64_sse2, 8),
- make_tuple(6, 5, highbd_8_variance64x32_sse2, 8),
- make_tuple(5, 6, highbd_8_variance32x64_sse2, 8),
- make_tuple(5, 5, highbd_8_variance32x32_sse2, 8),
- make_tuple(5, 4, highbd_8_variance32x16_sse2, 8),
- make_tuple(4, 5, highbd_8_variance16x32_sse2, 8),
- make_tuple(4, 4, highbd_8_variance16x16_sse2, 8),
- make_tuple(4, 3, highbd_8_variance16x8_sse2, 8),
- make_tuple(3, 4, highbd_8_variance8x16_sse2, 8),
- make_tuple(3, 3, highbd_8_variance8x8_sse2, 8)));
+ ::testing::Values(make_tuple(6, 6, &vpx_highbd_12_variance64x64_sse2, 12),
+ make_tuple(6, 5, &vpx_highbd_12_variance64x32_sse2, 12),
+ make_tuple(5, 6, &vpx_highbd_12_variance32x64_sse2, 12),
+ make_tuple(5, 5, &vpx_highbd_12_variance32x32_sse2, 12),
+ make_tuple(5, 4, &vpx_highbd_12_variance32x16_sse2, 12),
+ make_tuple(4, 5, &vpx_highbd_12_variance16x32_sse2, 12),
+ make_tuple(4, 4, &vpx_highbd_12_variance16x16_sse2, 12),
+ make_tuple(4, 3, &vpx_highbd_12_variance16x8_sse2, 12),
+ make_tuple(3, 4, &vpx_highbd_12_variance8x16_sse2, 12),
+ make_tuple(3, 3, &vpx_highbd_12_variance8x8_sse2, 12),
+ make_tuple(6, 6, &vpx_highbd_10_variance64x64_sse2, 10),
+ make_tuple(6, 5, &vpx_highbd_10_variance64x32_sse2, 10),
+ make_tuple(5, 6, &vpx_highbd_10_variance32x64_sse2, 10),
+ make_tuple(5, 5, &vpx_highbd_10_variance32x32_sse2, 10),
+ make_tuple(5, 4, &vpx_highbd_10_variance32x16_sse2, 10),
+ make_tuple(4, 5, &vpx_highbd_10_variance16x32_sse2, 10),
+ make_tuple(4, 4, &vpx_highbd_10_variance16x16_sse2, 10),
+ make_tuple(4, 3, &vpx_highbd_10_variance16x8_sse2, 10),
+ make_tuple(3, 4, &vpx_highbd_10_variance8x16_sse2, 10),
+ make_tuple(3, 3, &vpx_highbd_10_variance8x8_sse2, 10),
+ make_tuple(6, 6, &vpx_highbd_8_variance64x64_sse2, 8),
+ make_tuple(6, 5, &vpx_highbd_8_variance64x32_sse2, 8),
+ make_tuple(5, 6, &vpx_highbd_8_variance32x64_sse2, 8),
+ make_tuple(5, 5, &vpx_highbd_8_variance32x32_sse2, 8),
+ make_tuple(5, 4, &vpx_highbd_8_variance32x16_sse2, 8),
+ make_tuple(4, 5, &vpx_highbd_8_variance16x32_sse2, 8),
+ make_tuple(4, 4, &vpx_highbd_8_variance16x16_sse2, 8),
+ make_tuple(4, 3, &vpx_highbd_8_variance16x8_sse2, 8),
+ make_tuple(3, 4, &vpx_highbd_8_variance8x16_sse2, 8),
+ make_tuple(3, 3, &vpx_highbd_8_variance8x8_sse2, 8)));
#if CONFIG_USE_X86INC
-const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
- vpx_highbd_12_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
- vpx_highbd_12_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
- vpx_highbd_12_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
- vpx_highbd_12_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
- vpx_highbd_12_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
- vpx_highbd_12_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
- vpx_highbd_12_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
- vpx_highbd_12_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
- vpx_highbd_12_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
- vpx_highbd_12_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
- vpx_highbd_12_sub_pixel_variance8x4_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
- vpx_highbd_10_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
- vpx_highbd_10_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
- vpx_highbd_10_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
- vpx_highbd_10_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
- vpx_highbd_10_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
- vpx_highbd_10_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
- vpx_highbd_10_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
- vpx_highbd_10_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
- vpx_highbd_10_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
- vpx_highbd_10_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
- vpx_highbd_10_sub_pixel_variance8x4_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance64x64_sse2 =
- vpx_highbd_8_sub_pixel_variance64x64_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance64x32_sse2 =
- vpx_highbd_8_sub_pixel_variance64x32_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance32x64_sse2 =
- vpx_highbd_8_sub_pixel_variance32x64_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance32x32_sse2 =
- vpx_highbd_8_sub_pixel_variance32x32_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance32x16_sse2 =
- vpx_highbd_8_sub_pixel_variance32x16_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance16x32_sse2 =
- vpx_highbd_8_sub_pixel_variance16x32_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance16x16_sse2 =
- vpx_highbd_8_sub_pixel_variance16x16_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance16x8_sse2 =
- vpx_highbd_8_sub_pixel_variance16x8_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance8x16_sse2 =
- vpx_highbd_8_sub_pixel_variance8x16_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance8x8_sse2 =
- vpx_highbd_8_sub_pixel_variance8x8_sse2;
-const SubpixVarMxNFunc highbd_8_subpel_variance8x4_sse2 =
- vpx_highbd_8_sub_pixel_variance8x4_sse2;
INSTANTIATE_TEST_CASE_P(
SSE2, VpxHBDSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, highbd_12_subpel_variance64x64_sse2, 12),
- make_tuple(6, 5, highbd_12_subpel_variance64x32_sse2, 12),
- make_tuple(5, 6, highbd_12_subpel_variance32x64_sse2, 12),
- make_tuple(5, 5, highbd_12_subpel_variance32x32_sse2, 12),
- make_tuple(5, 4, highbd_12_subpel_variance32x16_sse2, 12),
- make_tuple(4, 5, highbd_12_subpel_variance16x32_sse2, 12),
- make_tuple(4, 4, highbd_12_subpel_variance16x16_sse2, 12),
- make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2, 12),
- make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2, 12),
- make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2, 12),
- make_tuple(3, 2, highbd_12_subpel_variance8x4_sse2, 12),
- make_tuple(6, 6, highbd_10_subpel_variance64x64_sse2, 10),
- make_tuple(6, 5, highbd_10_subpel_variance64x32_sse2, 10),
- make_tuple(5, 6, highbd_10_subpel_variance32x64_sse2, 10),
- make_tuple(5, 5, highbd_10_subpel_variance32x32_sse2, 10),
- make_tuple(5, 4, highbd_10_subpel_variance32x16_sse2, 10),
- make_tuple(4, 5, highbd_10_subpel_variance16x32_sse2, 10),
- make_tuple(4, 4, highbd_10_subpel_variance16x16_sse2, 10),
- make_tuple(4, 3, highbd_10_subpel_variance16x8_sse2, 10),
- make_tuple(3, 4, highbd_10_subpel_variance8x16_sse2, 10),
- make_tuple(3, 3, highbd_10_subpel_variance8x8_sse2, 10),
- make_tuple(3, 2, highbd_10_subpel_variance8x4_sse2, 10),
- make_tuple(6, 6, highbd_8_subpel_variance64x64_sse2, 8),
- make_tuple(6, 5, highbd_8_subpel_variance64x32_sse2, 8),
- make_tuple(5, 6, highbd_8_subpel_variance32x64_sse2, 8),
- make_tuple(5, 5, highbd_8_subpel_variance32x32_sse2, 8),
- make_tuple(5, 4, highbd_8_subpel_variance32x16_sse2, 8),
- make_tuple(4, 5, highbd_8_subpel_variance16x32_sse2, 8),
- make_tuple(4, 4, highbd_8_subpel_variance16x16_sse2, 8),
- make_tuple(4, 3, highbd_8_subpel_variance16x8_sse2, 8),
- make_tuple(3, 4, highbd_8_subpel_variance8x16_sse2, 8),
- make_tuple(3, 3, highbd_8_subpel_variance8x8_sse2, 8),
- make_tuple(3, 2, highbd_8_subpel_variance8x4_sse2, 8)));
-
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance64x64_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance64x32_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x64_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x32_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance32x16_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x32_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x16_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance16x8_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x16_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x8_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc highbd_12_subpel_avg_variance8x4_sse2 =
- vpx_highbd_12_sub_pixel_avg_variance8x4_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance64x64_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance64x32_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x64_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x32_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance32x16_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x32_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x16_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance16x8_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x16_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x8_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc highbd_10_subpel_avg_variance8x4_sse2 =
- vpx_highbd_10_sub_pixel_avg_variance8x4_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance64x64_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance64x64_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance64x32_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance64x32_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x64_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance32x64_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x32_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance32x32_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance32x16_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance32x16_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x32_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance16x32_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x16_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance16x16_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance16x8_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance16x8_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x16_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance8x16_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x8_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance8x8_sse2;
-const SubpixAvgVarMxNFunc highbd_8_subpel_avg_variance8x4_sse2 =
- vpx_highbd_8_sub_pixel_avg_variance8x4_sse2;
+ ::testing::Values(
+ make_tuple(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_sse2, 12),
+ make_tuple(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_sse2, 12),
+ make_tuple(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_sse2, 12),
+ make_tuple(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_sse2, 12),
+ make_tuple(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_sse2, 12),
+ make_tuple(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_sse2, 12),
+ make_tuple(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_sse2, 12),
+ make_tuple(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_sse2, 12),
+ make_tuple(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_sse2, 12),
+ make_tuple(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_sse2, 12),
+ make_tuple(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_sse2, 12),
+ make_tuple(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_sse2, 10),
+ make_tuple(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_sse2, 10),
+ make_tuple(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_sse2, 10),
+ make_tuple(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_sse2, 10),
+ make_tuple(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_sse2, 10),
+ make_tuple(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_sse2, 10),
+ make_tuple(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_sse2, 10),
+ make_tuple(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_sse2, 10),
+ make_tuple(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_sse2, 10),
+ make_tuple(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_sse2, 10),
+ make_tuple(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_sse2, 10),
+ make_tuple(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_sse2, 8),
+ make_tuple(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_sse2, 8),
+ make_tuple(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_sse2, 8),
+ make_tuple(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_sse2, 8),
+ make_tuple(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_sse2, 8),
+ make_tuple(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_sse2, 8),
+ make_tuple(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_sse2, 8),
+ make_tuple(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_sse2, 8),
+ make_tuple(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_sse2, 8),
+ make_tuple(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_sse2, 8),
+ make_tuple(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_sse2, 8)));
+
INSTANTIATE_TEST_CASE_P(
SSE2, VpxHBDSubpelAvgVarianceTest,
::testing::Values(
- make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_sse2, 12),
- make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_sse2, 12),
- make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_sse2, 12),
- make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_sse2, 12),
- make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_sse2, 12),
- make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_sse2, 12),
- make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_sse2, 12),
- make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2, 12),
- make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2, 12),
- make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2, 12),
- make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2, 12),
- make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_sse2, 10),
- make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_sse2, 10),
- make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_sse2, 10),
- make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_sse2, 10),
- make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_sse2, 10),
- make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_sse2, 10),
- make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_sse2, 10),
- make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2, 10),
- make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2, 10),
- make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2, 10),
- make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2, 10),
- make_tuple(6, 6, highbd_8_subpel_avg_variance64x64_sse2, 8),
- make_tuple(6, 5, highbd_8_subpel_avg_variance64x32_sse2, 8),
- make_tuple(5, 6, highbd_8_subpel_avg_variance32x64_sse2, 8),
- make_tuple(5, 5, highbd_8_subpel_avg_variance32x32_sse2, 8),
- make_tuple(5, 4, highbd_8_subpel_avg_variance32x16_sse2, 8),
- make_tuple(4, 5, highbd_8_subpel_avg_variance16x32_sse2, 8),
- make_tuple(4, 4, highbd_8_subpel_avg_variance16x16_sse2, 8),
- make_tuple(4, 3, highbd_8_subpel_avg_variance16x8_sse2, 8),
- make_tuple(3, 4, highbd_8_subpel_avg_variance8x16_sse2, 8),
- make_tuple(3, 3, highbd_8_subpel_avg_variance8x8_sse2, 8),
- make_tuple(3, 2, highbd_8_subpel_avg_variance8x4_sse2, 8)));
+ make_tuple(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_sse2, 12),
+ make_tuple(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_sse2, 12),
+ make_tuple(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_sse2, 12),
+ make_tuple(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_sse2, 12),
+ make_tuple(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_sse2, 12),
+ make_tuple(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_sse2, 12),
+ make_tuple(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_sse2, 12),
+ make_tuple(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_sse2, 12),
+ make_tuple(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_sse2, 12),
+ make_tuple(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_sse2, 12),
+ make_tuple(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_sse2, 12),
+ make_tuple(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_sse2, 10),
+ make_tuple(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_sse2, 10),
+ make_tuple(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_sse2, 10),
+ make_tuple(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_sse2, 10),
+ make_tuple(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_sse2, 10),
+ make_tuple(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_sse2, 10),
+ make_tuple(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_sse2, 10),
+ make_tuple(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_sse2, 10),
+ make_tuple(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_sse2, 10),
+ make_tuple(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_sse2, 10),
+ make_tuple(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_sse2, 10),
+ make_tuple(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_sse2, 8),
+ make_tuple(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_sse2, 8),
+ make_tuple(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_sse2, 8),
+ make_tuple(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_sse2, 8),
+ make_tuple(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_sse2, 8),
+ make_tuple(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_sse2, 8),
+ make_tuple(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_sse2, 8),
+ make_tuple(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_sse2, 8),
+ make_tuple(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_sse2, 8),
+ make_tuple(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_sse2, 8),
+ make_tuple(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_sse2, 8)));
#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
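A side note on the highbd instantiations above: the final tuple element carries the bit depth (8, 10, or 12), while the lowbd tuples pass 0. As a rough sketch, and not code from this file, the reference computation such vpx_highbd_* kernels are checked against amounts to a plain sum/sum-of-squares pass over 16-bit samples:

#include <stdint.h>

// Hypothetical reference helper: accumulate sse and sum for a WxH block
// of high-bitdepth (uint16_t) samples. The tested kernels return sse and
// a sum from which variance = sse - (sum * sum) / (w * h) is derived.
static void reference_highbd_variance(const uint16_t *src, int src_stride,
                                      const uint16_t *ref, int ref_stride,
                                      int w, int h,
                                      uint64_t *sse, int64_t *sum) {
  *sse = 0;
  *sum = 0;
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      const int diff = src[y * src_stride + x] - ref[y * ref_stride + x];
      *sum += diff;
      *sse += (uint64_t)((int64_t)diff * diff);
    }
  }
}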
#if HAVE_SSSE3
#if CONFIG_USE_X86INC
-const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
- vpx_sub_pixel_variance64x64_ssse3;
-const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
- vpx_sub_pixel_variance64x32_ssse3;
-const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
- vpx_sub_pixel_variance32x64_ssse3;
-const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
- vpx_sub_pixel_variance32x32_ssse3;
-const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
- vpx_sub_pixel_variance32x16_ssse3;
-const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
- vpx_sub_pixel_variance16x32_ssse3;
-const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
- vpx_sub_pixel_variance16x16_ssse3;
-const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
- vpx_sub_pixel_variance16x8_ssse3;
-const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
- vpx_sub_pixel_variance8x16_ssse3;
-const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
- vpx_sub_pixel_variance8x8_ssse3;
-const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
- vpx_sub_pixel_variance8x4_ssse3;
-const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
- vpx_sub_pixel_variance4x8_ssse3;
-const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
- vpx_sub_pixel_variance4x4_ssse3;
INSTANTIATE_TEST_CASE_P(
SSSE3, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_variance64x64_ssse3, 0),
- make_tuple(6, 5, subpel_variance64x32_ssse3, 0),
- make_tuple(5, 6, subpel_variance32x64_ssse3, 0),
- make_tuple(5, 5, subpel_variance32x32_ssse3, 0),
- make_tuple(5, 4, subpel_variance32x16_ssse3, 0),
- make_tuple(4, 5, subpel_variance16x32_ssse3, 0),
- make_tuple(4, 4, subpel_variance16x16_ssse3, 0),
- make_tuple(4, 3, subpel_variance16x8_ssse3, 0),
- make_tuple(3, 4, subpel_variance8x16_ssse3, 0),
- make_tuple(3, 3, subpel_variance8x8_ssse3, 0),
- make_tuple(3, 2, subpel_variance8x4_ssse3, 0),
- make_tuple(2, 3, subpel_variance4x8_ssse3, 0),
- make_tuple(2, 2, subpel_variance4x4_ssse3, 0)));
-
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_ssse3 =
- vpx_sub_pixel_avg_variance64x64_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance64x32_ssse3 =
- vpx_sub_pixel_avg_variance64x32_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_ssse3 =
- vpx_sub_pixel_avg_variance32x64_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_ssse3 =
- vpx_sub_pixel_avg_variance32x32_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x16_ssse3 =
- vpx_sub_pixel_avg_variance32x16_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x32_ssse3 =
- vpx_sub_pixel_avg_variance16x32_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x16_ssse3 =
- vpx_sub_pixel_avg_variance16x16_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x8_ssse3 =
- vpx_sub_pixel_avg_variance16x8_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x16_ssse3 =
- vpx_sub_pixel_avg_variance8x16_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x8_ssse3 =
- vpx_sub_pixel_avg_variance8x8_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x4_ssse3 =
- vpx_sub_pixel_avg_variance8x4_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x8_ssse3 =
- vpx_sub_pixel_avg_variance4x8_ssse3;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x4_ssse3 =
- vpx_sub_pixel_avg_variance4x4_ssse3;
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_ssse3, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_variance64x32_ssse3, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_variance32x64_ssse3, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_variance32x32_ssse3, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_variance32x16_ssse3, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_variance16x32_ssse3, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_variance16x16_ssse3, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_variance16x8_ssse3, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_variance8x16_ssse3, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_variance8x8_ssse3, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_variance8x4_ssse3, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_variance4x8_ssse3, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_variance4x4_ssse3, 0)));
+
INSTANTIATE_TEST_CASE_P(
SSSE3, VpxSubpelAvgVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_ssse3, 0),
- make_tuple(6, 5, subpel_avg_variance64x32_ssse3, 0),
- make_tuple(5, 6, subpel_avg_variance32x64_ssse3, 0),
- make_tuple(5, 5, subpel_avg_variance32x32_ssse3, 0),
- make_tuple(5, 4, subpel_avg_variance32x16_ssse3, 0),
- make_tuple(4, 5, subpel_avg_variance16x32_ssse3, 0),
- make_tuple(4, 4, subpel_avg_variance16x16_ssse3, 0),
- make_tuple(4, 3, subpel_avg_variance16x8_ssse3, 0),
- make_tuple(3, 4, subpel_avg_variance8x16_ssse3, 0),
- make_tuple(3, 3, subpel_avg_variance8x8_ssse3, 0),
- make_tuple(3, 2, subpel_avg_variance8x4_ssse3, 0),
- make_tuple(2, 3, subpel_avg_variance4x8_ssse3, 0),
- make_tuple(2, 2, subpel_avg_variance4x4_ssse3, 0)));
+ ::testing::Values(
+ make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_ssse3, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_ssse3, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_ssse3, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_ssse3, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_ssse3, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_ssse3, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_ssse3, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_ssse3, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_ssse3, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_ssse3, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_ssse3, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_ssse3, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_ssse3, 0)));
#endif // CONFIG_USE_X86INC
#endif // HAVE_SSSE3
#if HAVE_AVX2
-const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_avx2)));
+ ::testing::Values(make_tuple(4, 4,
+ &vpx_mse16x16_avx2)));
-const VarianceMxNFunc variance64x64_avx2 = vpx_variance64x64_avx2;
-const VarianceMxNFunc variance64x32_avx2 = vpx_variance64x32_avx2;
-const VarianceMxNFunc variance32x32_avx2 = vpx_variance32x32_avx2;
-const VarianceMxNFunc variance32x16_avx2 = vpx_variance32x16_avx2;
-const VarianceMxNFunc variance16x16_avx2 = vpx_variance16x16_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, variance64x64_avx2, 0),
- make_tuple(6, 5, variance64x32_avx2, 0),
- make_tuple(5, 5, variance32x32_avx2, 0),
- make_tuple(5, 4, variance32x16_avx2, 0),
- make_tuple(4, 4, variance16x16_avx2, 0)));
-
-const SubpixVarMxNFunc subpel_variance64x64_avx2 =
- vpx_sub_pixel_variance64x64_avx2;
-const SubpixVarMxNFunc subpel_variance32x32_avx2 =
- vpx_sub_pixel_variance32x32_avx2;
+ ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_avx2, 0),
+ make_tuple(6, 5, &vpx_variance64x32_avx2, 0),
+ make_tuple(5, 5, &vpx_variance32x32_avx2, 0),
+ make_tuple(5, 4, &vpx_variance32x16_avx2, 0),
+ make_tuple(4, 4, &vpx_variance16x16_avx2, 0)));
+
INSTANTIATE_TEST_CASE_P(
AVX2, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_variance64x64_avx2, 0),
- make_tuple(5, 5, subpel_variance32x32_avx2, 0)));
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_avx2, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_variance32x32_avx2, 0)));
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_avx2 =
- vpx_sub_pixel_avg_variance64x64_avx2;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_avx2 =
- vpx_sub_pixel_avg_variance32x32_avx2;
INSTANTIATE_TEST_CASE_P(
AVX2, VpxSubpelAvgVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_avx2, 0),
- make_tuple(5, 5, subpel_avg_variance32x32_avx2, 0)));
+ ::testing::Values(
+ make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_avx2, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_avx2, 0)));
#endif // HAVE_AVX2
#if HAVE_MEDIA
-const VarianceMxNFunc mse16x16_media = vpx_mse16x16_media;
INSTANTIATE_TEST_CASE_P(MEDIA, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_media)));
+ ::testing::Values(make_tuple(4, 4,
+ &vpx_mse16x16_media)));
-const VarianceMxNFunc variance16x16_media = vpx_variance16x16_media;
-const VarianceMxNFunc variance8x8_media = vpx_variance8x8_media;
INSTANTIATE_TEST_CASE_P(
MEDIA, VpxVarianceTest,
- ::testing::Values(make_tuple(4, 4, variance16x16_media, 0),
- make_tuple(3, 3, variance8x8_media, 0)));
+ ::testing::Values(make_tuple(4, 4, &vpx_variance16x16_media, 0),
+ make_tuple(3, 3, &vpx_variance8x8_media, 0)));
-const SubpixVarMxNFunc subpel_variance16x16_media =
- vpx_sub_pixel_variance16x16_media;
-const SubpixVarMxNFunc subpel_variance8x8_media =
- vpx_sub_pixel_variance8x8_media;
INSTANTIATE_TEST_CASE_P(
MEDIA, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(4, 4, subpel_variance16x16_media, 0),
- make_tuple(3, 3, subpel_variance8x8_media, 0)));
+ ::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_media, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_variance8x8_media, 0)));
#endif // HAVE_MEDIA
#if HAVE_NEON
-const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
- ::testing::Values(make_tuple(2, 2, get4x4sse_cs_neon)));
+ ::testing::Values(make_tuple(2, 2,
+ &vpx_get4x4sse_cs_neon)));
-const VarianceMxNFunc mse16x16_neon = vpx_mse16x16_neon;
INSTANTIATE_TEST_CASE_P(NEON, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_neon)));
-
-const VarianceMxNFunc variance64x64_neon = vpx_variance64x64_neon;
-const VarianceMxNFunc variance64x32_neon = vpx_variance64x32_neon;
-const VarianceMxNFunc variance32x64_neon = vpx_variance32x64_neon;
-const VarianceMxNFunc variance32x32_neon = vpx_variance32x32_neon;
-const VarianceMxNFunc variance16x16_neon = vpx_variance16x16_neon;
-const VarianceMxNFunc variance16x8_neon = vpx_variance16x8_neon;
-const VarianceMxNFunc variance8x16_neon = vpx_variance8x16_neon;
-const VarianceMxNFunc variance8x8_neon = vpx_variance8x8_neon;
+ ::testing::Values(make_tuple(4, 4,
+ &vpx_mse16x16_neon)));
+
INSTANTIATE_TEST_CASE_P(
NEON, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, variance64x64_neon, 0),
- make_tuple(6, 5, variance64x32_neon, 0),
- make_tuple(5, 6, variance32x64_neon, 0),
- make_tuple(5, 5, variance32x32_neon, 0),
- make_tuple(4, 4, variance16x16_neon, 0),
- make_tuple(4, 3, variance16x8_neon, 0),
- make_tuple(3, 4, variance8x16_neon, 0),
- make_tuple(3, 3, variance8x8_neon, 0)));
-
-const SubpixVarMxNFunc subpel_variance64x64_neon =
- vpx_sub_pixel_variance64x64_neon;
-const SubpixVarMxNFunc subpel_variance32x32_neon =
- vpx_sub_pixel_variance32x32_neon;
-const SubpixVarMxNFunc subpel_variance16x16_neon =
- vpx_sub_pixel_variance16x16_neon;
-const SubpixVarMxNFunc subpel_variance8x8_neon = vpx_sub_pixel_variance8x8_neon;
+ ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_neon, 0),
+ make_tuple(6, 5, &vpx_variance64x32_neon, 0),
+ make_tuple(5, 6, &vpx_variance32x64_neon, 0),
+ make_tuple(5, 5, &vpx_variance32x32_neon, 0),
+ make_tuple(4, 4, &vpx_variance16x16_neon, 0),
+ make_tuple(4, 3, &vpx_variance16x8_neon, 0),
+ make_tuple(3, 4, &vpx_variance8x16_neon, 0),
+ make_tuple(3, 3, &vpx_variance8x8_neon, 0)));
+
INSTANTIATE_TEST_CASE_P(
NEON, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_variance64x64_neon, 0),
- make_tuple(5, 5, subpel_variance32x32_neon, 0),
- make_tuple(4, 4, subpel_variance16x16_neon, 0),
- make_tuple(3, 3, subpel_variance8x8_neon, 0)));
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_variance64x64_neon, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_variance32x32_neon, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_variance16x16_neon, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_variance8x8_neon, 0)));
#endif // HAVE_NEON
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest,
::testing::Values(vpx_get_mb_ss_msa));
-const Get4x4SseFunc get4x4sse_cs_msa = vpx_get4x4sse_cs_msa;
INSTANTIATE_TEST_CASE_P(MSA, VpxSseTest,
- ::testing::Values(make_tuple(2, 2, get4x4sse_cs_msa)));
+ ::testing::Values(make_tuple(2, 2,
+ &vpx_get4x4sse_cs_msa)));
-const VarianceMxNFunc mse16x16_msa = vpx_mse16x16_msa;
-const VarianceMxNFunc mse16x8_msa = vpx_mse16x8_msa;
-const VarianceMxNFunc mse8x16_msa = vpx_mse8x16_msa;
-const VarianceMxNFunc mse8x8_msa = vpx_mse8x8_msa;
INSTANTIATE_TEST_CASE_P(MSA, VpxMseTest,
- ::testing::Values(make_tuple(4, 4, mse16x16_msa),
- make_tuple(4, 3, mse16x8_msa),
- make_tuple(3, 4, mse8x16_msa),
- make_tuple(3, 3, mse8x8_msa)));
-
-const VarianceMxNFunc variance64x64_msa = vpx_variance64x64_msa;
-const VarianceMxNFunc variance64x32_msa = vpx_variance64x32_msa;
-const VarianceMxNFunc variance32x64_msa = vpx_variance32x64_msa;
-const VarianceMxNFunc variance32x32_msa = vpx_variance32x32_msa;
-const VarianceMxNFunc variance32x16_msa = vpx_variance32x16_msa;
-const VarianceMxNFunc variance16x32_msa = vpx_variance16x32_msa;
-const VarianceMxNFunc variance16x16_msa = vpx_variance16x16_msa;
-const VarianceMxNFunc variance16x8_msa = vpx_variance16x8_msa;
-const VarianceMxNFunc variance8x16_msa = vpx_variance8x16_msa;
-const VarianceMxNFunc variance8x8_msa = vpx_variance8x8_msa;
-const VarianceMxNFunc variance8x4_msa = vpx_variance8x4_msa;
-const VarianceMxNFunc variance4x8_msa = vpx_variance4x8_msa;
-const VarianceMxNFunc variance4x4_msa = vpx_variance4x4_msa;
+ ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_msa),
+ make_tuple(4, 3, &vpx_mse16x8_msa),
+ make_tuple(3, 4, &vpx_mse8x16_msa),
+ make_tuple(3, 3, &vpx_mse8x8_msa)));
+
INSTANTIATE_TEST_CASE_P(
MSA, VpxVarianceTest,
- ::testing::Values(make_tuple(6, 6, variance64x64_msa, 0),
- make_tuple(6, 5, variance64x32_msa, 0),
- make_tuple(5, 6, variance32x64_msa, 0),
- make_tuple(5, 5, variance32x32_msa, 0),
- make_tuple(5, 4, variance32x16_msa, 0),
- make_tuple(4, 5, variance16x32_msa, 0),
- make_tuple(4, 4, variance16x16_msa, 0),
- make_tuple(4, 3, variance16x8_msa, 0),
- make_tuple(3, 4, variance8x16_msa, 0),
- make_tuple(3, 3, variance8x8_msa, 0),
- make_tuple(3, 2, variance8x4_msa, 0),
- make_tuple(2, 3, variance4x8_msa, 0),
- make_tuple(2, 2, variance4x4_msa, 0)));
-
-const SubpixVarMxNFunc subpel_variance4x4_msa = vpx_sub_pixel_variance4x4_msa;
-const SubpixVarMxNFunc subpel_variance4x8_msa = vpx_sub_pixel_variance4x8_msa;
-const SubpixVarMxNFunc subpel_variance8x4_msa = vpx_sub_pixel_variance8x4_msa;
-const SubpixVarMxNFunc subpel_variance8x8_msa = vpx_sub_pixel_variance8x8_msa;
-const SubpixVarMxNFunc subpel_variance8x16_msa = vpx_sub_pixel_variance8x16_msa;
-const SubpixVarMxNFunc subpel_variance16x8_msa = vpx_sub_pixel_variance16x8_msa;
-const SubpixVarMxNFunc subpel_variance16x16_msa =
- vpx_sub_pixel_variance16x16_msa;
-const SubpixVarMxNFunc subpel_variance16x32_msa =
- vpx_sub_pixel_variance16x32_msa;
-const SubpixVarMxNFunc subpel_variance32x16_msa =
- vpx_sub_pixel_variance32x16_msa;
-const SubpixVarMxNFunc subpel_variance32x32_msa =
- vpx_sub_pixel_variance32x32_msa;
-const SubpixVarMxNFunc subpel_variance32x64_msa =
- vpx_sub_pixel_variance32x64_msa;
-const SubpixVarMxNFunc subpel_variance64x32_msa =
- vpx_sub_pixel_variance64x32_msa;
-const SubpixVarMxNFunc subpel_variance64x64_msa =
- vpx_sub_pixel_variance64x64_msa;
+ ::testing::Values(make_tuple(6, 6, &vpx_variance64x64_msa, 0),
+ make_tuple(6, 5, &vpx_variance64x32_msa, 0),
+ make_tuple(5, 6, &vpx_variance32x64_msa, 0),
+ make_tuple(5, 5, &vpx_variance32x32_msa, 0),
+ make_tuple(5, 4, &vpx_variance32x16_msa, 0),
+ make_tuple(4, 5, &vpx_variance16x32_msa, 0),
+ make_tuple(4, 4, &vpx_variance16x16_msa, 0),
+ make_tuple(4, 3, &vpx_variance16x8_msa, 0),
+ make_tuple(3, 4, &vpx_variance8x16_msa, 0),
+ make_tuple(3, 3, &vpx_variance8x8_msa, 0),
+ make_tuple(3, 2, &vpx_variance8x4_msa, 0),
+ make_tuple(2, 3, &vpx_variance4x8_msa, 0),
+ make_tuple(2, 2, &vpx_variance4x4_msa, 0)));
+
INSTANTIATE_TEST_CASE_P(
MSA, VpxSubpelVarianceTest,
- ::testing::Values(make_tuple(2, 2, subpel_variance4x4_msa, 0),
- make_tuple(2, 3, subpel_variance4x8_msa, 0),
- make_tuple(3, 2, subpel_variance8x4_msa, 0),
- make_tuple(3, 3, subpel_variance8x8_msa, 0),
- make_tuple(3, 4, subpel_variance8x16_msa, 0),
- make_tuple(4, 3, subpel_variance16x8_msa, 0),
- make_tuple(4, 4, subpel_variance16x16_msa, 0),
- make_tuple(4, 5, subpel_variance16x32_msa, 0),
- make_tuple(5, 4, subpel_variance32x16_msa, 0),
- make_tuple(5, 5, subpel_variance32x32_msa, 0),
- make_tuple(5, 6, subpel_variance32x64_msa, 0),
- make_tuple(6, 5, subpel_variance64x32_msa, 0),
- make_tuple(6, 6, subpel_variance64x64_msa, 0)));
-
-const SubpixAvgVarMxNFunc subpel_avg_variance64x64_msa =
- vpx_sub_pixel_avg_variance64x64_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance64x32_msa =
- vpx_sub_pixel_avg_variance64x32_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x64_msa =
- vpx_sub_pixel_avg_variance32x64_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x32_msa =
- vpx_sub_pixel_avg_variance32x32_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance32x16_msa =
- vpx_sub_pixel_avg_variance32x16_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x32_msa =
- vpx_sub_pixel_avg_variance16x32_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x16_msa =
- vpx_sub_pixel_avg_variance16x16_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance16x8_msa =
- vpx_sub_pixel_avg_variance16x8_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x16_msa =
- vpx_sub_pixel_avg_variance8x16_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x8_msa =
- vpx_sub_pixel_avg_variance8x8_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance8x4_msa =
- vpx_sub_pixel_avg_variance8x4_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x8_msa =
- vpx_sub_pixel_avg_variance4x8_msa;
-const SubpixAvgVarMxNFunc subpel_avg_variance4x4_msa =
- vpx_sub_pixel_avg_variance4x4_msa;
+ ::testing::Values(make_tuple(2, 2, &vpx_sub_pixel_variance4x4_msa, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_variance4x8_msa, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_variance8x4_msa, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_variance8x8_msa, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_variance8x16_msa, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_variance16x8_msa, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_variance16x16_msa, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_variance16x32_msa, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_variance32x16_msa, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_variance32x32_msa, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_variance32x64_msa, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_variance64x32_msa, 0),
+ make_tuple(6, 6, &vpx_sub_pixel_variance64x64_msa, 0)));
+
INSTANTIATE_TEST_CASE_P(
MSA, VpxSubpelAvgVarianceTest,
- ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_msa, 0),
- make_tuple(6, 5, subpel_avg_variance64x32_msa, 0),
- make_tuple(5, 6, subpel_avg_variance32x64_msa, 0),
- make_tuple(5, 5, subpel_avg_variance32x32_msa, 0),
- make_tuple(5, 4, subpel_avg_variance32x16_msa, 0),
- make_tuple(4, 5, subpel_avg_variance16x32_msa, 0),
- make_tuple(4, 4, subpel_avg_variance16x16_msa, 0),
- make_tuple(4, 3, subpel_avg_variance16x8_msa, 0),
- make_tuple(3, 4, subpel_avg_variance8x16_msa, 0),
- make_tuple(3, 3, subpel_avg_variance8x8_msa, 0),
- make_tuple(3, 2, subpel_avg_variance8x4_msa, 0),
- make_tuple(2, 3, subpel_avg_variance4x8_msa, 0),
- make_tuple(2, 2, subpel_avg_variance4x4_msa, 0)));
+ ::testing::Values(make_tuple(6, 6, &vpx_sub_pixel_avg_variance64x64_msa, 0),
+ make_tuple(6, 5, &vpx_sub_pixel_avg_variance64x32_msa, 0),
+ make_tuple(5, 6, &vpx_sub_pixel_avg_variance32x64_msa, 0),
+ make_tuple(5, 5, &vpx_sub_pixel_avg_variance32x32_msa, 0),
+ make_tuple(5, 4, &vpx_sub_pixel_avg_variance32x16_msa, 0),
+ make_tuple(4, 5, &vpx_sub_pixel_avg_variance16x32_msa, 0),
+ make_tuple(4, 4, &vpx_sub_pixel_avg_variance16x16_msa, 0),
+ make_tuple(4, 3, &vpx_sub_pixel_avg_variance16x8_msa, 0),
+ make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_msa, 0),
+ make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_msa, 0),
+ make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_msa, 0),
+ make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_msa, 0),
+ make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_msa, 0)));
#endif // HAVE_MSA
} // namespace
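Throughout variance_test.cc this change drops the file-scope const function-pointer aliases and passes the &vpx_... symbols straight into ::testing::Values; each tuple is (log2 width, log2 height, kernel pointer, bit depth). A minimal sketch of a fixture consuming such a tuple (the names below are illustrative, not the file's real VpxVarianceTest internals) would be:

#include <stdint.h>
#include "third_party/googletest/src/include/gtest/gtest.h"

// Signature shared by the vpx_variance{W}x{H}_* kernels instantiated above.
typedef unsigned int (*VarianceMxNFunc)(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride,
                                        unsigned int *sse);
typedef std::tr1::tuple<int, int, VarianceMxNFunc, int> VarianceParam;

class ExampleVarianceTest : public ::testing::TestWithParam<VarianceParam> {
 protected:
  virtual void SetUp() {
    // Tuple layout mirrors the instantiations above:
    // (log2 width, log2 height, kernel, bit depth; 0 selects 8-bit).
    width_ = 1 << std::tr1::get<0>(GetParam());
    height_ = 1 << std::tr1::get<1>(GetParam());
    func_ = std::tr1::get<2>(GetParam());
    bit_depth_ = std::tr1::get<3>(GetParam());
  }
  int width_, height_, bit_depth_;
  VarianceMxNFunc func_;
};

Each INSTANTIATE_TEST_CASE_P above then expands to one test instance per tuple.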
diff --git a/libvpx/test/vp10_dct_test.cc b/libvpx/test/vp10_dct_test.cc
deleted file mode 100644
index b2c301ae3..000000000
--- a/libvpx/test/vp10_dct_test.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <new>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/util.h"
-#include "./vpx_config.h"
-#include "vpx_ports/msvc.h"
-
-#undef CONFIG_COEFFICIENT_RANGE_CHECKING
-#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
-#include "vp10/encoder/dct.c"
-
-using libvpx_test::ACMRandom;
-
-namespace {
-void reference_dct_1d(const double *in, double *out, int size) {
- const double PI = 3.141592653589793238462643383279502884;
- const double kInvSqrt2 = 0.707106781186547524400844362104;
- for (int k = 0; k < size; ++k) {
- out[k] = 0;
- for (int n = 0; n < size; ++n) {
- out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size));
- }
- if (k == 0)
- out[k] = out[k] * kInvSqrt2;
- }
-}
-
-typedef void (*FdctFuncRef)(const double *in, double *out, int size);
-typedef void (*IdctFuncRef)(const double *in, double *out, int size);
-typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out);
-typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
-
-class TransTestBase {
- public:
- virtual ~TransTestBase() {}
-
- protected:
- void RunFwdAccuracyCheck() {
- tran_low_t *input = new tran_low_t[txfm_size_];
- tran_low_t *output = new tran_low_t[txfm_size_];
- double *ref_input = new double[txfm_size_];
- double *ref_output = new double[txfm_size_];
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
- for (int ti = 0; ti < count_test_block; ++ti) {
- for (int ni = 0; ni < txfm_size_; ++ni) {
- input[ni] = rnd.Rand8() - rnd.Rand8();
- ref_input[ni] = static_cast<double>(input[ni]);
- }
-
- fwd_txfm_(input, output);
- fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
-
- for (int ni = 0; ni < txfm_size_; ++ni) {
- EXPECT_LE(
- abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
- max_error_);
- }
- }
-
- delete[] input;
- delete[] output;
- delete[] ref_input;
- delete[] ref_output;
- }
-
- double max_error_;
- int txfm_size_;
- FdctFunc fwd_txfm_;
- FdctFuncRef fwd_txfm_ref_;
-};
-
-typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam;
-class Vp10FwdTxfm
- : public TransTestBase,
- public ::testing::TestWithParam<FdctParam> {
- public:
- virtual void SetUp() {
- fwd_txfm_ = GET_PARAM(0);
- fwd_txfm_ref_ = GET_PARAM(1);
- txfm_size_ = GET_PARAM(2);
- max_error_ = GET_PARAM(3);
- }
- virtual void TearDown() {}
-};
-
-TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) {
- RunFwdAccuracyCheck();
-}
-
-INSTANTIATE_TEST_CASE_P(
- C, Vp10FwdTxfm,
- ::testing::Values(
- FdctParam(&fdct4, &reference_dct_1d, 4, 1),
- FdctParam(&fdct8, &reference_dct_1d, 8, 1),
- FdctParam(&fdct16, &reference_dct_1d, 16, 2)));
-} // namespace
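For reference, the reference_dct_1d routine in the file deleted above computes a scaled DCT-II: for an input x of length N,

X[k] = c_k \sum_{n=0}^{N-1} x[n] \cos\!\left(\frac{\pi (2n+1) k}{2N}\right), \qquad c_0 = \tfrac{1}{\sqrt{2}},\ c_k = 1 \text{ for } k > 0,

against which the fixed-point fdct4/fdct8/fdct16 outputs were held to per-size error bounds of 1, 1, and 2 respectively.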
diff --git a/libvpx/test/vp10_inv_txfm_test.cc b/libvpx/test/vp10_inv_txfm_test.cc
deleted file mode 100644
index c49081ef8..000000000
--- a/libvpx/test/vp10_inv_txfm_test.cc
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp10_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vp10/common/blockd.h"
-#include "vp10/common/scan.h"
-#include "vpx/vpx_integer.h"
-#include "vp10/common/vp10_inv_txfm.h"
-
-using libvpx_test::ACMRandom;
-
-namespace {
-const double PI = 3.141592653589793238462643383279502884;
-const double kInvSqrt2 = 0.707106781186547524400844362104;
-
-void reference_idct_1d(const double *in, double *out, int size) {
- for (int n = 0; n < size; ++n) {
- out[n] = 0;
- for (int k = 0; k < size; ++k) {
- if (k == 0)
- out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
- else
- out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
- }
- }
-}
-
-typedef void (*IdctFuncRef)(const double *in, double *out, int size);
-typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
-
-class TransTestBase {
- public:
- virtual ~TransTestBase() {}
-
- protected:
- void RunInvAccuracyCheck() {
- tran_low_t *input = new tran_low_t[txfm_size_];
- tran_low_t *output = new tran_low_t[txfm_size_];
- double *ref_input = new double[txfm_size_];
- double *ref_output = new double[txfm_size_];
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- const int count_test_block = 5000;
- for (int ti = 0; ti < count_test_block; ++ti) {
- for (int ni = 0; ni < txfm_size_; ++ni) {
- input[ni] = rnd.Rand8() - rnd.Rand8();
- ref_input[ni] = static_cast<double>(input[ni]);
- }
-
- fwd_txfm_(input, output);
- fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
-
- for (int ni = 0; ni < txfm_size_; ++ni) {
- EXPECT_LE(
- abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
- max_error_);
- }
- }
-
- delete[] input;
- delete[] output;
- delete[] ref_input;
- delete[] ref_output;
- }
-
- double max_error_;
- int txfm_size_;
- IdctFunc fwd_txfm_;
- IdctFuncRef fwd_txfm_ref_;
-};
-
-typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
-class Vp10InvTxfm
- : public TransTestBase,
- public ::testing::TestWithParam<IdctParam> {
- public:
- virtual void SetUp() {
- fwd_txfm_ = GET_PARAM(0);
- fwd_txfm_ref_ = GET_PARAM(1);
- txfm_size_ = GET_PARAM(2);
- max_error_ = GET_PARAM(3);
- }
- virtual void TearDown() {}
-};
-
-TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) {
- RunInvAccuracyCheck();
-}
-
-INSTANTIATE_TEST_CASE_P(
- C, Vp10InvTxfm,
- ::testing::Values(
- IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1),
- IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2),
- IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4),
- IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6))
-);
-
-typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef std::tr1::tuple<FwdTxfmFunc,
- InvTxfmFunc,
- InvTxfmFunc,
- TX_SIZE, int> PartialInvTxfmParam;
-const int kMaxNumCoeffs = 1024;
-class Vp10PartialIDctTest
- : public ::testing::TestWithParam<PartialInvTxfmParam> {
- public:
- virtual ~Vp10PartialIDctTest() {}
- virtual void SetUp() {
- ftxfm_ = GET_PARAM(0);
- full_itxfm_ = GET_PARAM(1);
- partial_itxfm_ = GET_PARAM(2);
- tx_size_ = GET_PARAM(3);
- last_nonzero_ = GET_PARAM(4);
- }
-
- virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
- int last_nonzero_;
- TX_SIZE tx_size_;
- FwdTxfmFunc ftxfm_;
- InvTxfmFunc full_itxfm_;
- InvTxfmFunc partial_itxfm_;
-};
-
-TEST_P(Vp10PartialIDctTest, RunQuantCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int size;
- switch (tx_size_) {
- case TX_4X4:
- size = 4;
- break;
- case TX_8X8:
- size = 8;
- break;
- case TX_16X16:
- size = 16;
- break;
- case TX_32X32:
- size = 32;
- break;
- default:
- FAIL() << "Wrong Size!";
- break;
- }
- DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-
- const int count_test_block = 1000;
- const int block_size = size * size;
-
- DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
-
- int max_error = 0;
- for (int i = 0; i < count_test_block; ++i) {
- // clear out destination buffer
- memset(dst1, 0, sizeof(*dst1) * block_size);
- memset(dst2, 0, sizeof(*dst2) * block_size);
- memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
- memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-255, 255].
- if (i == 0) {
- for (int j = 0; j < block_size; ++j)
- input_extreme_block[j] = 255;
- } else if (i == 1) {
- for (int j = 0; j < block_size; ++j)
- input_extreme_block[j] = -255;
- } else {
- for (int j = 0; j < block_size; ++j) {
- input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
- }
- }
-
- ftxfm_(input_extreme_block, output_ref_block, size);
-
- // quantization with maximum allowed step sizes
- test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
- for (int j = 1; j < last_nonzero_; ++j)
- test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
- = (output_ref_block[j] / 1828) * 1828;
- }
-
- ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
- ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
-
- for (int j = 0; j < block_size; ++j) {
- const int diff = dst1[j] - dst2[j];
- const int error = diff * diff;
- if (max_error < error)
- max_error = error;
- }
- }
-
- EXPECT_EQ(0, max_error)
- << "Error: partial inverse transform produces different results";
-}
-
-TEST_P(Vp10PartialIDctTest, ResultsMatch) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int size;
- switch (tx_size_) {
- case TX_4X4:
- size = 4;
- break;
- case TX_8X8:
- size = 8;
- break;
- case TX_16X16:
- size = 16;
- break;
- case TX_32X32:
- size = 32;
- break;
- default:
- FAIL() << "Wrong Size!";
- break;
- }
- DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
- DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
- const int count_test_block = 1000;
- const int max_coeff = 32766 / 4;
- const int block_size = size * size;
- int max_error = 0;
- for (int i = 0; i < count_test_block; ++i) {
- // clear out destination buffer
- memset(dst1, 0, sizeof(*dst1) * block_size);
- memset(dst2, 0, sizeof(*dst2) * block_size);
- memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
- memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
- int max_energy_leftover = max_coeff * max_coeff;
- for (int j = 0; j < last_nonzero_; ++j) {
- int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
- (rnd.Rand16() - 32768) / 65536);
- max_energy_leftover -= coef * coef;
- if (max_energy_leftover < 0) {
- max_energy_leftover = 0;
- coef = 0;
- }
- test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef;
- }
-
- memcpy(test_coef_block2, test_coef_block1,
- sizeof(*test_coef_block2) * block_size);
-
- ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
- ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
-
- for (int j = 0; j < block_size; ++j) {
- const int diff = dst1[j] - dst2[j];
- const int error = diff * diff;
- if (max_error < error)
- max_error = error;
- }
- }
-
- EXPECT_EQ(0, max_error)
- << "Error: partial inverse transform produces different results";
-}
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
- C, Vp10PartialIDctTest,
- ::testing::Values(
- make_tuple(&vpx_fdct32x32_c,
- &vp10_idct32x32_1024_add_c,
- &vp10_idct32x32_34_add_c,
- TX_32X32, 34),
- make_tuple(&vpx_fdct32x32_c,
- &vp10_idct32x32_1024_add_c,
- &vp10_idct32x32_1_add_c,
- TX_32X32, 1),
- make_tuple(&vpx_fdct16x16_c,
- &vp10_idct16x16_256_add_c,
- &vp10_idct16x16_10_add_c,
- TX_16X16, 10),
- make_tuple(&vpx_fdct16x16_c,
- &vp10_idct16x16_256_add_c,
- &vp10_idct16x16_1_add_c,
- TX_16X16, 1),
- make_tuple(&vpx_fdct8x8_c,
- &vp10_idct8x8_64_add_c,
- &vp10_idct8x8_12_add_c,
- TX_8X8, 12),
- make_tuple(&vpx_fdct8x8_c,
- &vp10_idct8x8_64_add_c,
- &vp10_idct8x8_1_add_c,
- TX_8X8, 1),
- make_tuple(&vpx_fdct4x4_c,
- &vp10_idct4x4_16_add_c,
- &vp10_idct4x4_1_add_c,
- TX_4X4, 1)));
-} // namespace
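
The ResultsMatch test deleted above seeds each coefficient from a shrinking energy budget: every draw is bounded by the square root of the energy still unspent, so the block's total energy never exceeds max_coeff squared. A minimal stand-alone sketch of that trick, assuming nothing from libvpx (FillCoeffsWithinEnergy is illustrative, and std::rand stands in for the test's ACMRandom):

    #include <cmath>
    #include <cstdint>
    #include <cstdlib>

    // Mirrors the loop in ResultsMatch: each coefficient is at most
    // sqrt(remaining energy) / 2 in magnitude, so the running sum of
    // squares can never exceed max_coeff * max_coeff.
    void FillCoeffsWithinEnergy(int16_t* coeffs, int count, int max_coeff) {
      int64_t energy_left = static_cast<int64_t>(max_coeff) * max_coeff;
      for (int i = 0; i < count; ++i) {
        int16_t c = static_cast<int16_t>(
            std::sqrt(static_cast<double>(energy_left)) *
            ((std::rand() % 65536) - 32768) / 65536);
        energy_left -= static_cast<int64_t>(c) * c;
        if (energy_left < 0) {  // defensive, as in the original test
          energy_left = 0;
          c = 0;
        }
        coeffs[i] = c;
      }
    }
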
diff --git a/libvpx/test/vp9_arf_freq_test.cc b/libvpx/test/vp9_arf_freq_test.cc
index 89200d408..aa3e34d62 100644
--- a/libvpx/test/vp9_arf_freq_test.cc
+++ b/libvpx/test/vp9_arf_freq_test.cc
@@ -229,24 +229,4 @@ VP9_INSTANTIATE_TEST_CASE(
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
-
-#if CONFIG_VP9_HIGHBITDEPTH
-# if CONFIG_VP10_ENCODER
-// TODO(angiebird): 25-29 fail in high bitdepth mode.
-INSTANTIATE_TEST_CASE_P(
- DISABLED_VP10, ArfFreqTest,
- ::testing::Combine(
- ::testing::Values(static_cast<const libvpx_test::CodecFactory *>(
- &libvpx_test::kVP10)),
- ::testing::ValuesIn(kTestVectors),
- ::testing::ValuesIn(kEncodeVectors),
- ::testing::ValuesIn(kMinArfVectors)));
-# endif // CONFIG_VP10_ENCODER
-#else
-VP10_INSTANTIATE_TEST_CASE(
- ArfFreqTest,
- ::testing::ValuesIn(kTestVectors),
- ::testing::ValuesIn(kEncodeVectors),
- ::testing::ValuesIn(kMinArfVectors));
-#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/libvpx/test/vp9_denoiser_sse2_test.cc b/libvpx/test/vp9_denoiser_sse2_test.cc
index 17c799dff..c84d7ff01 100644
--- a/libvpx/test/vp9_denoiser_sse2_test.cc
+++ b/libvpx/test/vp9_denoiser_sse2_test.cc
@@ -94,8 +94,7 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
// Test for all block size.
INSTANTIATE_TEST_CASE_P(
SSE2, VP9DenoiserTest,
- ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
- BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32,
- BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
- BLOCK_64X64));
+ ::testing::Values(BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
+ BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64,
+ BLOCK_64X32, BLOCK_64X64));
} // namespace
diff --git a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
index 3ef6022ad..bd8409879 100644
--- a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
@@ -45,9 +45,9 @@ struct EncodeParameters {
};
const EncodeParameters kVP9EncodeParameterSet[] = {
- {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601},
- {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709},
- {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020},
+ {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601, { 0, 0 }},
+ {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709, { 0, 0 }},
+ {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020, { 0, 0 }},
{0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 640, 480 }},
// TODO(JBB): Test profiles (requires more work).
};
@@ -93,7 +93,7 @@ class VpxEncoderParmsGetToDecoder
}
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
- const libvpx_test::VideoSource &video,
+ const libvpx_test::VideoSource & /*video*/,
libvpx_test::Decoder *decoder) {
vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder();
vpx_codec_alg_priv_t *const priv =
diff --git a/libvpx/test/vp9_end_to_end_test.cc b/libvpx/test/vp9_end_to_end_test.cc
index be1fa68c0..666919f4a 100644
--- a/libvpx/test/vp9_end_to_end_test.cc
+++ b/libvpx/test/vp9_end_to_end_test.cc
@@ -186,24 +186,4 @@ VP9_INSTANTIATE_TEST_CASE(
::testing::ValuesIn(kEncodingModeVectors),
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kCpuUsedVectors));
-
-#if CONFIG_VP9_HIGHBITDEPTH
-# if CONFIG_VP10_ENCODER
-// TODO(angiebird): many fail in high bitdepth mode.
-INSTANTIATE_TEST_CASE_P(
- DISABLED_VP10, EndToEndTestLarge,
- ::testing::Combine(
- ::testing::Values(static_cast<const libvpx_test::CodecFactory *>(
- &libvpx_test::kVP10)),
- ::testing::ValuesIn(kEncodingModeVectors),
- ::testing::ValuesIn(kTestVectors),
- ::testing::ValuesIn(kCpuUsedVectors)));
-# endif // CONFIG_VP10_ENCODER
-#else
-VP10_INSTANTIATE_TEST_CASE(
- EndToEndTestLarge,
- ::testing::ValuesIn(kEncodingModeVectors),
- ::testing::ValuesIn(kTestVectors),
- ::testing::ValuesIn(kCpuUsedVectors));
-#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/libvpx/test/vp9_error_block_test.cc b/libvpx/test/vp9_error_block_test.cc
index 77b12ea8d..23a249e2b 100644
--- a/libvpx/test/vp9_error_block_test.cc
+++ b/libvpx/test/vp9_error_block_test.cc
@@ -164,7 +164,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps) {
- assert(bps == 8);
+ EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz);
}
@@ -173,7 +173,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps) {
- assert(bps == 8);
+ EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz);
}
@@ -195,7 +195,7 @@ int64_t wrap_vp9_highbd_block_error_8bit_avx(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps) {
- assert(bps == 8);
+ EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_avx(coeff, dqcoeff, block_size, ssz);
}
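
The three hunks above replace assert(bps == 8) with EXPECT_EQ(8, bps) in the wrap_* helpers. The difference matters in a test binary: assert compiles away under NDEBUG and aborts the whole process when it does fire, while EXPECT_EQ records a gtest failure and lets the remaining tests run. A minimal illustration, with CheckedBlockError as a made-up stand-in for the wrappers:

    #include <cstdint>

    #include <gtest/gtest.h>

    // Called from inside a running test, a bad |bps| now fails the test
    // instead of aborting the binary (or passing silently under NDEBUG).
    int64_t CheckedBlockError(int bps) {
      EXPECT_EQ(8, bps);  // records a failure, then execution continues
      return 0;
    }
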
diff --git a/libvpx/test/vp9_ethread_test.cc b/libvpx/test/vp9_ethread_test.cc
index 63f6dfea7..62b91094f 100644
--- a/libvpx/test/vp9_ethread_test.cc
+++ b/libvpx/test/vp9_ethread_test.cc
@@ -29,16 +29,9 @@ class VPxEncoderThreadTest
encoding_mode_(GET_PARAM(1)),
set_cpu_used_(GET_PARAM(2)) {
init_flags_ = VPX_CODEC_USE_PSNR;
- vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
- cfg.w = 1280;
- cfg.h = 720;
- decoder_ = codec_->CreateDecoder(cfg, 0);
-
md5_.clear();
}
- virtual ~VPxEncoderThreadTest() {
- delete decoder_;
- }
+ virtual ~VPxEncoderThreadTest() {}
virtual void SetUp() {
InitializeConfig();
@@ -48,7 +41,7 @@ class VPxEncoderThreadTest
cfg_.g_lag_in_frames = 3;
cfg_.rc_end_usage = VPX_VBR;
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
} else {
cfg_.g_lag_in_frames = 0;
cfg_.rc_end_usage = VPX_CBR;
@@ -62,7 +55,7 @@ class VPxEncoderThreadTest
encoder_initialized_ = false;
}
- virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/,
::libvpx_test::Encoder *encoder) {
if (!encoder_initialized_) {
// Encode 4 column tiles.
@@ -81,27 +74,28 @@ class VPxEncoderThreadTest
}
}
- virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
- const vpx_codec_err_t res = decoder_->DecodeFrame(
- reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
+ virtual void DecompressedFrameHook(const vpx_image_t &img,
+ vpx_codec_pts_t /*pts*/) {
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(&img);
+ md5_.push_back(md5_res.Get());
+ }
+
+ virtual bool HandleDecodeResult(const vpx_codec_err_t res,
+ const libvpx_test::VideoSource& /*video*/,
+ libvpx_test::Decoder * /*decoder*/) {
if (res != VPX_CODEC_OK) {
- abort_ = true;
- ASSERT_EQ(VPX_CODEC_OK, res);
+ EXPECT_EQ(VPX_CODEC_OK, res);
+ return false;
}
- const vpx_image_t *img = decoder_->GetDxData().Next();
- if (img) {
- ::libvpx_test::MD5 md5_res;
- md5_res.Add(img);
- md5_.push_back(md5_res.Get());
- }
+ return true;
}
bool encoder_initialized_;
int tiles_;
::libvpx_test::TestMode encoding_mode_;
int set_cpu_used_;
- ::libvpx_test::Decoder *decoder_;
std::vector<std::string> md5_;
};
@@ -134,9 +128,4 @@ VP9_INSTANTIATE_TEST_CASE(
::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
::libvpx_test::kRealTime),
::testing::Range(1, 9));
-
-VP10_INSTANTIATE_TEST_CASE(
- VPxEncoderThreadTest,
- ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
- ::testing::Range(1, 3));
} // namespace
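
The rewritten VPxEncoderThreadTest no longer owns a Decoder; it overrides the test driver's DecompressedFrameHook to hash each decoded frame and HandleDecodeResult to turn decode errors into failures. A stand-alone sketch of that hook pattern, where Frame, DriverBase, and Md5Collector are illustrative stand-ins for libvpx's actual driver types:

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Frame { std::string digest; };  // stand-in for vpx_image_t + MD5

    class DriverBase {
     public:
      virtual ~DriverBase() {}
      void Run(const std::vector<Frame>& frames) {
        for (size_t i = 0; i < frames.size(); ++i) {
          if (!HandleDecodeResult(/*ok=*/true)) return;  // stop on failure
          DecompressedFrameHook(frames[i]);
        }
      }
     protected:
      virtual void DecompressedFrameHook(const Frame&) {}
      virtual bool HandleDecodeResult(bool ok) { return ok; }
    };

    class Md5Collector : public DriverBase {
     public:
      std::vector<std::string> md5_;
     protected:
      virtual void DecompressedFrameHook(const Frame& f) {
        md5_.push_back(f.digest);  // mirrors md5_.push_back(md5_res.Get())
      }
    };

The payoff is ownership: the driver creates and frees the decoder, so the test's constructor and destructor shrink to almost nothing, as the diff above shows.
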
diff --git a/libvpx/test/vp9_intrapred_test.cc b/libvpx/test/vp9_intrapred_test.cc
index ad3327e2d..416f3c322 100644
--- a/libvpx/test/vp9_intrapred_test.cc
+++ b/libvpx/test/vp9_intrapred_test.cc
@@ -34,7 +34,7 @@ class VP9IntraPredBase {
virtual ~VP9IntraPredBase() { libvpx_test::ClearSystemState(); }
protected:
- virtual void Predict(PREDICTION_MODE mode) = 0;
+ virtual void Predict() = 0;
void CheckPrediction(int test_case_number, int *error_count) const {
// For each pixel ensure that the calculated value is the same as reference.
@@ -73,7 +73,7 @@ class VP9IntraPredBase {
left_col_[y] = rnd.Rand16() & mask_;
}
}
- Predict(DC_PRED);
+ Predict();
CheckPrediction(i, &error_count);
}
ASSERT_EQ(0, error_count);
@@ -106,7 +106,7 @@ class VP9IntraPredTest
mask_ = (1 << bit_depth_) - 1;
}
- virtual void Predict(PREDICTION_MODE mode) {
+ virtual void Predict() {
const uint16_t *const_above_row = above_row_;
const uint16_t *const_left_col = left_col_;
ref_fn_(ref_dst_, stride_, const_above_row, const_left_col, bit_depth_);
@@ -132,7 +132,6 @@ using std::tr1::make_tuple;
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_USE_X86INC
-#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -141,13 +140,13 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_tm_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 8),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16, 8),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 8),
@@ -155,34 +154,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
&vpx_highbd_v_predictor_16x16_c, 16, 8),
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 8),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 8)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_8, VP9IntraPredTest,
- ::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
- &vpx_highbd_dc_predictor_4x4_c, 4, 8),
- make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
- &vpx_highbd_dc_predictor_8x8_c, 8, 8),
- make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
- &vpx_highbd_dc_predictor_16x16_c, 16, 8),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
- &vpx_highbd_v_predictor_4x4_c, 4, 8),
- make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
- &vpx_highbd_v_predictor_8x8_c, 8, 8),
- make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
- &vpx_highbd_v_predictor_16x16_c, 16, 8),
- make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
- &vpx_highbd_v_predictor_32x32_c, 32, 8),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
- &vpx_highbd_tm_predictor_4x4_c, 4, 8),
- make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
- &vpx_highbd_tm_predictor_8x8_c, 8, 8)));
-#endif // !ARCH_X86_64
-#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -194,14 +170,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 10),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16,
10),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 10),
@@ -211,35 +187,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
10),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
- &vpx_highbd_tm_predictor_4x4_c, 4, 10),
- make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
- &vpx_highbd_tm_predictor_8x8_c, 8, 10)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_10, VP9IntraPredTest,
- ::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
- &vpx_highbd_dc_predictor_4x4_c, 4, 10),
- make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
- &vpx_highbd_dc_predictor_8x8_c, 8, 10),
- make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
- &vpx_highbd_dc_predictor_16x16_c, 16,
- 10),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
- &vpx_highbd_v_predictor_4x4_c, 4, 10),
- make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
- &vpx_highbd_v_predictor_8x8_c, 8, 10),
- make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
- &vpx_highbd_v_predictor_16x16_c, 16, 10),
- make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
- &vpx_highbd_v_predictor_32x32_c, 32, 10),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 10),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 10)));
-#endif // !ARCH_X86_64
-#if ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
::testing::Values(
make_tuple(&vpx_highbd_dc_predictor_32x32_sse2,
@@ -251,14 +203,14 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_tm_predictor_32x32_sse2,
&vpx_highbd_tm_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_dc_predictor_4x4_sse2,
&vpx_highbd_dc_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
&vpx_highbd_dc_predictor_8x8_c, 8, 12),
make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
&vpx_highbd_dc_predictor_16x16_c, 16,
12),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_v_predictor_4x4_sse2,
&vpx_highbd_v_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
&vpx_highbd_v_predictor_8x8_c, 8, 12),
@@ -268,33 +220,11 @@ INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
&vpx_highbd_v_predictor_32x32_c, 32,
12),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
+ make_tuple(&vpx_highbd_tm_predictor_4x4_sse2,
&vpx_highbd_tm_predictor_4x4_c, 4, 12),
make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
&vpx_highbd_tm_predictor_8x8_c, 8, 12)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2_TO_C_12, VP9IntraPredTest,
- ::testing::Values(
- make_tuple(&vpx_highbd_dc_predictor_4x4_sse,
- &vpx_highbd_dc_predictor_4x4_c, 4, 12),
- make_tuple(&vpx_highbd_dc_predictor_8x8_sse2,
- &vpx_highbd_dc_predictor_8x8_c, 8, 12),
- make_tuple(&vpx_highbd_dc_predictor_16x16_sse2,
- &vpx_highbd_dc_predictor_16x16_c, 16,
- 12),
- make_tuple(&vpx_highbd_v_predictor_4x4_sse,
- &vpx_highbd_v_predictor_4x4_c, 4, 12),
- make_tuple(&vpx_highbd_v_predictor_8x8_sse2,
- &vpx_highbd_v_predictor_8x8_c, 8, 12),
- make_tuple(&vpx_highbd_v_predictor_16x16_sse2,
- &vpx_highbd_v_predictor_16x16_c, 16, 12),
- make_tuple(&vpx_highbd_v_predictor_32x32_sse2,
- &vpx_highbd_v_predictor_32x32_c, 32, 12),
- make_tuple(&vpx_highbd_tm_predictor_4x4_sse,
- &vpx_highbd_tm_predictor_4x4_c, 4, 12),
- make_tuple(&vpx_highbd_tm_predictor_8x8_sse2,
- &vpx_highbd_tm_predictor_8x8_c, 8, 12)));
-#endif // !ARCH_X86_64
+
#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
diff --git a/libvpx/test/vp9_lossless_test.cc b/libvpx/test/vp9_lossless_test.cc
index 09c1070c6..417739315 100644
--- a/libvpx/test/vp9_lossless_test.cc
+++ b/libvpx/test/vp9_lossless_test.cc
@@ -127,8 +127,4 @@ VP9_INSTANTIATE_TEST_CASE(LosslessTest,
::testing::Values(::libvpx_test::kRealTime,
::libvpx_test::kOnePassGood,
::libvpx_test::kTwoPassGood));
-
-VP10_INSTANTIATE_TEST_CASE(LosslessTest,
- ::testing::Values(::libvpx_test::kOnePassGood,
- ::libvpx_test::kTwoPassGood));
} // namespace
diff --git a/libvpx/test/vp9_spatial_svc_encoder.sh b/libvpx/test/vp9_spatial_svc_encoder.sh
index 6dd5f171b..65031073f 100755
--- a/libvpx/test/vp9_spatial_svc_encoder.sh
+++ b/libvpx/test/vp9_spatial_svc_encoder.sh
@@ -54,7 +54,7 @@ vp9_spatial_svc() {
if [ "$(vp9_encode_available)" = "yes" ]; then
local readonly test_name="vp9_spatial_svc"
for layers in $(seq 1 ${vp9_ssvc_test_layers}); do
- vp9_spatial_svc_encoder "${test_name}" -l ${layers}
+ vp9_spatial_svc_encoder "${test_name}" -sl ${layers}
done
fi
}
diff --git a/libvpx/test/webm_video_source.h b/libvpx/test/webm_video_source.h
index 650bc52dc..825875687 100644
--- a/libvpx/test/webm_video_source.h
+++ b/libvpx/test/webm_video_source.h
@@ -62,7 +62,7 @@ class WebMVideoSource : public CompressedVideoSource {
void FillFrame() {
ASSERT_TRUE(vpx_ctx_->file != NULL);
- const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
+ const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
ASSERT_GE(status, 0) << "webm_read_frame failed";
if (status == 1) {
end_of_file_ = true;
@@ -72,7 +72,7 @@ class WebMVideoSource : public CompressedVideoSource {
void SeekToNextKeyFrame() {
ASSERT_TRUE(vpx_ctx_->file != NULL);
do {
- const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
+ const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
ASSERT_GE(status, 0) << "webm_read_frame failed";
++frame_;
if (status == 1) {
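
Both call sites above move to the three-argument webm_read_frame, which previously took the buffer-size pointer twice (once for capacity, once for length). A hedged sketch of a read loop against the new shape; the WebmInputContext forward declaration and prototype are assumptions about the underlying webmdec header, and the status convention (0 = frame read, 1 = end of file, negative = error) follows the checks in WebMVideoSource:

    #include <cstddef>
    #include <cstdint>

    struct WebmInputContext;  // opaque; assumed defined by libvpx's webmdec.h
    int webm_read_frame(WebmInputContext* ctx, uint8_t** buf, size_t* buf_sz);

    // Drains every frame from |ctx|; returns true on a clean end of file.
    bool DrainAllFrames(WebmInputContext* ctx) {
      uint8_t* buf = NULL;
      size_t buf_sz = 0;
      int status;
      while ((status = webm_read_frame(ctx, &buf, &buf_sz)) == 0) {
        // process buf[0..buf_sz) here
      }
      return status == 1;  // 1 signals EOF; negative values are errors
    }
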
diff --git a/libvpx/third_party/googletest/README.libvpx b/libvpx/third_party/googletest/README.libvpx
index 7201a67d3..1eca78dd9 100644
--- a/libvpx/third_party/googletest/README.libvpx
+++ b/libvpx/third_party/googletest/README.libvpx
@@ -12,4 +12,8 @@ failures, various options for running the tests, and XML test report
generation.
Local Modifications:
-Removed unused declarations of kPathSeparatorString to have warning free build.
\ No newline at end of file
+- Removed unused declarations of kPathSeparatorString to have warning
+ free build.
+- Added GTEST_ATTRIBUTE_UNUSED_ to test registering dummies in TEST_P
+ and INSTANTIATE_TEST_CASE_P to remove warnings about unused variables
+ under GCC 5.
\ No newline at end of file
diff --git a/libvpx/third_party/googletest/src/include/gtest/gtest.h b/libvpx/third_party/googletest/src/include/gtest/gtest.h
index 4f3804f70..581a44e95 100644
--- a/libvpx/third_party/googletest/src/include/gtest/gtest.h
+++ b/libvpx/third_party/googletest/src/include/gtest/gtest.h
@@ -16960,7 +16960,7 @@ internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \
return 0; \
} \
- static int gtest_registering_dummy_; \
+ static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
GTEST_DISALLOW_COPY_AND_ASSIGN_(\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \
}; \
@@ -16972,7 +16972,7 @@ internal::CartesianProductHolder10<Generator1, Generator2, Generator3,
# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \
::testing::internal::ParamGenerator<test_case_name::ParamType> \
gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \
- int gtest_##prefix##test_case_name##_dummy_ = \
+ int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \
::testing::UnitTest::GetInstance()->parameterized_test_registry(). \
GetTestCasePatternHolder<test_case_name>(\
#test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\
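
Both macro changes attach GTEST_ATTRIBUTE_UNUSED_ to the registration dummies, whose only purpose is their initializer's side effect, so GCC 5 stops flagging them as unused variables. A close approximation of how such an attribute macro is defined (gtest's real definition lives in gtest-port.h; ATTRIBUTE_UNUSED_EXAMPLE is illustrative):

    // Under GCC/Clang the attribute suppresses -Wunused-variable;
    // elsewhere the macro expands to nothing.
    #if defined(__GNUC__) || defined(__clang__)
    #define ATTRIBUTE_UNUSED_EXAMPLE __attribute__((unused))
    #else
    #define ATTRIBUTE_UNUSED_EXAMPLE
    #endif

    static int registration_dummy ATTRIBUTE_UNUSED_EXAMPLE = 0;
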
diff --git a/libvpx/third_party/libwebm/Android.mk b/libvpx/third_party/libwebm/Android.mk
index be9d77dee..8149a083f 100644
--- a/libvpx/third_party/libwebm/Android.mk
+++ b/libvpx/third_party/libwebm/Android.mk
@@ -2,9 +2,16 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE:= libwebm
-LOCAL_SRC_FILES:= mkvparser.cpp \
- mkvreader.cpp \
- mkvmuxer.cpp \
- mkvmuxerutil.cpp \
- mkvwriter.cpp
+LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
+LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -Wno-extern-c-compat
+LOCAL_C_INCLUDES:= $(LOCAL_PATH)
+LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH)
+
+LOCAL_SRC_FILES:= common/file_util.cc \
+ common/hdr_util.cc \
+ mkvparser/mkvparser.cc \
+ mkvparser/mkvreader.cc \
+ mkvmuxer/mkvmuxer.cc \
+ mkvmuxer/mkvmuxerutil.cc \
+ mkvmuxer/mkvwriter.cc
include $(BUILD_STATIC_LIBRARY)
diff --git a/libvpx/third_party/libwebm/README.libvpx b/libvpx/third_party/libwebm/README.libvpx
index 2989d3d89..73f830322 100644
--- a/libvpx/third_party/libwebm/README.libvpx
+++ b/libvpx/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: 476366249e1fda7710a389cd41c57db42305e0d4
+Version: 32d5ac49414a8914ec1e1f285f3f927c6e8ec29d
License: BSD
License File: LICENSE.txt
diff --git a/libvpx/third_party/libwebm/RELEASE.TXT b/libvpx/third_party/libwebm/RELEASE.TXT
deleted file mode 100644
index a7e9f032c..000000000
--- a/libvpx/third_party/libwebm/RELEASE.TXT
+++ /dev/null
@@ -1,34 +0,0 @@
-1.0.0.5
- * Handled case when no duration
- * Handled empty clusters
- * Handled empty clusters when seeking
- * Implemented check lacing bits
-
-1.0.0.4
- * Made Cues member variables mutables
- * Defined against badly-formatted cue points
- * Segment::GetCluster returns CuePoint too
- * Separated cue-based searches
-
-1.0.0.3
- * Added Block::GetOffset() to get a frame's offset in a block
- * Changed cluster count type from size_t to long
- * Parsed SeekHead to find cues
- * Allowed seeking beyond end of cluster cache
- * Added not to attempt to reparse cues element
- * Restructured Segment::LoadCluster
- * Marked position of cues without parsing cues element
- * Allowed cue points to be loaded incrementally
- * Implemented to load lazily cue points as they're searched
- * Merged Cues::LoadCuePoint into Cues::Find
- * Lazy init cues
- * Loaded cue point during find
-
-1.0.0.2
- * added support for Cues element
- * seeking was improved
-
-1.0.0.1
- * fixed item 141
- * added item 142
- * added this file, RELEASE.TXT, to repository
diff --git a/libvpx/third_party/libwebm/common/file_util.cc b/libvpx/third_party/libwebm/common/file_util.cc
new file mode 100644
index 000000000..4f91318f3
--- /dev/null
+++ b/libvpx/third_party/libwebm/common/file_util.cc
@@ -0,0 +1,67 @@
+// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#include "common/file_util.h"
+
+#include <sys/stat.h>
+#ifndef _MSC_VER
+#include <unistd.h> // close()
+#endif
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <ios>
+
+namespace libwebm {
+
+std::string GetTempFileName() {
+#if !defined _MSC_VER && !defined __MINGW32__
+ char temp_file_name_template[] = "libwebm_temp.XXXXXX";
+ int fd = mkstemp(temp_file_name_template);
+ if (fd != -1) {
+ close(fd);
+ return std::string(temp_file_name_template);
+ }
+ return std::string();
+#else
+ char tmp_file_name[_MAX_PATH];
+ errno_t err = tmpnam_s(tmp_file_name);
+ if (err == 0) {
+ return std::string(tmp_file_name);
+ }
+ return std::string();
+#endif
+}
+
+uint64_t GetFileSize(const std::string& file_name) {
+ uint64_t file_size = 0;
+#ifndef _MSC_VER
+ struct stat st;
+ st.st_size = 0;
+ if (stat(file_name.c_str(), &st) == 0) {
+#else
+ struct _stat st;
+ st.st_size = 0;
+ if (_stat(file_name.c_str(), &st) == 0) {
+#endif
+ file_size = st.st_size;
+ }
+ return file_size;
+}
+
+TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); }
+
+TempFileDeleter::~TempFileDeleter() {
+ std::ifstream file(file_name_.c_str());
+ if (file.good()) {
+ file.close();
+ std::remove(file_name_.c_str());
+ }
+}
+
+} // namespace libwebm
diff --git a/libvpx/third_party/libwebm/common/file_util.h b/libvpx/third_party/libwebm/common/file_util.h
new file mode 100644
index 000000000..0e71eac11
--- /dev/null
+++ b/libvpx/third_party/libwebm/common/file_util.h
@@ -0,0 +1,41 @@
+// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#ifndef LIBWEBM_COMMON_FILE_UTIL_H_
+#define LIBWEBM_COMMON_FILE_UTIL_H_
+
+#include <stdint.h>
+
+#include <string>
+
+#include "mkvmuxer/mkvmuxertypes.h" // LIBWEBM_DISALLOW_COPY_AND_ASSIGN()
+
+namespace libwebm {
+
+// Returns a temporary file name.
+std::string GetTempFileName();
+
+// Returns size of file specified by |file_name|, or 0 upon failure.
+uint64_t GetFileSize(const std::string& file_name);
+
+// Manages life of temporary file specified at time of construction. Deletes
+// file upon destruction.
+class TempFileDeleter {
+ public:
+ TempFileDeleter();
+ explicit TempFileDeleter(std::string file_name) : file_name_(file_name) {}
+ ~TempFileDeleter();
+ const std::string& name() const { return file_name_; }
+
+ private:
+ std::string file_name_;
+ LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TempFileDeleter);
+};
+
+} // namespace libwebm
+
+#endif // LIBWEBM_COMMON_FILE_UTIL_H_
\ No newline at end of file
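
TempFileDeleter, declared above, is a small RAII guard: it picks a name at construction and removes the file, if one was created, at destruction. A hedged usage sketch (WriteScratch is illustrative; only GetTempFileName(), name(), and the destructor behavior come from the header):

    #include <cstdio>

    #include "common/file_util.h"

    void WriteScratch() {
      libwebm::TempFileDeleter tmp;  // name chosen via GetTempFileName()
      std::FILE* f = std::fopen(tmp.name().c_str(), "wb");
      if (f != NULL) {
        std::fputs("scratch", f);
        std::fclose(f);
      }
      // |tmp| goes out of scope here; its destructor deletes the file.
    }
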
diff --git a/libvpx/third_party/libwebm/common/hdr_util.cc b/libvpx/third_party/libwebm/common/hdr_util.cc
new file mode 100644
index 000000000..e1a9842fb
--- /dev/null
+++ b/libvpx/third_party/libwebm/common/hdr_util.cc
@@ -0,0 +1,182 @@
+// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#include "hdr_util.h"
+
+#include <cstddef>
+#include <new>
+
+#include "mkvparser/mkvparser.h"
+
+namespace libwebm {
+bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
+ PrimaryChromaticityPtr* muxer_pc) {
+ muxer_pc->reset(new (std::nothrow)
+ mkvmuxer::PrimaryChromaticity(parser_pc.x, parser_pc.y));
+ if (!muxer_pc->get())
+ return false;
+ return true;
+}
+
+bool MasteringMetadataValuePresent(double value) {
+ return value != mkvparser::MasteringMetadata::kValueNotPresent;
+}
+
+bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
+ mkvmuxer::MasteringMetadata* muxer_mm) {
+ if (MasteringMetadataValuePresent(parser_mm.luminance_max))
+ muxer_mm->luminance_max = parser_mm.luminance_max;
+ if (MasteringMetadataValuePresent(parser_mm.luminance_min))
+ muxer_mm->luminance_min = parser_mm.luminance_min;
+
+ PrimaryChromaticityPtr r_ptr(NULL);
+ PrimaryChromaticityPtr g_ptr(NULL);
+ PrimaryChromaticityPtr b_ptr(NULL);
+ PrimaryChromaticityPtr wp_ptr(NULL);
+
+ if (parser_mm.r) {
+ if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr))
+ return false;
+ }
+ if (parser_mm.g) {
+ if (!CopyPrimaryChromaticity(*parser_mm.g, &g_ptr))
+ return false;
+ }
+ if (parser_mm.b) {
+ if (!CopyPrimaryChromaticity(*parser_mm.b, &b_ptr))
+ return false;
+ }
+ if (parser_mm.white_point) {
+ if (!CopyPrimaryChromaticity(*parser_mm.white_point, &wp_ptr))
+ return false;
+ }
+
+ if (!muxer_mm->SetChromaticity(r_ptr.get(), g_ptr.get(), b_ptr.get(),
+ wp_ptr.get())) {
+ return false;
+ }
+
+ return true;
+}
+
+bool ColourValuePresent(long long value) {
+ return value != mkvparser::Colour::kValueNotPresent;
+}
+
+bool CopyColour(const mkvparser::Colour& parser_colour,
+ mkvmuxer::Colour* muxer_colour) {
+ if (!muxer_colour)
+ return false;
+
+ if (ColourValuePresent(parser_colour.matrix_coefficients))
+ muxer_colour->matrix_coefficients = parser_colour.matrix_coefficients;
+ if (ColourValuePresent(parser_colour.bits_per_channel))
+ muxer_colour->bits_per_channel = parser_colour.bits_per_channel;
+ if (ColourValuePresent(parser_colour.chroma_subsampling_horz))
+ muxer_colour->chroma_subsampling_horz =
+ parser_colour.chroma_subsampling_horz;
+ if (ColourValuePresent(parser_colour.chroma_subsampling_vert))
+ muxer_colour->chroma_subsampling_vert =
+ parser_colour.chroma_subsampling_vert;
+ if (ColourValuePresent(parser_colour.cb_subsampling_horz))
+ muxer_colour->cb_subsampling_horz = parser_colour.cb_subsampling_horz;
+ if (ColourValuePresent(parser_colour.cb_subsampling_vert))
+ muxer_colour->cb_subsampling_vert = parser_colour.cb_subsampling_vert;
+ if (ColourValuePresent(parser_colour.chroma_siting_horz))
+ muxer_colour->chroma_siting_horz = parser_colour.chroma_siting_horz;
+ if (ColourValuePresent(parser_colour.chroma_siting_vert))
+ muxer_colour->chroma_siting_vert = parser_colour.chroma_siting_vert;
+ if (ColourValuePresent(parser_colour.range))
+ muxer_colour->range = parser_colour.range;
+ if (ColourValuePresent(parser_colour.transfer_characteristics))
+ muxer_colour->transfer_characteristics =
+ parser_colour.transfer_characteristics;
+ if (ColourValuePresent(parser_colour.primaries))
+ muxer_colour->primaries = parser_colour.primaries;
+ if (ColourValuePresent(parser_colour.max_cll))
+ muxer_colour->max_cll = parser_colour.max_cll;
+ if (ColourValuePresent(parser_colour.max_fall))
+ muxer_colour->max_fall = parser_colour.max_fall;
+
+ if (parser_colour.mastering_metadata) {
+ mkvmuxer::MasteringMetadata muxer_mm;
+ if (!CopyMasteringMetadata(*parser_colour.mastering_metadata, &muxer_mm))
+ return false;
+ if (!muxer_colour->SetMasteringMetadata(muxer_mm))
+ return false;
+ }
+ return true;
+}
+
+// Format of VPx private data:
+//
+// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// | ID Byte | Length | |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+// | |
+// : Bytes 1..Length of Codec Feature :
+// | |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+//
+// ID Byte Format
+// ID byte is an unsigned byte.
+// 0 1 2 3 4 5 6 7
+// +-+-+-+-+-+-+-+-+
+// |X| ID |
+// +-+-+-+-+-+-+-+-+
+//
+// The X bit is reserved.
+//
+// Currently only profile level is supported. ID byte must be set to 1, and
+// length must be 1. Supported values are:
+//
+// 10: Level 1
+// 11: Level 1.1
+// 20: Level 2
+// 21: Level 2.1
+// 30: Level 3
+// 31: Level 3.1
+// 40: Level 4
+// 41: Level 4.1
+// 50: Level 5
+// 51: Level 5.1
+// 52: Level 5.2
+// 60: Level 6
+// 61: Level 6.1
+// 62: Level 6.2
+//
+// See the following link for more information:
+// http://www.webmproject.org/vp9/profiles/
+int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length) {
+ const int kVpxCodecPrivateLength = 3;
+ if (!private_data || length != kVpxCodecPrivateLength)
+ return 0;
+
+ const uint8_t id_byte = *private_data;
+ if (id_byte != 1)
+ return 0;
+
+ const int kVpxProfileLength = 1;
+ const uint8_t length_byte = private_data[1];
+ if (length_byte != kVpxProfileLength)
+ return 0;
+
+ const int level = static_cast<int>(private_data[2]);
+
+ const int kNumLevels = 14;
+ const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40,
+ 41, 50, 51, 52, 60, 61, 62};
+
+ for (int i = 0; i < kNumLevels; ++i) {
+ if (level == levels[i])
+ return level;
+ }
+
+ return 0;
+}
+} // namespace libwebm
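
A worked example for the private-data layout documented above: ID byte 1, a one-byte feature payload, and the value 41 decoding to Level 4.1. The main function is illustrative; the calls match ParseVpxCodecPrivate as defined in this file:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    #include "common/hdr_util.h"

    int main() {
      // ID byte 0x01, length 0x01, value 0x29 == 41 -> Level 4.1.
      const uint8_t private_data[3] = {0x01, 0x01, 0x29};
      assert(libwebm::ParseVpxCodecPrivate(private_data, 3) == 41);
      // Malformed input (bad ID, bad length, unknown level) yields 0.
      assert(libwebm::ParseVpxCodecPrivate(NULL, 3) == 0);
      return 0;
    }
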
diff --git a/libvpx/third_party/libwebm/common/hdr_util.h b/libvpx/third_party/libwebm/common/hdr_util.h
new file mode 100644
index 000000000..d30c2b9f2
--- /dev/null
+++ b/libvpx/third_party/libwebm/common/hdr_util.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#ifndef LIBWEBM_COMMON_HDR_UTIL_H_
+#define LIBWEBM_COMMON_HDR_UTIL_H_
+
+#include <stdint.h>
+
+#include <memory>
+
+#include "mkvmuxer/mkvmuxer.h"
+
+namespace mkvparser {
+struct Colour;
+struct MasteringMetadata;
+struct PrimaryChromaticity;
+} // namespace mkvparser
+
+namespace libwebm {
+// Utility types and functions for working with the Colour element and its
+// children. Copiers return true upon success. Presence functions return true
+// when the specified element is present.
+
+// TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is
+// required by libwebm.
+
+typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
+
+bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
+ PrimaryChromaticityPtr* muxer_pc);
+
+bool MasteringMetadataValuePresent(double value);
+
+bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
+ mkvmuxer::MasteringMetadata* muxer_mm);
+
+bool ColourValuePresent(long long value);
+
+bool CopyColour(const mkvparser::Colour& parser_colour,
+ mkvmuxer::Colour* muxer_colour);
+
+// Returns the VP9 level upon success or 0 upon failure.
+int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length);
+
+} // namespace libwebm
+
+#endif // LIBWEBM_COMMON_HDR_UTIL_H_
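
A hedged sketch of wiring the copier declared above into a remux path; MaybeCopyColour is illustrative, and treating a missing parser Colour as success rather than failure is a design choice of the sketch, not of the library:

    #include "common/hdr_util.h"

    // Copies colour metadata from a parsed track into a muxer track,
    // if any is present; returns false only when the copy itself fails.
    bool MaybeCopyColour(const mkvparser::Colour* parser_colour,
                         mkvmuxer::Colour* muxer_colour) {
      if (parser_colour == NULL) return true;  // nothing to copy
      return libwebm::CopyColour(*parser_colour, muxer_colour);
    }
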
diff --git a/libvpx/third_party/libwebm/webmids.hpp b/libvpx/third_party/libwebm/common/webmids.h
index ad4ab5738..32a0c5fb9 100644
--- a/libvpx/third_party/libwebm/webmids.hpp
+++ b/libvpx/third_party/libwebm/common/webmids.h
@@ -6,10 +6,10 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#ifndef WEBMIDS_HPP
-#define WEBMIDS_HPP
+#ifndef COMMON_WEBMIDS_H_
+#define COMMON_WEBMIDS_H_
-namespace mkvmuxer {
+namespace libwebm {
enum MkvId {
kMkvEBML = 0x1A45DFA3,
@@ -95,6 +95,35 @@ enum MkvId {
kMkvAspectRatioType = 0x54B3,
kMkvFrameRate = 0x2383E3,
// end video
+ // colour
+ kMkvColour = 0x55B0,
+ kMkvMatrixCoefficients = 0x55B1,
+ kMkvBitsPerChannel = 0x55B2,
+ kMkvChromaSubsamplingHorz = 0x55B3,
+ kMkvChromaSubsamplingVert = 0x55B4,
+ kMkvCbSubsamplingHorz = 0x55B5,
+ kMkvCbSubsamplingVert = 0x55B6,
+ kMkvChromaSitingHorz = 0x55B7,
+ kMkvChromaSitingVert = 0x55B8,
+ kMkvRange = 0x55B9,
+ kMkvTransferCharacteristics = 0x55BA,
+ kMkvPrimaries = 0x55BB,
+ kMkvMaxCLL = 0x55BC,
+ kMkvMaxFALL = 0x55BD,
+ // mastering metadata
+ kMkvMasteringMetadata = 0x55D0,
+ kMkvPrimaryRChromaticityX = 0x55D1,
+ kMkvPrimaryRChromaticityY = 0x55D2,
+ kMkvPrimaryGChromaticityX = 0x55D3,
+ kMkvPrimaryGChromaticityY = 0x55D4,
+ kMkvPrimaryBChromaticityX = 0x55D5,
+ kMkvPrimaryBChromaticityY = 0x55D6,
+ kMkvWhitePointChromaticityX = 0x55D7,
+ kMkvWhitePointChromaticityY = 0x55D8,
+ kMkvLuminanceMax = 0x55D9,
+ kMkvLuminanceMin = 0x55DA,
+ // end mastering metadata
+ // end colour
// audio
kMkvAudio = 0xE1,
kMkvSamplingFrequency = 0xB5,
@@ -150,6 +179,6 @@ enum MkvId {
kMkvTagString = 0x4487
};
-} // end namespace mkvmuxer
+} // namespace libwebm
-#endif // WEBMIDS_HPP
+#endif // COMMON_WEBMIDS_H_
diff --git a/libvpx/third_party/libwebm/mkvmuxer.cpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
index 9be3119a4..c79ce24ed 100644
--- a/libvpx/third_party/libwebm/mkvmuxer.cpp
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
@@ -6,27 +6,28 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#include "mkvmuxer.hpp"
+#include "mkvmuxer/mkvmuxer.h"
+#include <cfloat>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
+#include <memory>
#include <new>
+#include <vector>
-#include "mkvmuxerutil.hpp"
-#include "mkvparser.hpp"
-#include "mkvwriter.hpp"
-#include "webmids.hpp"
-
-#ifdef _MSC_VER
-// Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable : 4996)
-#endif
+#include "common/webmids.h"
+#include "mkvmuxer/mkvmuxerutil.h"
+#include "mkvmuxer/mkvwriter.h"
+#include "mkvparser/mkvparser.h"
namespace mkvmuxer {
+const float MasteringMetadata::kValueNotPresent = FLT_MAX;
+const uint64_t Colour::kValueNotPresent = UINT64_MAX;
+
namespace {
// Deallocate the string designated by |dst|, and then copy the |src|
// string to |dst|. The caller owns both the |src| string and the
@@ -55,6 +56,20 @@ bool StrCpy(const char* src, char** dst_ptr) {
strcpy(dst, src); // NOLINT
return true;
}
+
+typedef std::auto_ptr<PrimaryChromaticity> PrimaryChromaticityPtr;
+bool CopyChromaticity(const PrimaryChromaticity* src,
+ PrimaryChromaticityPtr* dst) {
+ if (!dst)
+ return false;
+
+ dst->reset(new (std::nothrow) PrimaryChromaticity(src->x, src->y));
+ if (!dst->get())
+ return false;
+
+ return true;
+}
+
} // namespace
///////////////////////////////////////////////////////////////
@@ -65,31 +80,31 @@ IMkvWriter::IMkvWriter() {}
IMkvWriter::~IMkvWriter() {}
-bool WriteEbmlHeader(IMkvWriter* writer, uint64 doc_type_version) {
+bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) {
// Level 0
- uint64 size = EbmlElementSize(kMkvEBMLVersion, 1ULL);
- size += EbmlElementSize(kMkvEBMLReadVersion, 1ULL);
- size += EbmlElementSize(kMkvEBMLMaxIDLength, 4ULL);
- size += EbmlElementSize(kMkvEBMLMaxSizeLength, 8ULL);
- size += EbmlElementSize(kMkvDocType, "webm");
- size += EbmlElementSize(kMkvDocTypeVersion, doc_type_version);
- size += EbmlElementSize(kMkvDocTypeReadVersion, 2ULL);
+ uint64_t size = EbmlElementSize(libwebm::kMkvEBMLVersion, UINT64_C(1));
+ size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, UINT64_C(1));
+ size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, UINT64_C(4));
+ size += EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, UINT64_C(8));
+ size += EbmlElementSize(libwebm::kMkvDocType, "webm");
+ size += EbmlElementSize(libwebm::kMkvDocTypeVersion, doc_type_version);
+ size += EbmlElementSize(libwebm::kMkvDocTypeReadVersion, UINT64_C(2));
- if (!WriteEbmlMasterElement(writer, kMkvEBML, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvEBML, size))
return false;
- if (!WriteEbmlElement(writer, kMkvEBMLVersion, 1ULL))
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion, UINT64_C(1)))
return false;
- if (!WriteEbmlElement(writer, kMkvEBMLReadVersion, 1ULL))
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion, UINT64_C(1)))
return false;
- if (!WriteEbmlElement(writer, kMkvEBMLMaxIDLength, 4ULL))
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength, UINT64_C(4)))
return false;
- if (!WriteEbmlElement(writer, kMkvEBMLMaxSizeLength, 8ULL))
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength, UINT64_C(8)))
return false;
- if (!WriteEbmlElement(writer, kMkvDocType, "webm"))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDocType, "webm"))
return false;
- if (!WriteEbmlElement(writer, kMkvDocTypeVersion, doc_type_version))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion, doc_type_version))
return false;
- if (!WriteEbmlElement(writer, kMkvDocTypeReadVersion, 2ULL))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion, UINT64_C(2)))
return false;
return true;
@@ -100,16 +115,16 @@ bool WriteEbmlHeader(IMkvWriter* writer) {
}
bool ChunkedCopy(mkvparser::IMkvReader* source, mkvmuxer::IMkvWriter* dst,
- mkvmuxer::int64 start, int64 size) {
+ int64_t start, int64_t size) {
// TODO(vigneshv): Check if this is a reasonable value.
- const uint32 kBufSize = 2048;
- uint8* buf = new uint8[kBufSize];
- int64 offset = start;
+ const uint32_t kBufSize = 2048;
+ uint8_t* buf = new uint8_t[kBufSize];
+ int64_t offset = start;
while (size > 0) {
- const int64 read_len = (size > kBufSize) ? kBufSize : size;
+ const int64_t read_len = (size > kBufSize) ? kBufSize : size;
if (source->Read(offset, static_cast<long>(read_len), buf))
return false;
- dst->Write(buf, static_cast<uint32>(read_len));
+ dst->Write(buf, static_cast<uint32_t>(read_len));
offset += read_len;
size -= read_len;
}
@@ -126,6 +141,7 @@ Frame::Frame()
additional_(NULL),
additional_length_(0),
duration_(0),
+ duration_set_(false),
frame_(NULL),
is_key_(false),
length_(0),
@@ -158,16 +174,19 @@ bool Frame::CopyFrom(const Frame& frame) {
return false;
}
duration_ = frame.duration();
+ duration_set_ = frame.duration_set();
is_key_ = frame.is_key();
track_number_ = frame.track_number();
timestamp_ = frame.timestamp();
discard_padding_ = frame.discard_padding();
+ reference_block_timestamp_ = frame.reference_block_timestamp();
+ reference_block_timestamp_set_ = frame.reference_block_timestamp_set();
return true;
}
-bool Frame::Init(const uint8* frame, uint64 length) {
- uint8* const data =
- new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT
+bool Frame::Init(const uint8_t* frame, uint64_t length) {
+ uint8_t* const data =
+ new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
if (!data)
return false;
@@ -179,10 +198,10 @@ bool Frame::Init(const uint8* frame, uint64 length) {
return true;
}
-bool Frame::AddAdditionalData(const uint8* additional, uint64 length,
- uint64 add_id) {
- uint8* const data =
- new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT
+bool Frame::AddAdditionalData(const uint8_t* additional, uint64_t length,
+ uint64_t add_id) {
+ uint8_t* const data =
+ new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
if (!data)
return false;
@@ -216,7 +235,12 @@ bool Frame::CanBeSimpleBlock() const {
return additional_ == NULL && discard_padding_ == 0 && duration_ == 0;
}
-void Frame::set_reference_block_timestamp(int64 reference_block_timestamp) {
+void Frame::set_duration(uint64_t duration) {
+ duration_ = duration;
+ duration_set_ = true;
+}
+
+void Frame::set_reference_block_timestamp(int64_t reference_block_timestamp) {
reference_block_timestamp_ = reference_block_timestamp;
reference_block_timestamp_set_ = true;
}
@@ -238,61 +262,64 @@ bool CuePoint::Write(IMkvWriter* writer) const {
if (!writer || track_ < 1 || cluster_pos_ < 1)
return false;
- uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_);
- size += EbmlElementSize(kMkvCueTrack, track_);
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvCueClusterPosition, cluster_pos_);
+ size += EbmlElementSize(libwebm::kMkvCueTrack, track_);
if (output_block_number_ && block_number_ > 1)
- size += EbmlElementSize(kMkvCueBlockNumber, block_number_);
- const uint64 track_pos_size =
- EbmlMasterElementSize(kMkvCueTrackPositions, size) + size;
- const uint64 payload_size =
- EbmlElementSize(kMkvCueTime, time_) + track_pos_size;
+ size += EbmlElementSize(libwebm::kMkvCueBlockNumber, block_number_);
+ const uint64_t track_pos_size =
+ EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size;
+ const uint64_t payload_size =
+ EbmlElementSize(libwebm::kMkvCueTime, time_) + track_pos_size;
- if (!WriteEbmlMasterElement(writer, kMkvCuePoint, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvCuePoint, payload_size))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, kMkvCueTime, time_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueTime, time_))
return false;
- if (!WriteEbmlMasterElement(writer, kMkvCueTrackPositions, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvCueTrackPositions, size))
return false;
- if (!WriteEbmlElement(writer, kMkvCueTrack, track_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack, track_))
return false;
- if (!WriteEbmlElement(writer, kMkvCueClusterPosition, cluster_pos_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition, cluster_pos_))
return false;
if (output_block_number_ && block_number_ > 1)
- if (!WriteEbmlElement(writer, kMkvCueBlockNumber, block_number_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber, block_number_))
return false;
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0)
return false;
- if (stop_position - payload_position != static_cast<int64>(payload_size))
+ if (stop_position - payload_position != static_cast<int64_t>(payload_size))
return false;
return true;
}
-uint64 CuePoint::PayloadSize() const {
- uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_);
- size += EbmlElementSize(kMkvCueTrack, track_);
+uint64_t CuePoint::PayloadSize() const {
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvCueClusterPosition, cluster_pos_);
+ size += EbmlElementSize(libwebm::kMkvCueTrack, track_);
if (output_block_number_ && block_number_ > 1)
- size += EbmlElementSize(kMkvCueBlockNumber, block_number_);
- const uint64 track_pos_size =
- EbmlMasterElementSize(kMkvCueTrackPositions, size) + size;
- const uint64 payload_size =
- EbmlElementSize(kMkvCueTime, time_) + track_pos_size;
+ size += EbmlElementSize(libwebm::kMkvCueBlockNumber, block_number_);
+ const uint64_t track_pos_size =
+ EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size;
+ const uint64_t payload_size =
+ EbmlElementSize(libwebm::kMkvCueTime, time_) + track_pos_size;
return payload_size;
}
-uint64 CuePoint::Size() const {
- const uint64 payload_size = PayloadSize();
- return EbmlMasterElementSize(kMkvCuePoint, payload_size) + payload_size;
+uint64_t CuePoint::Size() const {
+ const uint64_t payload_size = PayloadSize();
+ return EbmlMasterElementSize(libwebm::kMkvCuePoint, payload_size) +
+ payload_size;
}
///////////////////////////////////////////////////////////////
@@ -307,7 +334,7 @@ Cues::Cues()
Cues::~Cues() {
if (cue_entries_) {
- for (int32 i = 0; i < cue_entries_size_; ++i) {
+ for (int32_t i = 0; i < cue_entries_size_; ++i) {
CuePoint* const cue = cue_entries_[i];
delete cue;
}
@@ -321,7 +348,7 @@ bool Cues::AddCue(CuePoint* cue) {
if ((cue_entries_size_ + 1) > cue_entries_capacity_) {
// Add more CuePoints.
- const int32 new_capacity =
+ const int32_t new_capacity =
(!cue_entries_capacity_) ? 2 : cue_entries_capacity_ * 2;
if (new_capacity < 1)
@@ -332,7 +359,7 @@ bool Cues::AddCue(CuePoint* cue) {
if (!cues)
return false;
- for (int32 i = 0; i < cue_entries_size_; ++i) {
+ for (int32_t i = 0; i < cue_entries_size_; ++i) {
cues[i] = cue_entries_[i];
}
@@ -347,7 +374,7 @@ bool Cues::AddCue(CuePoint* cue) {
return true;
}
-CuePoint* Cues::GetCueByIndex(int32 index) const {
+CuePoint* Cues::GetCueByIndex(int32_t index) const {
if (cue_entries_ == NULL)
return NULL;
@@ -357,11 +384,11 @@ CuePoint* Cues::GetCueByIndex(int32 index) const {
return cue_entries_[index];
}
-uint64 Cues::Size() {
- uint64 size = 0;
- for (int32 i = 0; i < cue_entries_size_; ++i)
+uint64_t Cues::Size() {
+ uint64_t size = 0;
+ for (int32_t i = 0; i < cue_entries_size_; ++i)
size += GetCueByIndex(i)->Size();
- size += EbmlMasterElementSize(kMkvCues, size);
+ size += EbmlMasterElementSize(libwebm::kMkvCues, size);
return size;
}
@@ -369,8 +396,8 @@ bool Cues::Write(IMkvWriter* writer) const {
if (!writer)
return false;
- uint64 size = 0;
- for (int32 i = 0; i < cue_entries_size_; ++i) {
+ uint64_t size = 0;
+ for (int32_t i = 0; i < cue_entries_size_; ++i) {
const CuePoint* const cue = GetCueByIndex(i);
if (!cue)
@@ -379,25 +406,25 @@ bool Cues::Write(IMkvWriter* writer) const {
size += cue->Size();
}
- if (!WriteEbmlMasterElement(writer, kMkvCues, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvCues, size))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- for (int32 i = 0; i < cue_entries_size_; ++i) {
+ for (int32_t i = 0; i < cue_entries_size_; ++i) {
const CuePoint* const cue = GetCueByIndex(i);
if (!cue->Write(writer))
return false;
}
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0)
return false;
- if (stop_position - payload_position != static_cast<int64>(size))
+ if (stop_position - payload_position != static_cast<int64_t>(size))
return false;
return true;
@@ -409,36 +436,40 @@ bool Cues::Write(IMkvWriter* writer) const {
ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {}
-uint64 ContentEncAESSettings::Size() const {
- const uint64 payload = PayloadSize();
- const uint64 size =
- EbmlMasterElementSize(kMkvContentEncAESSettings, payload) + payload;
+uint64_t ContentEncAESSettings::Size() const {
+ const uint64_t payload = PayloadSize();
+ const uint64_t size =
+ EbmlMasterElementSize(libwebm::kMkvContentEncAESSettings, payload) +
+ payload;
return size;
}
bool ContentEncAESSettings::Write(IMkvWriter* writer) const {
- const uint64 payload = PayloadSize();
+ const uint64_t payload = PayloadSize();
- if (!WriteEbmlMasterElement(writer, kMkvContentEncAESSettings, payload))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncAESSettings,
+ payload))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, kMkvAESSettingsCipherMode, cipher_mode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvAESSettingsCipherMode,
+ cipher_mode_))
return false;
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(payload))
+ stop_position - payload_position != static_cast<int64_t>(payload))
return false;
return true;
}
-uint64 ContentEncAESSettings::PayloadSize() const {
- uint64 size = EbmlElementSize(kMkvAESSettingsCipherMode, cipher_mode_);
+uint64_t ContentEncAESSettings::PayloadSize() const {
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvAESSettingsCipherMode, cipher_mode_);
return size;
}
@@ -456,14 +487,14 @@ ContentEncoding::ContentEncoding()
ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; }
-bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) {
+bool ContentEncoding::SetEncryptionID(const uint8_t* id, uint64_t length) {
if (!id || length < 1)
return false;
delete[] enc_key_id_;
enc_key_id_ =
- new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT
+ new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
if (!enc_key_id_)
return false;
@@ -473,79 +504,89 @@ bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) {
return true;
}
-uint64 ContentEncoding::Size() const {
- const uint64 encryption_size = EncryptionSize();
- const uint64 encoding_size = EncodingSize(0, encryption_size);
- const uint64 encodings_size =
- EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size;
+uint64_t ContentEncoding::Size() const {
+ const uint64_t encryption_size = EncryptionSize();
+ const uint64_t encoding_size = EncodingSize(0, encryption_size);
+ const uint64_t encodings_size =
+ EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) +
+ encoding_size;
return encodings_size;
}
bool ContentEncoding::Write(IMkvWriter* writer) const {
- const uint64 encryption_size = EncryptionSize();
- const uint64 encoding_size = EncodingSize(0, encryption_size);
- const uint64 size =
- EbmlMasterElementSize(kMkvContentEncoding, encoding_size) + encoding_size;
+ const uint64_t encryption_size = EncryptionSize();
+ const uint64_t encoding_size = EncodingSize(0, encryption_size);
+ const uint64_t size =
+ EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) +
+ encoding_size;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlMasterElement(writer, kMkvContentEncoding, encoding_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncoding,
+ encoding_size))
return false;
- if (!WriteEbmlElement(writer, kMkvContentEncodingOrder, encoding_order_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingOrder,
+ encoding_order_))
return false;
- if (!WriteEbmlElement(writer, kMkvContentEncodingScope, encoding_scope_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingScope,
+ encoding_scope_))
return false;
- if (!WriteEbmlElement(writer, kMkvContentEncodingType, encoding_type_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingType,
+ encoding_type_))
return false;
- if (!WriteEbmlMasterElement(writer, kMkvContentEncryption, encryption_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncryption,
+ encryption_size))
return false;
- if (!WriteEbmlElement(writer, kMkvContentEncAlgo, enc_algo_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo, enc_algo_))
return false;
- if (!WriteEbmlElement(writer, kMkvContentEncKeyID, enc_key_id_,
+ if (!WriteEbmlElement(writer, libwebm::kMkvContentEncKeyID, enc_key_id_,
enc_key_id_length_))
return false;
if (!enc_aes_settings_.Write(writer))
return false;
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(size))
+ stop_position - payload_position != static_cast<int64_t>(size))
return false;
return true;
}
-uint64 ContentEncoding::EncodingSize(uint64 compresion_size,
- uint64 encryption_size) const {
+uint64_t ContentEncoding::EncodingSize(uint64_t compresion_size,
+ uint64_t encryption_size) const {
// TODO(fgalligan): Add support for compression settings.
if (compresion_size != 0)
return 0;
- uint64 encoding_size = 0;
+ uint64_t encoding_size = 0;
if (encryption_size > 0) {
encoding_size +=
- EbmlMasterElementSize(kMkvContentEncryption, encryption_size) +
+ EbmlMasterElementSize(libwebm::kMkvContentEncryption, encryption_size) +
encryption_size;
}
- encoding_size += EbmlElementSize(kMkvContentEncodingType, encoding_type_);
- encoding_size += EbmlElementSize(kMkvContentEncodingScope, encoding_scope_);
- encoding_size += EbmlElementSize(kMkvContentEncodingOrder, encoding_order_);
+ encoding_size +=
+ EbmlElementSize(libwebm::kMkvContentEncodingType, encoding_type_);
+ encoding_size +=
+ EbmlElementSize(libwebm::kMkvContentEncodingScope, encoding_scope_);
+ encoding_size +=
+ EbmlElementSize(libwebm::kMkvContentEncodingOrder, encoding_order_);
return encoding_size;
}
-uint64 ContentEncoding::EncryptionSize() const {
- const uint64 aes_size = enc_aes_settings_.Size();
+uint64_t ContentEncoding::EncryptionSize() const {
+ const uint64_t aes_size = enc_aes_settings_.Size();
- uint64 encryption_size =
- EbmlElementSize(kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_);
- encryption_size += EbmlElementSize(kMkvContentEncAlgo, enc_algo_);
+ uint64_t encryption_size = EbmlElementSize(libwebm::kMkvContentEncKeyID,
+ enc_key_id_, enc_key_id_length_);
+ encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo, enc_algo_);
return encryption_size + aes_size;
}
@@ -577,7 +618,7 @@ Track::~Track() {
delete[] name_;
if (content_encoding_entries_) {
- for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
+ for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
ContentEncoding* const encoding = content_encoding_entries_[i];
delete encoding;
}
@@ -586,7 +627,7 @@ Track::~Track() {
}
bool Track::AddContentEncoding() {
- const uint32 count = content_encoding_entries_size_ + 1;
+ const uint32_t count = content_encoding_entries_size_ + 1;
ContentEncoding** const content_encoding_entries =
new (std::nothrow) ContentEncoding*[count]; // NOLINT
@@ -600,7 +641,7 @@ bool Track::AddContentEncoding() {
return false;
}
- for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
+ for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
content_encoding_entries[i] = content_encoding_entries_[i];
}
@@ -612,7 +653,7 @@ bool Track::AddContentEncoding() {
return true;
}
-ContentEncoding* Track::GetContentEncodingByIndex(uint32 index) const {
+ContentEncoding* Track::GetContentEncodingByIndex(uint32_t index) const {
if (content_encoding_entries_ == NULL)
return NULL;
@@ -622,46 +663,47 @@ ContentEncoding* Track::GetContentEncodingByIndex(uint32 index) const {
return content_encoding_entries_[index];
}
-uint64 Track::PayloadSize() const {
- uint64 size = EbmlElementSize(kMkvTrackNumber, number_);
- size += EbmlElementSize(kMkvTrackUID, uid_);
- size += EbmlElementSize(kMkvTrackType, type_);
+uint64_t Track::PayloadSize() const {
+ uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, number_);
+ size += EbmlElementSize(libwebm::kMkvTrackUID, uid_);
+ size += EbmlElementSize(libwebm::kMkvTrackType, type_);
if (codec_id_)
- size += EbmlElementSize(kMkvCodecID, codec_id_);
+ size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
if (codec_private_)
- size += EbmlElementSize(kMkvCodecPrivate, codec_private_,
+ size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
codec_private_length_);
if (language_)
- size += EbmlElementSize(kMkvLanguage, language_);
+ size += EbmlElementSize(libwebm::kMkvLanguage, language_);
if (name_)
- size += EbmlElementSize(kMkvName, name_);
+ size += EbmlElementSize(libwebm::kMkvName, name_);
if (max_block_additional_id_)
- size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_);
+ size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
+ max_block_additional_id_);
if (codec_delay_)
- size += EbmlElementSize(kMkvCodecDelay, codec_delay_);
+ size += EbmlElementSize(libwebm::kMkvCodecDelay, codec_delay_);
if (seek_pre_roll_)
- size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_);
+ size += EbmlElementSize(libwebm::kMkvSeekPreRoll, seek_pre_roll_);
if (default_duration_)
- size += EbmlElementSize(kMkvDefaultDuration, default_duration_);
+ size += EbmlElementSize(libwebm::kMkvDefaultDuration, default_duration_);
if (content_encoding_entries_size_ > 0) {
- uint64 content_encodings_size = 0;
- for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
+ uint64_t content_encodings_size = 0;
+ for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
ContentEncoding* const encoding = content_encoding_entries_[i];
content_encodings_size += encoding->Size();
}
- size +=
- EbmlMasterElementSize(kMkvContentEncodings, content_encodings_size) +
- content_encodings_size;
+ size += EbmlMasterElementSize(libwebm::kMkvContentEncodings,
+ content_encodings_size) +
+ content_encodings_size;
}
return size;
}
-uint64 Track::Size() const {
- uint64 size = PayloadSize();
- size += EbmlMasterElementSize(kMkvTrackEntry, size);
+uint64_t Track::Size() const {
+ uint64_t size = PayloadSize();
+ size += EbmlMasterElementSize(libwebm::kMkvTrackEntry, size);
return size;
}
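Track::Size() above shows the sizing pattern used throughout this file: an element's total size is its payload plus a master-element header made of the element ID and the EBML-coded payload length. A minimal sketch of that composition, with illustrative byte counts standing in for the real EbmlMasterElementSize() helper:

    #include <cstdint>

    // Hedged stand-in: a master element occupies its payload plus a
    // header consisting of the element ID bytes and the EBML-coded
    // length of the payload.
    uint64_t MasterElementSize(uint64_t id_bytes, uint64_t length_bytes,
                               uint64_t payload_size) {
      return id_bytes + length_bytes + payload_size;
    }

    // e.g. a TrackEntry payload of 200 bytes with a 1-byte ID and a
    // 2-byte coded length occupies MasterElementSize(1, 2, 200) == 203.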
@@ -675,95 +717,97 @@ bool Track::Write(IMkvWriter* writer) const {
// |size| may be bigger than what is written out in this function because
// derived classes may write out more data in the Track element.
- const uint64 payload_size = PayloadSize();
+ const uint64_t payload_size = PayloadSize();
- if (!WriteEbmlMasterElement(writer, kMkvTrackEntry, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvTrackEntry, payload_size))
return false;
- uint64 size = EbmlElementSize(kMkvTrackNumber, number_);
- size += EbmlElementSize(kMkvTrackUID, uid_);
- size += EbmlElementSize(kMkvTrackType, type_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, number_);
+ size += EbmlElementSize(libwebm::kMkvTrackUID, uid_);
+ size += EbmlElementSize(libwebm::kMkvTrackType, type_);
if (codec_id_)
- size += EbmlElementSize(kMkvCodecID, codec_id_);
+ size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
if (codec_private_)
- size += EbmlElementSize(kMkvCodecPrivate, codec_private_,
+ size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
codec_private_length_);
if (language_)
- size += EbmlElementSize(kMkvLanguage, language_);
+ size += EbmlElementSize(libwebm::kMkvLanguage, language_);
if (name_)
- size += EbmlElementSize(kMkvName, name_);
+ size += EbmlElementSize(libwebm::kMkvName, name_);
if (max_block_additional_id_)
- size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_);
+ size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
+ max_block_additional_id_);
if (codec_delay_)
- size += EbmlElementSize(kMkvCodecDelay, codec_delay_);
+ size += EbmlElementSize(libwebm::kMkvCodecDelay, codec_delay_);
if (seek_pre_roll_)
- size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_);
+ size += EbmlElementSize(libwebm::kMkvSeekPreRoll, seek_pre_roll_);
if (default_duration_)
- size += EbmlElementSize(kMkvDefaultDuration, default_duration_);
+ size += EbmlElementSize(libwebm::kMkvDefaultDuration, default_duration_);
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, kMkvTrackNumber, number_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTrackNumber, number_))
return false;
- if (!WriteEbmlElement(writer, kMkvTrackUID, uid_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID, uid_))
return false;
- if (!WriteEbmlElement(writer, kMkvTrackType, type_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTrackType, type_))
return false;
if (max_block_additional_id_) {
- if (!WriteEbmlElement(writer, kMkvMaxBlockAdditionID,
+ if (!WriteEbmlElement(writer, libwebm::kMkvMaxBlockAdditionID,
max_block_additional_id_)) {
return false;
}
}
if (codec_delay_) {
- if (!WriteEbmlElement(writer, kMkvCodecDelay, codec_delay_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay, codec_delay_))
return false;
}
if (seek_pre_roll_) {
- if (!WriteEbmlElement(writer, kMkvSeekPreRoll, seek_pre_roll_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll, seek_pre_roll_))
return false;
}
if (default_duration_) {
- if (!WriteEbmlElement(writer, kMkvDefaultDuration, default_duration_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDefaultDuration,
+ default_duration_))
return false;
}
if (codec_id_) {
- if (!WriteEbmlElement(writer, kMkvCodecID, codec_id_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCodecID, codec_id_))
return false;
}
if (codec_private_) {
- if (!WriteEbmlElement(writer, kMkvCodecPrivate, codec_private_,
+ if (!WriteEbmlElement(writer, libwebm::kMkvCodecPrivate, codec_private_,
codec_private_length_))
return false;
}
if (language_) {
- if (!WriteEbmlElement(writer, kMkvLanguage, language_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvLanguage, language_))
return false;
}
if (name_) {
- if (!WriteEbmlElement(writer, kMkvName, name_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvName, name_))
return false;
}
- int64 stop_position = writer->Position();
+ int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(size))
+ stop_position - payload_position != static_cast<int64_t>(size))
return false;
if (content_encoding_entries_size_ > 0) {
- uint64 content_encodings_size = 0;
- for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
+ uint64_t content_encodings_size = 0;
+ for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
ContentEncoding* const encoding = content_encoding_entries_[i];
content_encodings_size += encoding->Size();
}
- if (!WriteEbmlMasterElement(writer, kMkvContentEncodings,
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncodings,
content_encodings_size))
return false;
- for (uint32 i = 0; i < content_encoding_entries_size_; ++i) {
+ for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) {
ContentEncoding* const encoding = content_encoding_entries_[i];
if (!encoding->Write(writer))
return false;
@@ -776,14 +820,14 @@ bool Track::Write(IMkvWriter* writer) const {
return true;
}
-bool Track::SetCodecPrivate(const uint8* codec_private, uint64 length) {
+bool Track::SetCodecPrivate(const uint8_t* codec_private, uint64_t length) {
if (!codec_private || length < 1)
return false;
delete[] codec_private_;
codec_private_ =
- new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT
+ new (std::nothrow) uint8_t[static_cast<size_t>(length)]; // NOLINT
if (!codec_private_)
return false;
@@ -844,6 +888,279 @@ void Track::set_name(const char* name) {
///////////////////////////////////////////////////////////////
//
+// Colour and its child elements
+
+uint64_t PrimaryChromaticity::PrimaryChromaticityPayloadSize(
+ libwebm::MkvId x_id, libwebm::MkvId y_id) const {
+ return EbmlElementSize(x_id, x) + EbmlElementSize(y_id, y);
+}
+
+bool PrimaryChromaticity::Write(IMkvWriter* writer, libwebm::MkvId x_id,
+ libwebm::MkvId y_id) const {
+ return WriteEbmlElement(writer, x_id, x) && WriteEbmlElement(writer, y_id, y);
+}
+
+uint64_t MasteringMetadata::MasteringMetadataSize() const {
+ uint64_t size = PayloadSize();
+
+ if (size > 0)
+ size += EbmlMasterElementSize(libwebm::kMkvMasteringMetadata, size);
+
+ return size;
+}
+
+bool MasteringMetadata::Write(IMkvWriter* writer) const {
+ const uint64_t size = PayloadSize();
+
+ // Don't write an empty element.
+ if (size == 0)
+ return true;
+
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvMasteringMetadata, size))
+ return false;
+ if (luminance_max != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max)) {
+ return false;
+ }
+ if (luminance_min != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min)) {
+ return false;
+ }
+ if (r_ &&
+ !r_->Write(writer, libwebm::kMkvPrimaryRChromaticityX,
+ libwebm::kMkvPrimaryRChromaticityY)) {
+ return false;
+ }
+ if (g_ &&
+ !g_->Write(writer, libwebm::kMkvPrimaryGChromaticityX,
+ libwebm::kMkvPrimaryGChromaticityY)) {
+ return false;
+ }
+ if (b_ &&
+ !b_->Write(writer, libwebm::kMkvPrimaryBChromaticityX,
+ libwebm::kMkvPrimaryBChromaticityY)) {
+ return false;
+ }
+ if (white_point_ &&
+ !white_point_->Write(writer, libwebm::kMkvWhitePointChromaticityX,
+ libwebm::kMkvWhitePointChromaticityY)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool MasteringMetadata::SetChromaticity(
+ const PrimaryChromaticity* r, const PrimaryChromaticity* g,
+ const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) {
+ PrimaryChromaticityPtr r_ptr(NULL);
+ if (r) {
+ if (!CopyChromaticity(r, &r_ptr))
+ return false;
+ }
+ PrimaryChromaticityPtr g_ptr(NULL);
+ if (g) {
+ if (!CopyChromaticity(g, &g_ptr))
+ return false;
+ }
+ PrimaryChromaticityPtr b_ptr(NULL);
+ if (b) {
+ if (!CopyChromaticity(b, &b_ptr))
+ return false;
+ }
+ PrimaryChromaticityPtr wp_ptr(NULL);
+ if (white_point) {
+ if (!CopyChromaticity(white_point, &wp_ptr))
+ return false;
+ }
+
+ r_ = r_ptr.release();
+ g_ = g_ptr.release();
+ b_ = b_ptr.release();
+ white_point_ = wp_ptr.release();
+ return true;
+}
+
+uint64_t MasteringMetadata::PayloadSize() const {
+ uint64_t size = 0;
+
+ if (luminance_max != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max);
+ if (luminance_min != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min);
+
+ if (r_) {
+ size += r_->PrimaryChromaticityPayloadSize(
+ libwebm::kMkvPrimaryRChromaticityX, libwebm::kMkvPrimaryRChromaticityY);
+ }
+ if (g_) {
+ size += g_->PrimaryChromaticityPayloadSize(
+ libwebm::kMkvPrimaryGChromaticityX, libwebm::kMkvPrimaryGChromaticityY);
+ }
+ if (b_) {
+ size += b_->PrimaryChromaticityPayloadSize(
+ libwebm::kMkvPrimaryBChromaticityX, libwebm::kMkvPrimaryBChromaticityY);
+ }
+ if (white_point_) {
+ size += white_point_->PrimaryChromaticityPayloadSize(
+ libwebm::kMkvWhitePointChromaticityX,
+ libwebm::kMkvWhitePointChromaticityY);
+ }
+
+ return size;
+}
+
+uint64_t Colour::ColourSize() const {
+ uint64_t size = PayloadSize();
+
+ if (size > 0)
+ size += EbmlMasterElementSize(libwebm::kMkvColour, size);
+
+ return size;
+}
+
+bool Colour::Write(IMkvWriter* writer) const {
+ const uint64_t size = PayloadSize();
+
+ // Don't write an empty element.
+ if (size == 0)
+ return true;
+
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvColour, size))
+ return false;
+
+ if (matrix_coefficients != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvMatrixCoefficients,
+ matrix_coefficients)) {
+ return false;
+ }
+ if (bits_per_channel != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvBitsPerChannel,
+ bits_per_channel)) {
+ return false;
+ }
+ if (chroma_subsampling_horz != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingHorz,
+ chroma_subsampling_horz)) {
+ return false;
+ }
+ if (chroma_subsampling_vert != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingVert,
+ chroma_subsampling_vert)) {
+ return false;
+ }
+
+ if (cb_subsampling_horz != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingHorz,
+ cb_subsampling_horz)) {
+ return false;
+ }
+ if (cb_subsampling_vert != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingVert,
+ cb_subsampling_vert)) {
+ return false;
+ }
+ if (chroma_siting_horz != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvChromaSitingHorz,
+ chroma_siting_horz)) {
+ return false;
+ }
+ if (chroma_siting_vert != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvChromaSitingVert,
+ chroma_siting_vert)) {
+ return false;
+ }
+ if (range != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvRange, range)) {
+ return false;
+ }
+ if (transfer_characteristics != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvTransferCharacteristics,
+ transfer_characteristics)) {
+ return false;
+ }
+ if (primaries != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvPrimaries, primaries)) {
+ return false;
+ }
+ if (max_cll != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvMaxCLL, max_cll)) {
+ return false;
+ }
+ if (max_fall != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvMaxFALL, max_fall)) {
+ return false;
+ }
+
+ if (mastering_metadata_ && !mastering_metadata_->Write(writer))
+ return false;
+
+ return true;
+}
+
+bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
+ std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+ if (!mm_ptr.get())
+ return false;
+
+ mm_ptr->luminance_max = mastering_metadata.luminance_max;
+ mm_ptr->luminance_min = mastering_metadata.luminance_min;
+
+ if (!mm_ptr->SetChromaticity(mastering_metadata.r(), mastering_metadata.g(),
+ mastering_metadata.b(),
+ mastering_metadata.white_point())) {
+ return false;
+ }
+
+ delete mastering_metadata_;
+ mastering_metadata_ = mm_ptr.release();
+ return true;
+}
+
+uint64_t Colour::PayloadSize() const {
+ uint64_t size = 0;
+
+ if (matrix_coefficients != kValueNotPresent)
+ size +=
+ EbmlElementSize(libwebm::kMkvMatrixCoefficients, matrix_coefficients);
+ if (bits_per_channel != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvBitsPerChannel, bits_per_channel);
+ if (chroma_subsampling_horz != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvChromaSubsamplingHorz,
+ chroma_subsampling_horz);
+ if (chroma_subsampling_vert != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvChromaSubsamplingVert,
+ chroma_subsampling_vert);
+ if (cb_subsampling_horz != kValueNotPresent)
+ size +=
+ EbmlElementSize(libwebm::kMkvCbSubsamplingHorz, cb_subsampling_horz);
+ if (cb_subsampling_vert != kValueNotPresent)
+ size +=
+ EbmlElementSize(libwebm::kMkvCbSubsamplingVert, cb_subsampling_vert);
+ if (chroma_siting_horz != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvChromaSitingHorz, chroma_siting_horz);
+ if (chroma_siting_vert != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvChromaSitingVert, chroma_siting_vert);
+ if (range != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvRange, range);
+ if (transfer_characteristics != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvTransferCharacteristics,
+ transfer_characteristics);
+ if (primaries != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvPrimaries, primaries);
+ if (max_cll != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvMaxCLL, max_cll);
+ if (max_fall != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvMaxFALL, max_fall);
+
+ if (mastering_metadata_)
+ size += mastering_metadata_->MasteringMetadataSize();
+
+ return size;
+}
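The new Colour and MasteringMetadata objects are plain structs populated field by field; SetMasteringMetadata() and VideoTrack::SetColour() (below) each deep-copy their argument, so the caller keeps ownership of the originals. A minimal usage sketch, assuming the mkvmuxer.hpp header from this version and a VideoTrack obtained elsewhere; all numeric values are illustrative only:

    #include "mkvmuxer.hpp"  // header path may vary with the checkout

    // Sketch: attach HDR colour metadata to an existing video track.
    // |track| is assumed to come from Segment::AddVideoTrack().
    bool AttachColour(mkvmuxer::VideoTrack* track) {
      mkvmuxer::MasteringMetadata mm;
      mm.luminance_max = 1000.0f;  // illustrative display luminance, nits
      mm.luminance_min = 0.005f;

      mkvmuxer::Colour colour;
      colour.max_cll = 1000;  // illustrative content light levels
      colour.max_fall = 400;
      if (!colour.SetMasteringMetadata(mm))  // copies |mm|
        return false;

      return track->SetColour(colour);  // copies |colour| into the track
    }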
+
+///////////////////////////////////////////////////////////////
+//
// VideoTrack Class
VideoTrack::VideoTrack(unsigned int* seed)
@@ -858,11 +1175,12 @@ VideoTrack::VideoTrack(unsigned int* seed)
height_(0),
stereo_mode_(0),
alpha_mode_(0),
- width_(0) {}
+ width_(0),
+ colour_(NULL) {}
-VideoTrack::~VideoTrack() {}
+VideoTrack::~VideoTrack() { delete colour_; }
-bool VideoTrack::SetStereoMode(uint64 stereo_mode) {
+bool VideoTrack::SetStereoMode(uint64_t stereo_mode) {
if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst &&
stereo_mode != kTopBottomRightIsFirst &&
stereo_mode != kTopBottomLeftIsFirst &&
@@ -873,7 +1191,7 @@ bool VideoTrack::SetStereoMode(uint64 stereo_mode) {
return true;
}
-bool VideoTrack::SetAlphaMode(uint64 alpha_mode) {
+bool VideoTrack::SetAlphaMode(uint64_t alpha_mode) {
if (alpha_mode != kNoAlpha && alpha_mode != kAlpha)
return false;
@@ -881,11 +1199,11 @@ bool VideoTrack::SetAlphaMode(uint64 alpha_mode) {
return true;
}
-uint64 VideoTrack::PayloadSize() const {
- const uint64 parent_size = Track::PayloadSize();
+uint64_t VideoTrack::PayloadSize() const {
+ const uint64_t parent_size = Track::PayloadSize();
- uint64 size = VideoPayloadSize();
- size += EbmlMasterElementSize(kMkvVideo, size);
+ uint64_t size = VideoPayloadSize();
+ size += EbmlMasterElementSize(libwebm::kMkvVideo, size);
return parent_size + size;
}
@@ -894,88 +1212,122 @@ bool VideoTrack::Write(IMkvWriter* writer) const {
if (!Track::Write(writer))
return false;
- const uint64 size = VideoPayloadSize();
+ const uint64_t size = VideoPayloadSize();
- if (!WriteEbmlMasterElement(writer, kMkvVideo, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvVideo, size))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, kMkvPixelWidth, width_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelWidth, width_))
return false;
- if (!WriteEbmlElement(writer, kMkvPixelHeight, height_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelHeight, height_))
return false;
if (display_width_ > 0) {
- if (!WriteEbmlElement(writer, kMkvDisplayWidth, display_width_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth, display_width_))
return false;
}
if (display_height_ > 0) {
- if (!WriteEbmlElement(writer, kMkvDisplayHeight, display_height_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight, display_height_))
return false;
}
if (crop_left_ > 0) {
- if (!WriteEbmlElement(writer, kMkvPixelCropLeft, crop_left_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft, crop_left_))
return false;
}
if (crop_right_ > 0) {
- if (!WriteEbmlElement(writer, kMkvPixelCropRight, crop_right_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight, crop_right_))
return false;
}
if (crop_top_ > 0) {
- if (!WriteEbmlElement(writer, kMkvPixelCropTop, crop_top_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop, crop_top_))
return false;
}
if (crop_bottom_ > 0) {
- if (!WriteEbmlElement(writer, kMkvPixelCropBottom, crop_bottom_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom, crop_bottom_))
return false;
}
if (stereo_mode_ > kMono) {
- if (!WriteEbmlElement(writer, kMkvStereoMode, stereo_mode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode, stereo_mode_))
return false;
}
if (alpha_mode_ > kNoAlpha) {
- if (!WriteEbmlElement(writer, kMkvAlphaMode, alpha_mode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode, alpha_mode_))
return false;
}
if (frame_rate_ > 0.0) {
- if (!WriteEbmlElement(writer, kMkvFrameRate,
+ if (!WriteEbmlElement(writer, libwebm::kMkvFrameRate,
static_cast<float>(frame_rate_))) {
return false;
}
}
+ if (colour_) {
+ if (!colour_->Write(writer))
+ return false;
+ }
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(size)) {
+ stop_position - payload_position != static_cast<int64_t>(size)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool VideoTrack::SetColour(const Colour& colour) {
+ std::auto_ptr<Colour> colour_ptr(new Colour());
+ if (!colour_ptr.get())
return false;
+
+ if (colour.mastering_metadata()) {
+ if (!colour_ptr->SetMasteringMetadata(*colour.mastering_metadata()))
+ return false;
}
+ colour_ptr->matrix_coefficients = colour.matrix_coefficients;
+ colour_ptr->bits_per_channel = colour.bits_per_channel;
+ colour_ptr->chroma_subsampling_horz = colour.chroma_subsampling_horz;
+ colour_ptr->chroma_subsampling_vert = colour.chroma_subsampling_vert;
+ colour_ptr->cb_subsampling_horz = colour.cb_subsampling_horz;
+ colour_ptr->cb_subsampling_vert = colour.cb_subsampling_vert;
+ colour_ptr->chroma_siting_horz = colour.chroma_siting_horz;
+ colour_ptr->chroma_siting_vert = colour.chroma_siting_vert;
+ colour_ptr->range = colour.range;
+ colour_ptr->transfer_characteristics = colour.transfer_characteristics;
+ colour_ptr->primaries = colour.primaries;
+ colour_ptr->max_cll = colour.max_cll;
+ colour_ptr->max_fall = colour.max_fall;
+ colour_ = colour_ptr.release();
return true;
}
-uint64 VideoTrack::VideoPayloadSize() const {
- uint64 size = EbmlElementSize(kMkvPixelWidth, width_);
- size += EbmlElementSize(kMkvPixelHeight, height_);
+uint64_t VideoTrack::VideoPayloadSize() const {
+ uint64_t size = EbmlElementSize(libwebm::kMkvPixelWidth, width_);
+ size += EbmlElementSize(libwebm::kMkvPixelHeight, height_);
if (display_width_ > 0)
- size += EbmlElementSize(kMkvDisplayWidth, display_width_);
+ size += EbmlElementSize(libwebm::kMkvDisplayWidth, display_width_);
if (display_height_ > 0)
- size += EbmlElementSize(kMkvDisplayHeight, display_height_);
+ size += EbmlElementSize(libwebm::kMkvDisplayHeight, display_height_);
if (crop_left_ > 0)
- size += EbmlElementSize(kMkvPixelCropLeft, crop_left_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropLeft, crop_left_);
if (crop_right_ > 0)
- size += EbmlElementSize(kMkvPixelCropRight, crop_right_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropRight, crop_right_);
if (crop_top_ > 0)
- size += EbmlElementSize(kMkvPixelCropTop, crop_top_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropTop, crop_top_);
if (crop_bottom_ > 0)
- size += EbmlElementSize(kMkvPixelCropBottom, crop_bottom_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropBottom, crop_bottom_);
if (stereo_mode_ > kMono)
- size += EbmlElementSize(kMkvStereoMode, stereo_mode_);
+ size += EbmlElementSize(libwebm::kMkvStereoMode, stereo_mode_);
if (alpha_mode_ > kNoAlpha)
- size += EbmlElementSize(kMkvAlphaMode, alpha_mode_);
+ size += EbmlElementSize(libwebm::kMkvAlphaMode, alpha_mode_);
if (frame_rate_ > 0.0)
- size += EbmlElementSize(kMkvFrameRate, static_cast<float>(frame_rate_));
+ size += EbmlElementSize(libwebm::kMkvFrameRate,
+ static_cast<float>(frame_rate_));
+ if (colour_)
+ size += colour_->ColourSize();
return size;
}
@@ -989,15 +1341,15 @@ AudioTrack::AudioTrack(unsigned int* seed)
AudioTrack::~AudioTrack() {}
-uint64 AudioTrack::PayloadSize() const {
- const uint64 parent_size = Track::PayloadSize();
+uint64_t AudioTrack::PayloadSize() const {
+ const uint64_t parent_size = Track::PayloadSize();
- uint64 size =
- EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_));
- size += EbmlElementSize(kMkvChannels, channels_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
+ static_cast<float>(sample_rate_));
+ size += EbmlElementSize(libwebm::kMkvChannels, channels_);
if (bit_depth_ > 0)
- size += EbmlElementSize(kMkvBitDepth, bit_depth_);
- size += EbmlMasterElementSize(kMkvAudio, size);
+ size += EbmlElementSize(libwebm::kMkvBitDepth, bit_depth_);
+ size += EbmlMasterElementSize(libwebm::kMkvAudio, size);
return parent_size + size;
}
@@ -1007,31 +1359,31 @@ bool AudioTrack::Write(IMkvWriter* writer) const {
return false;
// Calculate AudioSettings size.
- uint64 size =
- EbmlElementSize(kMkvSamplingFrequency, static_cast<float>(sample_rate_));
- size += EbmlElementSize(kMkvChannels, channels_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
+ static_cast<float>(sample_rate_));
+ size += EbmlElementSize(libwebm::kMkvChannels, channels_);
if (bit_depth_ > 0)
- size += EbmlElementSize(kMkvBitDepth, bit_depth_);
+ size += EbmlElementSize(libwebm::kMkvBitDepth, bit_depth_);
- if (!WriteEbmlMasterElement(writer, kMkvAudio, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvAudio, size))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, kMkvSamplingFrequency,
+ if (!WriteEbmlElement(writer, libwebm::kMkvSamplingFrequency,
static_cast<float>(sample_rate_)))
return false;
- if (!WriteEbmlElement(writer, kMkvChannels, channels_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChannels, channels_))
return false;
if (bit_depth_ > 0)
- if (!WriteEbmlElement(writer, kMkvBitDepth, bit_depth_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth, bit_depth_))
return false;
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(size))
+ stop_position - payload_position != static_cast<int64_t>(size))
return false;
return true;
@@ -1047,11 +1399,12 @@ const char Tracks::kVp8CodecId[] = "V_VP8";
const char Tracks::kVp9CodecId[] = "V_VP9";
const char Tracks::kVp10CodecId[] = "V_VP10";
-Tracks::Tracks() : track_entries_(NULL), track_entries_size_(0) {}
+Tracks::Tracks()
+ : track_entries_(NULL), track_entries_size_(0), wrote_tracks_(false) {}
Tracks::~Tracks() {
if (track_entries_) {
- for (uint32 i = 0; i < track_entries_size_; ++i) {
+ for (uint32_t i = 0; i < track_entries_size_; ++i) {
Track* const track = track_entries_[i];
delete track;
}
@@ -1059,8 +1412,8 @@ Tracks::~Tracks() {
}
}
-bool Tracks::AddTrack(Track* track, int32 number) {
- if (number < 0)
+bool Tracks::AddTrack(Track* track, int32_t number) {
+ if (number < 0 || wrote_tracks_)
return false;
// This muxer only supports track numbers in the range [1, 126], in
@@ -1071,23 +1424,23 @@ bool Tracks::AddTrack(Track* track, int32 number) {
if (number > 0x7E)
return false;
- uint32 track_num = number;
+ uint32_t track_num = number;
if (track_num > 0) {
// Check to make sure a track does not already have |track_num|.
- for (uint32 i = 0; i < track_entries_size_; ++i) {
+ for (uint32_t i = 0; i < track_entries_size_; ++i) {
if (track_entries_[i]->number() == track_num)
return false;
}
}
- const uint32 count = track_entries_size_ + 1;
+ const uint32_t count = track_entries_size_ + 1;
Track** const track_entries = new (std::nothrow) Track*[count]; // NOLINT
if (!track_entries)
return false;
- for (uint32 i = 0; i < track_entries_size_; ++i) {
+ for (uint32_t i = 0; i < track_entries_size_; ++i) {
track_entries[i] = track_entries_[i];
}
@@ -1101,7 +1454,7 @@ bool Tracks::AddTrack(Track* track, int32 number) {
bool exit = false;
do {
exit = true;
- for (uint32 i = 0; i < track_entries_size_; ++i) {
+ for (uint32_t i = 0; i < track_entries_size_; ++i) {
if (track_entries[i]->number() == track_num) {
track_num++;
exit = false;
@@ -1118,7 +1471,7 @@ bool Tracks::AddTrack(Track* track, int32 number) {
return true;
}
-const Track* Tracks::GetTrackByIndex(uint32 index) const {
+const Track* Tracks::GetTrackByIndex(uint32_t index) const {
if (track_entries_ == NULL)
return NULL;
@@ -1128,9 +1481,9 @@ const Track* Tracks::GetTrackByIndex(uint32 index) const {
return track_entries_[index];
}
-Track* Tracks::GetTrackByNumber(uint64 track_number) const {
- const int32 count = track_entries_size();
- for (int32 i = 0; i < count; ++i) {
+Track* Tracks::GetTrackByNumber(uint64_t track_number) const {
+ const int32_t count = track_entries_size();
+ for (int32_t i = 0; i < count; ++i) {
if (track_entries_[i]->number() == track_number)
return track_entries_[i];
}
@@ -1138,7 +1491,7 @@ Track* Tracks::GetTrackByNumber(uint64 track_number) const {
return NULL;
}
-bool Tracks::TrackIsAudio(uint64 track_number) const {
+bool Tracks::TrackIsAudio(uint64_t track_number) const {
const Track* const track = GetTrackByNumber(track_number);
if (track->type() == kAudio)
@@ -1147,7 +1500,7 @@ bool Tracks::TrackIsAudio(uint64 track_number) const {
return false;
}
-bool Tracks::TrackIsVideo(uint64 track_number) const {
+bool Tracks::TrackIsVideo(uint64_t track_number) const {
const Track* const track = GetTrackByNumber(track_number);
if (track->type() == kVideo)
@@ -1157,9 +1510,9 @@ bool Tracks::TrackIsVideo(uint64 track_number) const {
}
bool Tracks::Write(IMkvWriter* writer) const {
- uint64 size = 0;
- const int32 count = track_entries_size();
- for (int32 i = 0; i < count; ++i) {
+ uint64_t size = 0;
+ const int32_t count = track_entries_size();
+ for (int32_t i = 0; i < count; ++i) {
const Track* const track = GetTrackByIndex(i);
if (!track)
@@ -1168,24 +1521,25 @@ bool Tracks::Write(IMkvWriter* writer) const {
size += track->Size();
}
- if (!WriteEbmlMasterElement(writer, kMkvTracks, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvTracks, size))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- for (int32 i = 0; i < count; ++i) {
+ for (int32_t i = 0; i < count; ++i) {
const Track* const track = GetTrackByIndex(i);
if (!track->Write(writer))
return false;
}
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(size))
+ stop_position - payload_position != static_cast<int64_t>(size))
return false;
+ wrote_tracks_ = true;
return true;
}
@@ -1195,9 +1549,10 @@ bool Tracks::Write(IMkvWriter* writer) const {
bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); }
-void Chapter::set_time(const Segment& segment, uint64 start_ns, uint64 end_ns) {
+void Chapter::set_time(const Segment& segment, uint64_t start_ns,
+ uint64_t end_ns) {
const SegmentInfo* const info = segment.GetSegmentInfo();
- const uint64 timecode_scale = info->timecode_scale();
+ const uint64_t timecode_scale = info->timecode_scale();
start_timecode_ = start_ns / timecode_scale;
end_timecode_ = end_ns / timecode_scale;
}
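Chapter::set_time() stores times in timecode units, i.e. the absolute nanosecond values divided by the segment's timecode scale. With the Matroska default scale of 1,000,000 ns (millisecond resolution), a chapter spanning 5 s to 10 s is stored as timecodes 5000 and 10000. A small sketch of the same conversion:

    #include <cstdint>

    // Mirrors the division in Chapter::set_time(); integer division
    // truncates toward zero, so sub-scale precision is dropped.
    uint64_t NsToTimecode(uint64_t time_ns, uint64_t timecode_scale) {
      return time_ns / timecode_scale;
    }

    // NsToTimecode(5000000000, 1000000) == 5000  (5 s at 1 ms resolution)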
@@ -1292,38 +1647,40 @@ bool Chapter::ExpandDisplaysArray() {
return true;
}
-uint64 Chapter::WriteAtom(IMkvWriter* writer) const {
- uint64 payload_size = EbmlElementSize(kMkvChapterStringUID, id_) +
- EbmlElementSize(kMkvChapterUID, uid_) +
- EbmlElementSize(kMkvChapterTimeStart, start_timecode_) +
- EbmlElementSize(kMkvChapterTimeEnd, end_timecode_);
+uint64_t Chapter::WriteAtom(IMkvWriter* writer) const {
+ uint64_t payload_size =
+ EbmlElementSize(libwebm::kMkvChapterStringUID, id_) +
+ EbmlElementSize(libwebm::kMkvChapterUID, uid_) +
+ EbmlElementSize(libwebm::kMkvChapterTimeStart, start_timecode_) +
+ EbmlElementSize(libwebm::kMkvChapterTimeEnd, end_timecode_);
for (int idx = 0; idx < displays_count_; ++idx) {
const Display& d = displays_[idx];
payload_size += d.WriteDisplay(NULL);
}
- const uint64 atom_size =
- EbmlMasterElementSize(kMkvChapterAtom, payload_size) + payload_size;
+ const uint64_t atom_size =
+ EbmlMasterElementSize(libwebm::kMkvChapterAtom, payload_size) +
+ payload_size;
if (writer == NULL)
return atom_size;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
- if (!WriteEbmlMasterElement(writer, kMkvChapterAtom, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterAtom, payload_size))
return 0;
- if (!WriteEbmlElement(writer, kMkvChapterStringUID, id_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterStringUID, id_))
return 0;
- if (!WriteEbmlElement(writer, kMkvChapterUID, uid_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID, uid_))
return 0;
- if (!WriteEbmlElement(writer, kMkvChapterTimeStart, start_timecode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart, start_timecode_))
return 0;
- if (!WriteEbmlElement(writer, kMkvChapterTimeEnd, end_timecode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd, end_timecode_))
return 0;
for (int idx = 0; idx < displays_count_; ++idx) {
@@ -1333,9 +1690,9 @@ uint64 Chapter::WriteAtom(IMkvWriter* writer) const {
return 0;
}
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != atom_size)
+ if (stop >= start && uint64_t(stop - start) != atom_size)
return 0;
return atom_size;
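WriteAtom() above, like WriteDisplay() and WriteEdition() below, follows the muxer's two-pass idiom: called with a NULL writer it only returns the element size; called with a real writer it emits the bytes and then checks, via the start/stop positions, that exactly the precomputed number of bytes went out. A standalone sketch of the idiom; the one-byte header here is purely illustrative:

    #include <cstdint>
    #include <cstdio>

    // Size pass when |out| is NULL, write pass otherwise; returns the
    // element size, or 0 on error, matching the WriteAtom() convention.
    uint64_t WriteThing(FILE* out, const uint8_t* payload, uint64_t length) {
      const uint64_t size = 1 + length;  // illustrative 1-byte header
      if (out == NULL)
        return size;

      const long start = ftell(out);
      if (fputc(0x42, out) == EOF)  // illustrative header byte
        return 0;
      if (fwrite(payload, 1, static_cast<size_t>(length), out) != length)
        return 0;
      const long stop = ftell(out);

      // Same invariant the code above checks after writing.
      if (stop >= start && static_cast<uint64_t>(stop - start) != size)
        return 0;
      return size;
    }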
@@ -1365,42 +1722,44 @@ bool Chapter::Display::set_country(const char* country) {
return StrCpy(country, &country_);
}
-uint64 Chapter::Display::WriteDisplay(IMkvWriter* writer) const {
- uint64 payload_size = EbmlElementSize(kMkvChapString, title_);
+uint64_t Chapter::Display::WriteDisplay(IMkvWriter* writer) const {
+ uint64_t payload_size = EbmlElementSize(libwebm::kMkvChapString, title_);
if (language_)
- payload_size += EbmlElementSize(kMkvChapLanguage, language_);
+ payload_size += EbmlElementSize(libwebm::kMkvChapLanguage, language_);
if (country_)
- payload_size += EbmlElementSize(kMkvChapCountry, country_);
+ payload_size += EbmlElementSize(libwebm::kMkvChapCountry, country_);
- const uint64 display_size =
- EbmlMasterElementSize(kMkvChapterDisplay, payload_size) + payload_size;
+ const uint64_t display_size =
+ EbmlMasterElementSize(libwebm::kMkvChapterDisplay, payload_size) +
+ payload_size;
if (writer == NULL)
return display_size;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
- if (!WriteEbmlMasterElement(writer, kMkvChapterDisplay, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterDisplay,
+ payload_size))
return 0;
- if (!WriteEbmlElement(writer, kMkvChapString, title_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapString, title_))
return 0;
if (language_) {
- if (!WriteEbmlElement(writer, kMkvChapLanguage, language_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapLanguage, language_))
return 0;
}
if (country_) {
- if (!WriteEbmlElement(writer, kMkvChapCountry, country_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapCountry, country_))
return 0;
}
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != display_size)
+ if (stop >= start && uint64_t(stop - start) != display_size)
return 0;
return display_size;
@@ -1438,19 +1797,19 @@ bool Chapters::Write(IMkvWriter* writer) const {
if (writer == NULL)
return false;
- const uint64 payload_size = WriteEdition(NULL); // return size only
+ const uint64_t payload_size = WriteEdition(NULL); // return size only
- if (!WriteEbmlMasterElement(writer, kMkvChapters, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapters, payload_size))
return false;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
if (WriteEdition(writer) == 0) // error
return false;
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != payload_size)
+ if (stop >= start && uint64_t(stop - start) != payload_size)
return false;
return true;
@@ -1480,36 +1839,37 @@ bool Chapters::ExpandChaptersArray() {
return true;
}
-uint64 Chapters::WriteEdition(IMkvWriter* writer) const {
- uint64 payload_size = 0;
+uint64_t Chapters::WriteEdition(IMkvWriter* writer) const {
+ uint64_t payload_size = 0;
for (int idx = 0; idx < chapters_count_; ++idx) {
const Chapter& chapter = chapters_[idx];
payload_size += chapter.WriteAtom(NULL);
}
- const uint64 edition_size =
- EbmlMasterElementSize(kMkvEditionEntry, payload_size) + payload_size;
+ const uint64_t edition_size =
+ EbmlMasterElementSize(libwebm::kMkvEditionEntry, payload_size) +
+ payload_size;
if (writer == NULL) // return size only
return edition_size;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
- if (!WriteEbmlMasterElement(writer, kMkvEditionEntry, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvEditionEntry, payload_size))
return 0; // error
for (int idx = 0; idx < chapters_count_; ++idx) {
const Chapter& chapter = chapters_[idx];
- const uint64 chapter_size = chapter.WriteAtom(writer);
+ const uint64_t chapter_size = chapter.WriteAtom(writer);
if (chapter_size == 0) // error
return 0;
}
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != edition_size)
+ if (stop >= start && uint64_t(stop - start) != edition_size)
return 0;
return edition_size;
@@ -1581,23 +1941,23 @@ bool Tag::ExpandSimpleTagsArray() {
return true;
}
-uint64 Tag::Write(IMkvWriter* writer) const {
- uint64 payload_size = 0;
+uint64_t Tag::Write(IMkvWriter* writer) const {
+ uint64_t payload_size = 0;
for (int idx = 0; idx < simple_tags_count_; ++idx) {
const SimpleTag& st = simple_tags_[idx];
payload_size += st.Write(NULL);
}
- const uint64 tag_size =
- EbmlMasterElementSize(kMkvTag, payload_size) + payload_size;
+ const uint64_t tag_size =
+ EbmlMasterElementSize(libwebm::kMkvTag, payload_size) + payload_size;
if (writer == NULL)
return tag_size;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
- if (!WriteEbmlMasterElement(writer, kMkvTag, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvTag, payload_size))
return 0;
for (int idx = 0; idx < simple_tags_count_; ++idx) {
@@ -1607,9 +1967,9 @@ uint64 Tag::Write(IMkvWriter* writer) const {
return 0;
}
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != tag_size)
+ if (stop >= start && uint64_t(stop - start) != tag_size)
return 0;
return tag_size;
@@ -1635,31 +1995,32 @@ bool Tag::SimpleTag::set_tag_string(const char* tag_string) {
return StrCpy(tag_string, &tag_string_);
}
-uint64 Tag::SimpleTag::Write(IMkvWriter* writer) const {
- uint64 payload_size = EbmlElementSize(kMkvTagName, tag_name_);
+uint64_t Tag::SimpleTag::Write(IMkvWriter* writer) const {
+ uint64_t payload_size = EbmlElementSize(libwebm::kMkvTagName, tag_name_);
- payload_size += EbmlElementSize(kMkvTagString, tag_string_);
+ payload_size += EbmlElementSize(libwebm::kMkvTagString, tag_string_);
- const uint64 simple_tag_size =
- EbmlMasterElementSize(kMkvSimpleTag, payload_size) + payload_size;
+ const uint64_t simple_tag_size =
+ EbmlMasterElementSize(libwebm::kMkvSimpleTag, payload_size) +
+ payload_size;
if (writer == NULL)
return simple_tag_size;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
- if (!WriteEbmlMasterElement(writer, kMkvSimpleTag, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvSimpleTag, payload_size))
return 0;
- if (!WriteEbmlElement(writer, kMkvTagName, tag_name_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTagName, tag_name_))
return 0;
- if (!WriteEbmlElement(writer, kMkvTagString, tag_string_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTagString, tag_string_))
return 0;
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != simple_tag_size)
+ if (stop >= start && uint64_t(stop - start) != simple_tag_size)
return 0;
return simple_tag_size;
@@ -1694,29 +2055,29 @@ bool Tags::Write(IMkvWriter* writer) const {
if (writer == NULL)
return false;
- uint64 payload_size = 0;
+ uint64_t payload_size = 0;
for (int idx = 0; idx < tags_count_; ++idx) {
const Tag& tag = tags_[idx];
payload_size += tag.Write(NULL);
}
- if (!WriteEbmlMasterElement(writer, kMkvTags, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvTags, payload_size))
return false;
- const int64 start = writer->Position();
+ const int64_t start = writer->Position();
for (int idx = 0; idx < tags_count_; ++idx) {
const Tag& tag = tags_[idx];
- const uint64 tag_size = tag.Write(writer);
+ const uint64_t tag_size = tag.Write(writer);
if (tag_size == 0) // error
return 0;
}
- const int64 stop = writer->Position();
+ const int64_t stop = writer->Position();
- if (stop >= start && uint64(stop - start) != payload_size)
+ if (stop >= start && uint64_t(stop - start) != payload_size)
return false;
return true;
@@ -1750,15 +2111,18 @@ bool Tags::ExpandTagsArray() {
//
// Cluster class
-Cluster::Cluster(uint64 timecode, int64 cues_pos, uint64 timecode_scale)
+Cluster::Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale,
+ bool write_last_frame_with_duration, bool fixed_size_timecode)
: blocks_added_(0),
finalized_(false),
+ fixed_size_timecode_(fixed_size_timecode),
header_written_(false),
payload_size_(0),
position_for_cues_(cues_pos),
size_position_(-1),
timecode_(timecode),
timecode_scale_(timecode_scale),
+ write_last_frame_with_duration_(write_last_frame_with_duration),
writer_(NULL) {}
Cluster::~Cluster() {}
@@ -1771,24 +2135,27 @@ bool Cluster::Init(IMkvWriter* ptr_writer) {
return true;
}
-bool Cluster::AddFrame(const Frame* const frame) { return DoWriteFrame(frame); }
+bool Cluster::AddFrame(const Frame* const frame) {
+ return QueueOrWriteFrame(frame);
+}
-bool Cluster::AddFrame(const uint8* data, uint64 length, uint64 track_number,
- uint64 abs_timecode, bool is_key) {
+bool Cluster::AddFrame(const uint8_t* data, uint64_t length,
+ uint64_t track_number, uint64_t abs_timecode,
+ bool is_key) {
Frame frame;
if (!frame.Init(data, length))
return false;
frame.set_track_number(track_number);
frame.set_timestamp(abs_timecode);
frame.set_is_key(is_key);
- return DoWriteFrame(&frame);
+ return QueueOrWriteFrame(&frame);
}
-bool Cluster::AddFrameWithAdditional(const uint8* data, uint64 length,
- const uint8* additional,
- uint64 additional_length, uint64 add_id,
- uint64 track_number, uint64 abs_timecode,
- bool is_key) {
+bool Cluster::AddFrameWithAdditional(const uint8_t* data, uint64_t length,
+ const uint8_t* additional,
+ uint64_t additional_length,
+ uint64_t add_id, uint64_t track_number,
+ uint64_t abs_timecode, bool is_key) {
if (!additional || additional_length == 0) {
return false;
}
@@ -1800,13 +2167,13 @@ bool Cluster::AddFrameWithAdditional(const uint8* data, uint64 length,
frame.set_track_number(track_number);
frame.set_timestamp(abs_timecode);
frame.set_is_key(is_key);
- return DoWriteFrame(&frame);
+ return QueueOrWriteFrame(&frame);
}
-bool Cluster::AddFrameWithDiscardPadding(const uint8* data, uint64 length,
- int64 discard_padding,
- uint64 track_number,
- uint64 abs_timecode, bool is_key) {
+bool Cluster::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
+ int64_t discard_padding,
+ uint64_t track_number,
+ uint64_t abs_timecode, bool is_key) {
Frame frame;
if (!frame.Init(data, length))
return false;
@@ -1814,11 +2181,12 @@ bool Cluster::AddFrameWithDiscardPadding(const uint8* data, uint64 length,
frame.set_track_number(track_number);
frame.set_timestamp(abs_timecode);
frame.set_is_key(is_key);
- return DoWriteFrame(&frame);
+ return QueueOrWriteFrame(&frame);
}
-bool Cluster::AddMetadata(const uint8* data, uint64 length, uint64 track_number,
- uint64 abs_timecode, uint64 duration_timecode) {
+bool Cluster::AddMetadata(const uint8_t* data, uint64_t length,
+ uint64_t track_number, uint64_t abs_timecode,
+ uint64_t duration_timecode) {
Frame frame;
if (!frame.Init(data, length))
return false;
@@ -1826,17 +2194,62 @@ bool Cluster::AddMetadata(const uint8* data, uint64 length, uint64 track_number,
frame.set_timestamp(abs_timecode);
frame.set_duration(duration_timecode);
frame.set_is_key(true); // All metadata blocks are keyframes.
- return DoWriteFrame(&frame);
+ return QueueOrWriteFrame(&frame);
}
-void Cluster::AddPayloadSize(uint64 size) { payload_size_ += size; }
+void Cluster::AddPayloadSize(uint64_t size) { payload_size_ += size; }
bool Cluster::Finalize() {
- if (!writer_ || finalized_ || size_position_ == -1)
+ return !write_last_frame_with_duration_ && Finalize(false, 0);
+}
+
+bool Cluster::Finalize(bool set_last_frame_duration, uint64_t duration) {
+ if (!writer_ || finalized_)
+ return false;
+
+ if (write_last_frame_with_duration_) {

+ // Write out held-back Frames. This essentially performs a k-way merge
+ // across all tracks in increasing order of timestamps.
+ while (!stored_frames_.empty()) {
+ Frame* frame = stored_frames_.begin()->second.front();
+
+ // Get the next frame to write (frame with least timestamp across all
+ // tracks).
+ for (FrameMapIterator frames_iterator = ++stored_frames_.begin();
+ frames_iterator != stored_frames_.end(); ++frames_iterator) {
+ if (frames_iterator->second.front()->timestamp() < frame->timestamp()) {
+ frame = frames_iterator->second.front();
+ }
+ }
+
+ // Set the duration if it's the last frame for the track.
+ if (set_last_frame_duration &&
+ stored_frames_[frame->track_number()].size() == 1 &&
+ !frame->duration_set()) {
+ frame->set_duration(duration - frame->timestamp());
+ if (!frame->is_key() && !frame->reference_block_timestamp_set()) {
+ frame->set_reference_block_timestamp(
+ last_block_timestamp_[frame->track_number()]);
+ }
+ }
+
+ // Write the frame and remove it from |stored_frames_|.
+ const bool wrote_frame = DoWriteFrame(frame);
+ stored_frames_[frame->track_number()].pop_front();
+ if (stored_frames_[frame->track_number()].empty()) {
+ stored_frames_.erase(frame->track_number());
+ }
+ delete frame;
+ if (!wrote_frame)
+ return false;
+ }
+ }
+
+ if (size_position_ == -1)
return false;
if (writer_->Seekable()) {
- const int64 pos = writer_->Position();
+ const int64_t pos = writer_->Position();
if (writer_->Position(size_position_))
return false;
@@ -1853,9 +2266,10 @@ bool Cluster::Finalize() {
return true;
}
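The two-argument Finalize() above drains |stored_frames_| as a k-way merge: each track holds a FIFO of queued frames, and each step writes the front frame with the smallest timestamp across all tracks. A self-contained sketch of that selection step, with a minimal stand-in for mkvmuxer::Frame:

    #include <cstddef>
    #include <cstdint>
    #include <list>
    #include <map>

    struct Frame { uint64_t timestamp; };  // stand-in for mkvmuxer::Frame

    // Pop the frame with the least timestamp across all per-track FIFOs,
    // mirroring the selection loop in Cluster::Finalize().
    Frame* PopNextFrame(std::map<uint64_t, std::list<Frame*> >* queues) {
      if (queues->empty())
        return NULL;
      std::map<uint64_t, std::list<Frame*> >::iterator best = queues->begin();
      std::map<uint64_t, std::list<Frame*> >::iterator it = queues->begin();
      for (++it; it != queues->end(); ++it) {
        if (it->second.front()->timestamp < best->second.front()->timestamp)
          best = it;
      }
      Frame* const frame = best->second.front();
      best->second.pop_front();
      if (best->second.empty())
        queues->erase(best);  // drop a track once its queue is empty
      return frame;
    }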
-uint64 Cluster::Size() const {
- const uint64 element_size =
- EbmlMasterElementSize(kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) + payload_size_;
+uint64_t Cluster::Size() const {
+ const uint64_t element_size =
+ EbmlMasterElementSize(libwebm::kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) +
+ payload_size_;
return element_size;
}
@@ -1871,15 +2285,15 @@ bool Cluster::PreWriteBlock() {
return true;
}
-void Cluster::PostWriteBlock(uint64 element_size) {
+void Cluster::PostWriteBlock(uint64_t element_size) {
AddPayloadSize(element_size);
++blocks_added_;
}
-int64 Cluster::GetRelativeTimecode(int64 abs_timecode) const {
- const int64 cluster_timecode = this->Cluster::timecode();
- const int64 rel_timecode =
- static_cast<int64>(abs_timecode) - cluster_timecode;
+int64_t Cluster::GetRelativeTimecode(int64_t abs_timecode) const {
+ const int64_t cluster_timecode = this->Cluster::timecode();
+ const int64_t rel_timecode =
+ static_cast<int64_t>(abs_timecode) - cluster_timecode;
if (rel_timecode < 0 || rel_timecode > kMaxBlockTimecode)
return -1;
@@ -1894,11 +2308,67 @@ bool Cluster::DoWriteFrame(const Frame* const frame) {
if (!PreWriteBlock())
return false;
- const uint64 element_size = WriteFrame(writer_, frame, this);
+ const uint64_t element_size = WriteFrame(writer_, frame, this);
if (element_size == 0)
return false;
PostWriteBlock(element_size);
+ last_block_timestamp_[frame->track_number()] = frame->timestamp();
+ return true;
+}
+
+bool Cluster::QueueOrWriteFrame(const Frame* const frame) {
+ if (!frame || !frame->IsValid())
+ return false;
+
+ // If |write_last_frame_with_duration_| is not set, then write the frame right
+ // away.
+ if (!write_last_frame_with_duration_) {
+ return DoWriteFrame(frame);
+ }
+
+ // Queue the current frame.
+ uint64_t track_number = frame->track_number();
+ Frame* const frame_to_store = new Frame();
+ frame_to_store->CopyFrom(*frame);
+ stored_frames_[track_number].push_back(frame_to_store);
+
+ // Iterate through all queued frames in the current track except the last
+ // one, and write each frame if it is okay to do so, i.e. no other track
+ // has a held-back frame with timestamp <= the timestamp of the frame in
+ // question.
+ std::vector<std::list<Frame*>::iterator> frames_to_erase;
+ for (std::list<Frame *>::iterator
+ current_track_iterator = stored_frames_[track_number].begin(),
+ end = --stored_frames_[track_number].end();
+ current_track_iterator != end; ++current_track_iterator) {
+ const Frame* const frame_to_write = *current_track_iterator;
+ bool okay_to_write = true;
+ for (FrameMapIterator track_iterator = stored_frames_.begin();
+ track_iterator != stored_frames_.end(); ++track_iterator) {
+ if (track_iterator->first == track_number) {
+ continue;
+ }
+ if (track_iterator->second.front()->timestamp() <
+ frame_to_write->timestamp()) {
+ okay_to_write = false;
+ break;
+ }
+ }
+ if (okay_to_write) {
+ const bool wrote_frame = DoWriteFrame(frame_to_write);
+ delete frame_to_write;
+ if (!wrote_frame)
+ return false;
+ frames_to_erase.push_back(current_track_iterator);
+ } else {
+ break;
+ }
+ }
+ for (std::vector<std::list<Frame*>::iterator>::iterator iterator =
+ frames_to_erase.begin();
+ iterator != frames_to_erase.end(); ++iterator) {
+ stored_frames_[track_number].erase(*iterator);
+ }
return true;
}
@@ -1906,7 +2376,7 @@ bool Cluster::WriteClusterHeader() {
if (finalized_)
return false;
- if (WriteID(writer_, kMkvCluster))
+ if (WriteID(writer_, libwebm::kMkvCluster))
return false;
// Save for later.
@@ -1917,9 +2387,12 @@ bool Cluster::WriteClusterHeader() {
if (SerializeInt(writer_, kEbmlUnknownValue, 8))
return false;
- if (!WriteEbmlElement(writer_, kMkvTimecode, timecode()))
+ if (!WriteEbmlElement(writer_, libwebm::kMkvTimecode, timecode(),
+ fixed_size_timecode_ ? 8 : 0)) {
return false;
- AddPayloadSize(EbmlElementSize(kMkvTimecode, timecode()));
+ }
+ AddPayloadSize(EbmlElementSize(libwebm::kMkvTimecode, timecode(),
+ fixed_size_timecode_ ? 8 : 0));
header_written_ = true;
return true;
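The new fixed_size_timecode_ path passes an explicit size of 8 to WriteEbmlElement()/EbmlElementSize(), forcing the cluster Timecode to occupy eight bytes no matter how small the value is, so the header length is independent of the timecode (useful, e.g., when cluster headers must have uniform size for later in-place patching). A sketch of what such a fixed-width unsigned write looks like; the real serialization lives in mkvmuxerutil:

    #include <cstdint>
    #include <cstdio>

    // Write |value| big-endian in exactly |fixed_size| bytes, regardless
    // of how many bytes the value would minimally need.
    bool WriteUIntFixed(FILE* out, uint64_t value, int fixed_size) {
      for (int i = fixed_size - 1; i >= 0; --i) {
        const int byte = static_cast<int>((value >> (i * 8)) & 0xff);
        if (fputc(byte, out) == EOF)
          return false;
      }
      return true;
    }

    // WriteUIntFixed(out, 7, 8) emits 00 00 00 00 00 00 00 07.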
@@ -1930,7 +2403,7 @@ bool Cluster::WriteClusterHeader() {
// SeekHead Class
SeekHead::SeekHead() : start_pos_(0ULL) {
- for (int32 i = 0; i < kSeekEntryCount; ++i) {
+ for (int32_t i = 0; i < kSeekEntryCount; ++i) {
seek_entry_id_[i] = 0;
seek_entry_pos_[i] = 0;
}
@@ -1943,17 +2416,19 @@ bool SeekHead::Finalize(IMkvWriter* writer) const {
if (start_pos_ == -1)
return false;
- uint64 payload_size = 0;
- uint64 entry_size[kSeekEntryCount];
+ uint64_t payload_size = 0;
+ uint64_t entry_size[kSeekEntryCount];
- for (int32 i = 0; i < kSeekEntryCount; ++i) {
+ for (int32_t i = 0; i < kSeekEntryCount; ++i) {
if (seek_entry_id_[i] != 0) {
- entry_size[i] =
- EbmlElementSize(kMkvSeekID, static_cast<uint64>(seek_entry_id_[i]));
- entry_size[i] += EbmlElementSize(kMkvSeekPosition, seek_entry_pos_[i]);
+ entry_size[i] = EbmlElementSize(
+ libwebm::kMkvSeekID, static_cast<uint64_t>(seek_entry_id_[i]));
+ entry_size[i] +=
+ EbmlElementSize(libwebm::kMkvSeekPosition, seek_entry_pos_[i]);
payload_size +=
- EbmlMasterElementSize(kMkvSeek, entry_size[i]) + entry_size[i];
+ EbmlMasterElementSize(libwebm::kMkvSeek, entry_size[i]) +
+ entry_size[i];
}
}
@@ -1961,34 +2436,35 @@ bool SeekHead::Finalize(IMkvWriter* writer) const {
if (payload_size == 0)
return true;
- const int64 pos = writer->Position();
+ const int64_t pos = writer->Position();
if (writer->Position(start_pos_))
return false;
- if (!WriteEbmlMasterElement(writer, kMkvSeekHead, payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeekHead, payload_size))
return false;
- for (int32 i = 0; i < kSeekEntryCount; ++i) {
+ for (int32_t i = 0; i < kSeekEntryCount; ++i) {
if (seek_entry_id_[i] != 0) {
- if (!WriteEbmlMasterElement(writer, kMkvSeek, entry_size[i]))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeek, entry_size[i]))
return false;
- if (!WriteEbmlElement(writer, kMkvSeekID,
- static_cast<uint64>(seek_entry_id_[i])))
+ if (!WriteEbmlElement(writer, libwebm::kMkvSeekID,
+ static_cast<uint64_t>(seek_entry_id_[i])))
return false;
- if (!WriteEbmlElement(writer, kMkvSeekPosition, seek_entry_pos_[i]))
+ if (!WriteEbmlElement(writer, libwebm::kMkvSeekPosition,
+ seek_entry_pos_[i]))
return false;
}
}
- const uint64 total_entry_size = kSeekEntryCount * MaxEntrySize();
- const uint64 total_size =
- EbmlMasterElementSize(kMkvSeekHead, total_entry_size) +
+ const uint64_t total_entry_size = kSeekEntryCount * MaxEntrySize();
+ const uint64_t total_size =
+ EbmlMasterElementSize(libwebm::kMkvSeekHead, total_entry_size) +
total_entry_size;
- const int64 size_left = total_size - (writer->Position() - start_pos_);
+ const int64_t size_left = total_size - (writer->Position() - start_pos_);
- const uint64 bytes_written = WriteVoidElement(writer, size_left);
+ const uint64_t bytes_written = WriteVoidElement(writer, size_left);
if (!bytes_written)
return false;
@@ -2000,20 +2476,21 @@ bool SeekHead::Finalize(IMkvWriter* writer) const {
}
bool SeekHead::Write(IMkvWriter* writer) {
- const uint64 entry_size = kSeekEntryCount * MaxEntrySize();
- const uint64 size = EbmlMasterElementSize(kMkvSeekHead, entry_size);
+ const uint64_t entry_size = kSeekEntryCount * MaxEntrySize();
+ const uint64_t size =
+ EbmlMasterElementSize(libwebm::kMkvSeekHead, entry_size);
start_pos_ = writer->Position();
- const uint64 bytes_written = WriteVoidElement(writer, size + entry_size);
+ const uint64_t bytes_written = WriteVoidElement(writer, size + entry_size);
if (!bytes_written)
return false;
return true;
}
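SeekHead::Write() only reserves room: it records start_pos_ and emits a Void element large enough for kSeekEntryCount worst-case entries. SeekHead::Finalize() later seeks back to start_pos_, writes the real entries, pads the leftover bytes with another Void element, and restores the stream position. A standalone sketch of that reserve-then-patch idiom, with plain zero-fill standing in for the real Void encoding:

    #include <cstdint>
    #include <cstdio>

    // Reserve |size| bytes at the current position and return where the
    // reserved region starts (stand-in for WriteVoidElement()).
    long ReserveSpace(FILE* out, uint64_t size) {
      const long start = ftell(out);
      for (uint64_t i = 0; i < size; ++i) {
        if (fputc(0, out) == EOF)
          return -1;
      }
      return start;
    }

    // Seek back, patch the reserved region, restore the position --
    // the same dance SeekHead::Finalize() performs.
    bool PatchReserved(FILE* out, long start, const uint8_t* data,
                       size_t length) {
      const long pos = ftell(out);
      if (pos < 0 || fseek(out, start, SEEK_SET) != 0)
        return false;
      if (fwrite(data, 1, length, out) != length)
        return false;
      return fseek(out, pos, SEEK_SET) == 0;
    }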
-bool SeekHead::AddSeekEntry(uint32 id, uint64 pos) {
- for (int32 i = 0; i < kSeekEntryCount; ++i) {
+bool SeekHead::AddSeekEntry(uint32_t id, uint64_t pos) {
+ for (int32_t i = 0; i < kSeekEntryCount; ++i) {
if (seek_entry_id_[i] == 0) {
seek_entry_id_[i] = id;
seek_entry_pos_[i] = pos;
@@ -2023,19 +2500,19 @@ bool SeekHead::AddSeekEntry(uint32 id, uint64 pos) {
return false;
}
-uint32 SeekHead::GetId(int index) const {
+uint32_t SeekHead::GetId(int index) const {
if (index < 0 || index >= kSeekEntryCount)
return UINT_MAX;
return seek_entry_id_[index];
}
-uint64 SeekHead::GetPosition(int index) const {
+uint64_t SeekHead::GetPosition(int index) const {
if (index < 0 || index >= kSeekEntryCount)
return ULLONG_MAX;
return seek_entry_pos_[index];
}
-bool SeekHead::SetSeekEntry(int index, uint32 id, uint64 position) {
+bool SeekHead::SetSeekEntry(int index, uint32_t id, uint64_t position) {
if (index < 0 || index >= kSeekEntryCount)
return false;
seek_entry_id_[index] = id;
@@ -2043,12 +2520,12 @@ bool SeekHead::SetSeekEntry(int index, uint32 id, uint64 position) {
return true;
}
-uint64 SeekHead::MaxEntrySize() const {
- const uint64 max_entry_payload_size =
- EbmlElementSize(kMkvSeekID, 0xffffffffULL) +
- EbmlElementSize(kMkvSeekPosition, 0xffffffffffffffffULL);
- const uint64 max_entry_size =
- EbmlMasterElementSize(kMkvSeek, max_entry_payload_size) +
+uint64_t SeekHead::MaxEntrySize() const {
+ const uint64_t max_entry_payload_size =
+ EbmlElementSize(libwebm::kMkvSeekID, UINT64_C(0xffffffff)) +
+ EbmlElementSize(libwebm::kMkvSeekPosition, UINT64_C(0xffffffffffffffff));
+ const uint64_t max_entry_size =
+ EbmlMasterElementSize(libwebm::kMkvSeek, max_entry_payload_size) +
max_entry_payload_size;
return max_entry_size;
@@ -2072,10 +2549,10 @@ SegmentInfo::~SegmentInfo() {
}
bool SegmentInfo::Init() {
- int32 major;
- int32 minor;
- int32 build;
- int32 revision;
+ int32_t major;
+ int32_t minor;
+ int32_t build;
+ int32_t revision;
GetVersion(&major, &minor, &build, &revision);
char temp[256];
#ifdef _MSC_VER
@@ -2115,12 +2592,12 @@ bool SegmentInfo::Finalize(IMkvWriter* writer) const {
if (duration_pos_ == -1)
return false;
- const int64 pos = writer->Position();
+ const int64_t pos = writer->Position();
if (writer->Position(duration_pos_))
return false;
- if (!WriteEbmlElement(writer, kMkvDuration,
+ if (!WriteEbmlElement(writer, libwebm::kMkvDuration,
static_cast<float>(duration_)))
return false;
@@ -2136,43 +2613,45 @@ bool SegmentInfo::Write(IMkvWriter* writer) {
if (!writer || !muxing_app_ || !writing_app_)
return false;
- uint64 size = EbmlElementSize(kMkvTimecodeScale, timecode_scale_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale, timecode_scale_);
if (duration_ > 0.0)
- size += EbmlElementSize(kMkvDuration, static_cast<float>(duration_));
+ size +=
+ EbmlElementSize(libwebm::kMkvDuration, static_cast<float>(duration_));
if (date_utc_ != LLONG_MIN)
- size += EbmlDateElementSize(kMkvDateUTC);
- size += EbmlElementSize(kMkvMuxingApp, muxing_app_);
- size += EbmlElementSize(kMkvWritingApp, writing_app_);
+ size += EbmlDateElementSize(libwebm::kMkvDateUTC);
+ size += EbmlElementSize(libwebm::kMkvMuxingApp, muxing_app_);
+ size += EbmlElementSize(libwebm::kMkvWritingApp, writing_app_);
- if (!WriteEbmlMasterElement(writer, kMkvInfo, size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvInfo, size))
return false;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, kMkvTimecodeScale, timecode_scale_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale, timecode_scale_))
return false;
if (duration_ > 0.0) {
// Save for later
duration_pos_ = writer->Position();
- if (!WriteEbmlElement(writer, kMkvDuration, static_cast<float>(duration_)))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDuration,
+ static_cast<float>(duration_)))
return false;
}
if (date_utc_ != LLONG_MIN)
- WriteEbmlDateElement(writer, kMkvDateUTC, date_utc_);
+ WriteEbmlDateElement(writer, libwebm::kMkvDateUTC, date_utc_);
- if (!WriteEbmlElement(writer, kMkvMuxingApp, muxing_app_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvMuxingApp, muxing_app_))
return false;
- if (!WriteEbmlElement(writer, kMkvWritingApp, writing_app_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvWritingApp, writing_app_))
return false;
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(size))
+ stop_position - payload_position != static_cast<int64_t>(size))
return false;
return true;
@@ -2244,6 +2723,8 @@ Segment::Segment()
mode_(kFile),
new_cuepoint_(false),
output_cues_(true),
+ accurate_cluster_duration_(false),
+ fixed_size_cluster_timecode_(false),
payload_pos_(0),
size_position_(0),
doc_type_version_(kDefaultDocTypeVersion),
@@ -2260,7 +2741,7 @@ Segment::Segment()
Segment::~Segment() {
if (cluster_list_) {
- for (int32 i = 0; i < cluster_list_size_; ++i) {
+ for (int32_t i = 0; i < cluster_list_size_; ++i) {
Cluster* const cluster = cluster_list_[i];
delete cluster;
}
@@ -2268,7 +2749,7 @@ Segment::~Segment() {
}
if (frames_) {
- for (int32 i = 0; i < frames_size_; ++i) {
+ for (int32_t i = 0; i < frames_size_; ++i) {
Frame* const frame = frames_[i];
delete frame;
}
@@ -2292,13 +2773,13 @@ Segment::~Segment() {
}
}
-void Segment::MoveCuesBeforeClustersHelper(uint64 diff, int32 index,
- uint64* cues_size) {
+void Segment::MoveCuesBeforeClustersHelper(uint64_t diff, int32_t index,
+ uint64_t* cues_size) {
CuePoint* const cue_point = cues_.GetCueByIndex(index);
if (cue_point == NULL)
return;
- const uint64 old_cue_point_size = cue_point->Size();
- const uint64 cluster_pos = cue_point->cluster_pos() + diff;
+ const uint64_t old_cue_point_size = cue_point->Size();
+ const uint64_t cluster_pos = cue_point->cluster_pos() + diff;
cue_point->set_cluster_pos(cluster_pos); // update the new cluster position
// New size of the cue is computed as follows
// Let a = current sum of size of all CuePoints
@@ -2308,40 +2789,40 @@ void Segment::MoveCuesBeforeClustersHelper(uint64 diff, int32 index,
// Let d = b + c. Now d is the |diff| passed to the next recursive call.
// Let e = a + b. Now e is the |cues_size| passed to the next recursive
// call.
- const uint64 cue_point_size_diff = cue_point->Size() - old_cue_point_size;
- const uint64 cue_size_diff =
+ const uint64_t cue_point_size_diff = cue_point->Size() - old_cue_point_size;
+ const uint64_t cue_size_diff =
GetCodedUIntSize(*cues_size + cue_point_size_diff) -
GetCodedUIntSize(*cues_size);
*cues_size += cue_point_size_diff;
diff = cue_size_diff + cue_point_size_diff;
if (diff > 0) {
- for (int32 i = 0; i < cues_.cue_entries_size(); ++i) {
+ for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) {
MoveCuesBeforeClustersHelper(diff, i, cues_size);
}
}
}
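A minimal sketch of the bookkeeping above; GetCodedUIntSizeSketch() is a hypothetical stand-in for libwebm's GetCodedUIntSize(), and the starting values are made up:

    #include <stdint.h>
    #include <stdio.h>

    // Hypothetical stand-in: EBML-coded uints gain a byte per 7 payload bits.
    static int GetCodedUIntSizeSketch(uint64_t value) {
      int size = 1;
      while (size < 8 && value >= (UINT64_C(1) << (7 * size)) - 1)
        ++size;
      return size;
    }

    int main() {
      uint64_t cues_size = 120;        // a: current sum of all CuePoint sizes
      const uint64_t point_diff = 10;  // b: growth of one updated CuePoint
      // c: change in the coded size of the Cues length field itself.
      const int coded_diff = GetCodedUIntSizeSketch(cues_size + point_diff) -
                             GetCodedUIntSizeSketch(cues_size);
      cues_size += point_diff;                        // e = a + b
      const uint64_t diff = coded_diff + point_diff;  // d = b + c
      printf("next diff=%llu, next cues_size=%llu\n",
             (unsigned long long)diff, (unsigned long long)cues_size);
      return 0;
    }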
void Segment::MoveCuesBeforeClusters() {
- const uint64 current_cue_size = cues_.Size();
- uint64 cue_size = 0;
- for (int32 i = 0; i < cues_.cue_entries_size(); ++i)
+ const uint64_t current_cue_size = cues_.Size();
+ uint64_t cue_size = 0;
+ for (int32_t i = 0; i < cues_.cue_entries_size(); ++i)
cue_size += cues_.GetCueByIndex(i)->Size();
- for (int32 i = 0; i < cues_.cue_entries_size(); ++i)
+ for (int32_t i = 0; i < cues_.cue_entries_size(); ++i)
MoveCuesBeforeClustersHelper(current_cue_size, i, &cue_size);
// Adjust the Seek Entry to reflect the change in position
// of Cluster and Cues
- int32 cluster_index = 0;
- int32 cues_index = 0;
- for (int32 i = 0; i < SeekHead::kSeekEntryCount; ++i) {
- if (seek_head_.GetId(i) == kMkvCluster)
+ int32_t cluster_index = 0;
+ int32_t cues_index = 0;
+ for (int32_t i = 0; i < SeekHead::kSeekEntryCount; ++i) {
+ if (seek_head_.GetId(i) == libwebm::kMkvCluster)
cluster_index = i;
- if (seek_head_.GetId(i) == kMkvCues)
+ if (seek_head_.GetId(i) == libwebm::kMkvCues)
cues_index = i;
}
- seek_head_.SetSeekEntry(cues_index, kMkvCues,
+ seek_head_.SetSeekEntry(cues_index, libwebm::kMkvCues,
seek_head_.GetPosition(cluster_index));
- seek_head_.SetSeekEntry(cluster_index, kMkvCluster,
+ seek_head_.SetSeekEntry(cluster_index, libwebm::kMkvCluster,
cues_.Size() + seek_head_.GetPosition(cues_index));
}
@@ -2359,8 +2840,8 @@ bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader,
IMkvWriter* writer) {
if (!writer->Seekable() || chunking_)
return false;
- const int64 cluster_offset =
- cluster_list_[0]->size_position() - GetUIntSize(kMkvCluster);
+ const int64_t cluster_offset =
+ cluster_list_[0]->size_position() - GetUIntSize(libwebm::kMkvCluster);
// Copy the headers.
if (!ChunkedCopy(reader, writer, 0, cluster_offset))
@@ -2383,8 +2864,8 @@ bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader,
return false;
// Update the Segment size in case the Cues size has changed.
- const int64 pos = writer->Position();
- const int64 segment_size = writer->Position() - payload_pos_;
+ const int64_t pos = writer->Position();
+ const int64_t segment_size = writer->Position() - payload_pos_;
if (writer->Position(size_position_) ||
WriteUIntSize(writer, segment_size, 8) || writer->Position(pos))
return false;
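For reference, a sketch of how this entry point is typically driven once muxing has finished (file names are assumptions, and |segment| is assumed to have been finalized with cues enabled): re-read the freshly written file and emit a copy with the Cues ahead of the Clusters.

    mkvparser::MkvReader reader;
    if (reader.Open("tmp.webm"))  // mkvparser convention: 0 on success
      return false;
    mkvmuxer::MkvWriter final_writer;
    if (!final_writer.Open("out.webm"))
      return false;
    if (!segment.CopyAndMoveCuesBeforeClusters(&reader, &final_writer))
      return false;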
@@ -2395,15 +2876,17 @@ bool Segment::Finalize() {
if (WriteFramesAll() < 0)
return false;
- if (mode_ == kFile) {
- if (cluster_list_size_ > 0) {
- // Update last cluster's size
- Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
+ if (cluster_list_size_ > 0) {
+ // Update last cluster's size
+ Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
- if (!old_cluster || !old_cluster->Finalize())
- return false;
- }
+ // The last frame of the last Cluster is not written as a BlockGroup with
+ // Duration unless the frame itself has its duration set explicitly.
+ if (!old_cluster || !old_cluster->Finalize(false, 0))
+ return false;
+ }
+ if (mode_ == kFile) {
if (chunking_ && chunk_writer_cluster_) {
chunk_writer_cluster_->Close();
chunk_count_++;
@@ -2417,7 +2900,7 @@ bool Segment::Finalize() {
return false;
if (output_cues_)
- if (!seek_head_.AddSeekEntry(kMkvCues, MaxOffset()))
+ if (!seek_head_.AddSeekEntry(libwebm::kMkvCues, MaxOffset()))
return false;
if (chunking_) {
@@ -2448,11 +2931,11 @@ bool Segment::Finalize() {
if (size_position_ == -1)
return false;
- const int64 segment_size = MaxOffset();
+ const int64_t segment_size = MaxOffset();
if (segment_size < 1)
return false;
- const int64 pos = writer_header_->Position();
+ const int64_t pos = writer_header_->Position();
UpdateDocTypeVersion();
if (doc_type_version_ != doc_type_version_written_) {
if (writer_header_->Position(0))
@@ -2490,7 +2973,7 @@ bool Segment::Finalize() {
return true;
}
-Track* Segment::AddTrack(int32 number) {
+Track* Segment::AddTrack(int32_t number) {
Track* const track = new (std::nothrow) Track(&seed_); // NOLINT
if (!track)
@@ -2508,7 +2991,7 @@ Chapter* Segment::AddChapter() { return chapters_.AddChapter(&seed_); }
Tag* Segment::AddTag() { return tags_.AddTag(); }
-uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) {
+uint64_t Segment::AddVideoTrack(int32_t width, int32_t height, int32_t number) {
VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_); // NOLINT
if (!track)
return 0;
@@ -2524,7 +3007,7 @@ uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) {
return track->number();
}
-bool Segment::AddCuePoint(uint64 timestamp, uint64 track) {
+bool Segment::AddCuePoint(uint64_t timestamp, uint64_t track) {
if (cluster_list_size_ < 1)
return false;
@@ -2547,7 +3030,8 @@ bool Segment::AddCuePoint(uint64 timestamp, uint64 track) {
return true;
}
-uint64 Segment::AddAudioTrack(int32 sample_rate, int32 channels, int32 number) {
+uint64_t Segment::AddAudioTrack(int32_t sample_rate, int32_t channels,
+ int32_t number) {
AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_); // NOLINT
if (!track)
return 0;
@@ -2562,8 +3046,8 @@ uint64 Segment::AddAudioTrack(int32 sample_rate, int32 channels, int32 number) {
return track->number();
}
-bool Segment::AddFrame(const uint8* data, uint64 length, uint64 track_number,
- uint64 timestamp, bool is_key) {
+bool Segment::AddFrame(const uint8_t* data, uint64_t length,
+ uint64_t track_number, uint64_t timestamp, bool is_key) {
if (!data)
return false;
@@ -2576,11 +3060,11 @@ bool Segment::AddFrame(const uint8* data, uint64 length, uint64 track_number,
return AddGenericFrame(&frame);
}
-bool Segment::AddFrameWithAdditional(const uint8* data, uint64 length,
- const uint8* additional,
- uint64 additional_length, uint64 add_id,
- uint64 track_number, uint64 timestamp,
- bool is_key) {
+bool Segment::AddFrameWithAdditional(const uint8_t* data, uint64_t length,
+ const uint8_t* additional,
+ uint64_t additional_length,
+ uint64_t add_id, uint64_t track_number,
+ uint64_t timestamp, bool is_key) {
if (!data || !additional)
return false;
@@ -2595,10 +3079,10 @@ bool Segment::AddFrameWithAdditional(const uint8* data, uint64 length,
return AddGenericFrame(&frame);
}
-bool Segment::AddFrameWithDiscardPadding(const uint8* data, uint64 length,
- int64 discard_padding,
- uint64 track_number, uint64 timestamp,
- bool is_key) {
+bool Segment::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
+ int64_t discard_padding,
+ uint64_t track_number,
+ uint64_t timestamp, bool is_key) {
if (!data)
return false;
@@ -2612,8 +3096,9 @@ bool Segment::AddFrameWithDiscardPadding(const uint8* data, uint64 length,
return AddGenericFrame(&frame);
}
-bool Segment::AddMetadata(const uint8* data, uint64 length, uint64 track_number,
- uint64 timestamp_ns, uint64 duration_ns) {
+bool Segment::AddMetadata(const uint8_t* data, uint64_t length,
+ uint64_t track_number, uint64_t timestamp_ns,
+ uint64_t duration_ns) {
if (!data)
return false;
@@ -2702,6 +3187,14 @@ bool Segment::AddGenericFrame(const Frame* frame) {
void Segment::OutputCues(bool output_cues) { output_cues_ = output_cues; }
+void Segment::AccurateClusterDuration(bool accurate_cluster_duration) {
+ accurate_cluster_duration_ = accurate_cluster_duration;
+}
+
+void Segment::UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode) {
+ fixed_size_cluster_timecode_ = fixed_size_cluster_timecode;
+}
+
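Illustrative use of the two new setters (a sketch; calling them before any frames are added is this example's assumption, not something the diff spells out):

    mkvmuxer::Segment segment;
    segment.Init(&writer);                      // |writer| assumed to exist
    segment.AccurateClusterDuration(true);      // last frame of each cluster
                                                // is written with a duration
    segment.UseFixedSizeClusterTimecode(true);  // always 8-byte timecodes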
bool Segment::SetChunking(bool chunking, const char* filename) {
if (chunk_count_ > 0)
return false;
@@ -2781,7 +3274,7 @@ bool Segment::SetChunking(bool chunking, const char* filename) {
return true;
}
-bool Segment::CuesTrack(uint64 track_number) {
+bool Segment::CuesTrack(uint64_t track_number) {
const Track* const track = GetTrackByNumber(track_number);
if (!track)
return false;
@@ -2792,7 +3285,7 @@ bool Segment::CuesTrack(uint64 track_number) {
void Segment::ForceNewClusterOnNextFrame() { force_new_cluster_ = true; }
-Track* Segment::GetTrackByNumber(uint64 track_number) const {
+Track* Segment::GetTrackByNumber(uint64_t track_number) const {
return tracks_.GetTrackByNumber(track_number);
}
@@ -2803,11 +3296,11 @@ bool Segment::WriteSegmentHeader() {
if (!WriteEbmlHeader(writer_header_, doc_type_version_))
return false;
doc_type_version_written_ = doc_type_version_;
- ebml_header_size_ = static_cast<int32>(writer_header_->Position());
+ ebml_header_size_ = static_cast<int32_t>(writer_header_->Position());
// Write "unknown" (-1) as segment size value. If mode is kFile, Segment
// will write over duration when the file is finalized.
- if (WriteID(writer_header_, kMkvSegment))
+ if (WriteID(writer_header_, libwebm::kMkvSegment))
return false;
// Save for later.
@@ -2831,25 +3324,25 @@ bool Segment::WriteSegmentHeader() {
return false;
}
- if (!seek_head_.AddSeekEntry(kMkvInfo, MaxOffset()))
+ if (!seek_head_.AddSeekEntry(libwebm::kMkvInfo, MaxOffset()))
return false;
if (!segment_info_.Write(writer_header_))
return false;
- if (!seek_head_.AddSeekEntry(kMkvTracks, MaxOffset()))
+ if (!seek_head_.AddSeekEntry(libwebm::kMkvTracks, MaxOffset()))
return false;
if (!tracks_.Write(writer_header_))
return false;
if (chapters_.Count() > 0) {
- if (!seek_head_.AddSeekEntry(kMkvChapters, MaxOffset()))
+ if (!seek_head_.AddSeekEntry(libwebm::kMkvChapters, MaxOffset()))
return false;
if (!chapters_.Write(writer_header_))
return false;
}
if (tags_.Count() > 0) {
- if (!seek_head_.AddSeekEntry(kMkvTags, MaxOffset()))
+ if (!seek_head_.AddSeekEntry(libwebm::kMkvTags, MaxOffset()))
return false;
if (!tags_.Write(writer_header_))
return false;
@@ -2870,7 +3363,7 @@ bool Segment::WriteSegmentHeader() {
// Here we are testing whether to create a new cluster, given a frame
// having time frame_timestamp_ns.
//
-int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
+int Segment::TestFrame(uint64_t track_number, uint64_t frame_timestamp_ns,
bool is_key) const {
if (force_new_cluster_)
return 1;
@@ -2888,11 +3381,11 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
// written to the existing cluster, or that a new cluster should be
// created.
- const uint64 timecode_scale = segment_info_.timecode_scale();
- const uint64 frame_timecode = frame_timestamp_ns / timecode_scale;
+ const uint64_t timecode_scale = segment_info_.timecode_scale();
+ const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale;
const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1];
- const uint64 last_cluster_timecode = last_cluster->timecode();
+ const uint64_t last_cluster_timecode = last_cluster->timecode();
// For completeness we test for the case when the frame's timecode
// is less than the cluster's timecode. Although in principle that
@@ -2907,7 +3400,7 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
// using a 16-bit signed integer), then we cannot write this frame
// to that cluster, and so we must create a new cluster.
- const int64 delta_timecode = frame_timecode - last_cluster_timecode;
+ const int64_t delta_timecode = frame_timecode - last_cluster_timecode;
if (delta_timecode > kMaxBlockTimecode)
return 2;
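To make the limit concrete: relative block timecodes are 16-bit signed values, so with kMaxBlockTimecode = 0x7fff (32767, assuming the usual libwebm constant) and the default 1 ms timecode scale, a cluster can span at most about 32.767 seconds before this branch forces a new one.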
@@ -2923,7 +3416,7 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
// already, where "too many" is defined as "the total time of frames
// in the cluster exceeds a threshold".
- const uint64 delta_ns = delta_timecode * timecode_scale;
+ const uint64_t delta_ns = delta_timecode * timecode_scale;
if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_)
return 1;
@@ -2932,7 +3425,7 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
// cluster is created when the size of the current cluster exceeds a
// threshold.
- const uint64 cluster_size = last_cluster->payload_size();
+ const uint64_t cluster_size = last_cluster->payload_size();
if (max_cluster_size_ > 0 && cluster_size >= max_cluster_size_)
return 1;
@@ -2942,19 +3435,19 @@ int Segment::TestFrame(uint64 track_number, uint64 frame_timestamp_ns,
return 0;
}
-bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) {
- const int32 new_size = cluster_list_size_ + 1;
+bool Segment::MakeNewCluster(uint64_t frame_timestamp_ns) {
+ const int32_t new_size = cluster_list_size_ + 1;
if (new_size > cluster_list_capacity_) {
// Add more clusters.
- const int32 new_capacity =
+ const int32_t new_capacity =
(cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2;
Cluster** const clusters =
new (std::nothrow) Cluster*[new_capacity]; // NOLINT
if (!clusters)
return false;
- for (int32 i = 0; i < cluster_list_size_; ++i) {
+ for (int32_t i = 0; i < cluster_list_size_; ++i) {
clusters[i] = cluster_list_[i];
}
@@ -2967,19 +3460,17 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) {
if (!WriteFramesLessThan(frame_timestamp_ns))
return false;
- if (mode_ == kFile) {
- if (cluster_list_size_ > 0) {
- // Update old cluster's size
- Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
+ if (cluster_list_size_ > 0) {
+ // Update old cluster's size
+ Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
- if (!old_cluster || !old_cluster->Finalize())
- return false;
- }
-
- if (output_cues_)
- new_cuepoint_ = true;
+ if (!old_cluster || !old_cluster->Finalize(true, frame_timestamp_ns))
+ return false;
}
+ if (output_cues_)
+ new_cuepoint_ = true;
+
if (chunking_ && cluster_list_size_ > 0) {
chunk_writer_cluster_->Close();
chunk_count_++;
@@ -2990,24 +3481,25 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) {
return false;
}
- const uint64 timecode_scale = segment_info_.timecode_scale();
- const uint64 frame_timecode = frame_timestamp_ns / timecode_scale;
+ const uint64_t timecode_scale = segment_info_.timecode_scale();
+ const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale;
- uint64 cluster_timecode = frame_timecode;
+ uint64_t cluster_timecode = frame_timecode;
if (frames_size_ > 0) {
const Frame* const f = frames_[0]; // earliest queued frame
- const uint64 ns = f->timestamp();
- const uint64 tc = ns / timecode_scale;
+ const uint64_t ns = f->timestamp();
+ const uint64_t tc = ns / timecode_scale;
if (tc < cluster_timecode)
cluster_timecode = tc;
}
Cluster*& cluster = cluster_list_[cluster_list_size_];
- const int64 offset = MaxOffset();
- cluster = new (std::nothrow) Cluster(cluster_timecode, // NOLINT
- offset, segment_info_.timecode_scale());
+ const int64_t offset = MaxOffset();
+ cluster = new (std::nothrow)
+ Cluster(cluster_timecode, offset, segment_info_.timecode_scale(),
+ accurate_cluster_duration_, fixed_size_cluster_timecode_);
if (!cluster)
return false;
@@ -3018,8 +3510,8 @@ bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) {
return true;
}
-bool Segment::DoNewClusterProcessing(uint64 track_number,
- uint64 frame_timestamp_ns, bool is_key) {
+bool Segment::DoNewClusterProcessing(uint64_t track_number,
+ uint64_t frame_timestamp_ns, bool is_key) {
for (;;) {
// Based on the characteristics of the current frame and current
// cluster, decide whether to create a new cluster.
@@ -3055,12 +3547,12 @@ bool Segment::CheckHeaderInfo() {
if (!WriteSegmentHeader())
return false;
- if (!seek_head_.AddSeekEntry(kMkvCluster, MaxOffset()))
+ if (!seek_head_.AddSeekEntry(libwebm::kMkvCluster, MaxOffset()))
return false;
if (output_cues_ && cues_track_ == 0) {
// Check for a video track
- for (uint32 i = 0; i < tracks_.track_entries_size(); ++i) {
+ for (uint32_t i = 0; i < tracks_.track_entries_size(); ++i) {
const Track* const track = tracks_.GetTrackByIndex(i);
if (!track)
return false;
@@ -3085,7 +3577,7 @@ bool Segment::CheckHeaderInfo() {
}
void Segment::UpdateDocTypeVersion() {
- for (uint32 index = 0; index < tracks_.track_entries_size(); ++index) {
+ for (uint32_t index = 0; index < tracks_.track_entries_size(); ++index) {
const Track* track = tracks_.GetTrackByIndex(index);
if (track == NULL)
break;
@@ -3127,14 +3619,14 @@ bool Segment::UpdateChunkName(const char* ext, char** name) const {
return true;
}
-int64 Segment::MaxOffset() {
+int64_t Segment::MaxOffset() {
if (!writer_header_)
return -1;
- int64 offset = writer_header_->Position() - payload_pos_;
+ int64_t offset = writer_header_->Position() - payload_pos_;
if (chunking_) {
- for (int32 i = 0; i < cluster_list_size_; ++i) {
+ for (int32_t i = 0; i < cluster_list_size_; ++i) {
Cluster* const cluster = cluster_list_[i];
offset += cluster->Size();
}
@@ -3147,11 +3639,11 @@ int64 Segment::MaxOffset() {
}
bool Segment::QueueFrame(Frame* frame) {
- const int32 new_size = frames_size_ + 1;
+ const int32_t new_size = frames_size_ + 1;
if (new_size > frames_capacity_) {
// Add more frames.
- const int32 new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2;
+ const int32_t new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2;
if (new_capacity < 1)
return false;
@@ -3160,7 +3652,7 @@ bool Segment::QueueFrame(Frame* frame) {
if (!frames)
return false;
- for (int32 i = 0; i < frames_size_; ++i) {
+ for (int32_t i = 0; i < frames_size_; ++i) {
frames[i] = frames_[i];
}
@@ -3186,7 +3678,7 @@ int Segment::WriteFramesAll() {
if (!cluster)
return -1;
- for (int32 i = 0; i < frames_size_; ++i) {
+ for (int32_t i = 0; i < frames_size_; ++i) {
Frame*& frame = frames_[i];
// TODO(jzern/vigneshv): using Segment::AddGenericFrame here would limit the
// places where |doc_type_version_| needs to be updated.
@@ -3215,7 +3707,7 @@ int Segment::WriteFramesAll() {
return result;
}
-bool Segment::WriteFramesLessThan(uint64 timestamp) {
+bool Segment::WriteFramesLessThan(uint64_t timestamp) {
// Check |cluster_list_size_| to see if this is the first cluster. If it is
// the first cluster, the audio frames with timestamps earlier than the first
// video timestamp will be written in a later step.
@@ -3227,11 +3719,11 @@ bool Segment::WriteFramesLessThan(uint64 timestamp) {
if (!cluster)
return false;
- int32 shift_left = 0;
+ int32_t shift_left = 0;
// TODO(fgalligan): Change this to use the durations of frames instead of
// the next frame's start time if the duration is accurate.
- for (int32 i = 1; i < frames_size_; ++i) {
+ for (int32_t i = 1; i < frames_size_; ++i) {
const Frame* const frame_curr = frames_[i];
if (frame_curr->timestamp() > timestamp)
@@ -3262,8 +3754,8 @@ bool Segment::WriteFramesLessThan(uint64 timestamp) {
if (shift_left >= frames_size_)
return false;
- const int32 new_frames_size = frames_size_ - shift_left;
- for (int32 i = 0; i < new_frames_size; ++i) {
+ const int32_t new_frames_size = frames_size_ - shift_left;
+ for (int32_t i = 0; i < new_frames_size; ++i) {
frames_[i] = frames_[i + shift_left];
}
diff --git a/libvpx/third_party/libwebm/mkvmuxer.hpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
index 03a002c93..55ba07196 100644
--- a/libvpx/third_party/libwebm/mkvmuxer.hpp
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
@@ -6,24 +6,31 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#ifndef MKVMUXER_HPP
-#define MKVMUXER_HPP
+#ifndef MKVMUXER_MKVMUXER_H_
+#define MKVMUXER_MKVMUXER_H_
-#include "mkvmuxertypes.hpp"
+#include <stdint.h>
+
+#include <cstddef>
+#include <list>
+#include <map>
+
+#include "common/webmids.h"
+#include "mkvmuxer/mkvmuxertypes.h"
// For a description of the WebM elements see
// http://www.webmproject.org/code/specs/container/.
namespace mkvparser {
class IMkvReader;
-} // end namespace
+} // namespace mkvparser
namespace mkvmuxer {
class MkvWriter;
class Segment;
-const uint64 kMaxTrackNumber = 126;
+const uint64_t kMaxTrackNumber = 126;
///////////////////////////////////////////////////////////////
// Interface used by the mkvmuxer to write out the Mkv data.
@@ -59,15 +66,15 @@ class IMkvWriter {
// Writes out the EBML header for a WebM file. This function must be called
// before any other libwebm writing functions are called.
-bool WriteEbmlHeader(IMkvWriter* writer, uint64 doc_type_version);
+bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version);
// Deprecated. Writes out EBML header with doc_type_version as
// kDefaultDocTypeVersion. Exists for backward compatibility.
bool WriteEbmlHeader(IMkvWriter* writer);
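WriteEbmlHeader() is normally invoked indirectly through Segment; a minimal end-to-end sketch of the API declared in this header (the output name and the |data|/|length| frame buffer are assumptions):

    #include "mkvmuxer/mkvmuxer.h"
    #include "mkvmuxer/mkvwriter.h"

    bool WriteOneFrameWebm(const uint8_t* data, uint64_t length) {
      mkvmuxer::MkvWriter writer;
      if (!writer.Open("out.webm"))
        return false;
      mkvmuxer::Segment segment;
      if (!segment.Init(&writer))
        return false;
      // Passing 0 lets the muxer choose the track number.
      const uint64_t video_track = segment.AddVideoTrack(640, 480, 0);
      if (!video_track)
        return false;
      if (!segment.AddFrame(data, length, video_track,
                            0 /* timestamp, ns */, true /* key frame */))
        return false;
      const bool ok = segment.Finalize();
      writer.Close();
      return ok;
    }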
// Copies a chunk from source to destination between the given byte positions
-bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64 start,
- int64 size);
+bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64_t start,
+ int64_t size);
///////////////////////////////////////////////////////////////
// Class to hold data that will be written to a block.
@@ -81,10 +88,11 @@ class Frame {
bool CopyFrom(const Frame& frame);
// Copies |frame| data into |frame_|. Returns true on success.
- bool Init(const uint8* frame, uint64 length);
+ bool Init(const uint8_t* frame, uint64_t length);
// Copies |additional| data into |additional_|. Returns true on success.
- bool AddAdditionalData(const uint8* additional, uint64 length, uint64 add_id);
+ bool AddAdditionalData(const uint8_t* additional, uint64_t length,
+ uint64_t add_id);
// Returns true if the frame has valid parameters.
bool IsValid() const;
@@ -93,62 +101,70 @@ class Frame {
// parameters.
bool CanBeSimpleBlock() const;
- uint64 add_id() const { return add_id_; }
- const uint8* additional() const { return additional_; }
- uint64 additional_length() const { return additional_length_; }
- void set_duration(uint64 duration) { duration_ = duration; }
- uint64 duration() const { return duration_; }
- const uint8* frame() const { return frame_; }
+ uint64_t add_id() const { return add_id_; }
+ const uint8_t* additional() const { return additional_; }
+ uint64_t additional_length() const { return additional_length_; }
+ void set_duration(uint64_t duration);
+ uint64_t duration() const { return duration_; }
+ bool duration_set() const { return duration_set_; }
+ const uint8_t* frame() const { return frame_; }
void set_is_key(bool key) { is_key_ = key; }
bool is_key() const { return is_key_; }
- uint64 length() const { return length_; }
- void set_track_number(uint64 track_number) { track_number_ = track_number; }
- uint64 track_number() const { return track_number_; }
- void set_timestamp(uint64 timestamp) { timestamp_ = timestamp; }
- uint64 timestamp() const { return timestamp_; }
- void set_discard_padding(int64 discard_padding) {
+ uint64_t length() const { return length_; }
+ void set_track_number(uint64_t track_number) { track_number_ = track_number; }
+ uint64_t track_number() const { return track_number_; }
+ void set_timestamp(uint64_t timestamp) { timestamp_ = timestamp; }
+ uint64_t timestamp() const { return timestamp_; }
+ void set_discard_padding(int64_t discard_padding) {
discard_padding_ = discard_padding;
}
- int64 discard_padding() const { return discard_padding_; }
- void set_reference_block_timestamp(int64 reference_block_timestamp);
- int64 reference_block_timestamp() const { return reference_block_timestamp_; }
+ int64_t discard_padding() const { return discard_padding_; }
+ void set_reference_block_timestamp(int64_t reference_block_timestamp);
+ int64_t reference_block_timestamp() const {
+ return reference_block_timestamp_;
+ }
bool reference_block_timestamp_set() const {
return reference_block_timestamp_set_;
}
private:
// Id of the Additional data.
- uint64 add_id_;
+ uint64_t add_id_;
// Pointer to additional data. Owned by this class.
- uint8* additional_;
+ uint8_t* additional_;
// Length of the additional data.
- uint64 additional_length_;
+ uint64_t additional_length_;
// Duration of the frame in nanoseconds.
- uint64 duration_;
+ uint64_t duration_;
+
+ // Flag indicating that |duration_| has been set. Setting duration causes the
+ // frame to be written out as a Block with BlockDuration instead of as a
+ // SimpleBlock.
+ bool duration_set_;
// Pointer to the data. Owned by this class.
- uint8* frame_;
+ uint8_t* frame_;
// Flag telling if the data should set the key flag of a block.
bool is_key_;
// Length of the data.
- uint64 length_;
+ uint64_t length_;
// Mkv track number the data is associated with.
- uint64 track_number_;
+ uint64_t track_number_;
// Timestamp of the data in nanoseconds.
- uint64 timestamp_;
+ uint64_t timestamp_;
// Discard padding for the frame.
- int64 discard_padding_;
+ int64_t discard_padding_;
// Reference block timestamp.
- int64 reference_block_timestamp_;
+ int64_t reference_block_timestamp_;
// Flag indicating if |reference_block_timestamp_| has been set.
bool reference_block_timestamp_set_;
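A sketch of the new explicit-duration path enabled by |duration_set_| (values illustrative; |segment|, |video_track|, |data| and |length| are assumed to exist):

    mkvmuxer::Frame frame;
    if (!frame.Init(data, length))
      return false;
    frame.set_track_number(video_track);
    frame.set_timestamp(33000000);  // ns
    frame.set_is_key(true);
    frame.set_duration(33000000);   // ns; marks duration_set_, so the frame
                                    // becomes a Block with BlockDuration
    if (!segment.AddGenericFrame(&frame))
      return false;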
@@ -164,19 +180,19 @@ class CuePoint {
~CuePoint();
// Returns the size in bytes for the entire CuePoint element.
- uint64 Size() const;
+ uint64_t Size() const;
// Output the CuePoint element to the writer. Returns true on success.
bool Write(IMkvWriter* writer) const;
- void set_time(uint64 time) { time_ = time; }
- uint64 time() const { return time_; }
- void set_track(uint64 track) { track_ = track; }
- uint64 track() const { return track_; }
- void set_cluster_pos(uint64 cluster_pos) { cluster_pos_ = cluster_pos; }
- uint64 cluster_pos() const { return cluster_pos_; }
- void set_block_number(uint64 block_number) { block_number_ = block_number; }
- uint64 block_number() const { return block_number_; }
+ void set_time(uint64_t time) { time_ = time; }
+ uint64_t time() const { return time_; }
+ void set_track(uint64_t track) { track_ = track; }
+ uint64_t track() const { return track_; }
+ void set_cluster_pos(uint64_t cluster_pos) { cluster_pos_ = cluster_pos; }
+ uint64_t cluster_pos() const { return cluster_pos_; }
+ void set_block_number(uint64_t block_number) { block_number_ = block_number; }
+ uint64_t block_number() const { return block_number_; }
void set_output_block_number(bool output_block_number) {
output_block_number_ = output_block_number;
}
@@ -184,19 +200,19 @@ class CuePoint {
private:
// Returns the size in bytes for the payload of the CuePoint element.
- uint64 PayloadSize() const;
+ uint64_t PayloadSize() const;
// Absolute timecode according to the segment time base.
- uint64 time_;
+ uint64_t time_;
// The Track element associated with the CuePoint.
- uint64 track_;
+ uint64_t track_;
// The position of the Cluster containing the Block.
- uint64 cluster_pos_;
+ uint64_t cluster_pos_;
// Number of the Block within the Cluster, starting from 1.
- uint64 block_number_;
+ uint64_t block_number_;
// If true the muxer will write out the block number for the cue if the
// block number is different than the default of 1. Default is set to true.
@@ -217,15 +233,15 @@ class Cues {
// Returns the cue point by index. Returns NULL if there is no cue point
// match.
- CuePoint* GetCueByIndex(int32 index) const;
+ CuePoint* GetCueByIndex(int32_t index) const;
// Returns the total size of the Cues element
- uint64 Size();
+ uint64_t Size();
// Output the Cues element to the writer. Returns true on success.
bool Write(IMkvWriter* writer) const;
- int32 cue_entries_size() const { return cue_entries_size_; }
+ int32_t cue_entries_size() const { return cue_entries_size_; }
void set_output_block_number(bool output_block_number) {
output_block_number_ = output_block_number;
}
@@ -233,10 +249,10 @@ class Cues {
private:
// Number of allocated elements in |cue_entries_|.
- int32 cue_entries_capacity_;
+ int32_t cue_entries_capacity_;
// Number of CuePoints in |cue_entries_|.
- int32 cue_entries_size_;
+ int32_t cue_entries_size_;
// CuePoint list.
CuePoint** cue_entries_;
@@ -258,21 +274,21 @@ class ContentEncAESSettings {
~ContentEncAESSettings() {}
// Returns the size in bytes for the ContentEncAESSettings element.
- uint64 Size() const;
+ uint64_t Size() const;
// Writes out the ContentEncAESSettings element to |writer|. Returns true on
// success.
bool Write(IMkvWriter* writer) const;
- uint64 cipher_mode() const { return cipher_mode_; }
+ uint64_t cipher_mode() const { return cipher_mode_; }
private:
// Returns the size in bytes for the payload of the ContentEncAESSettings
// element.
- uint64 PayloadSize() const;
+ uint64_t PayloadSize() const;
// Sub elements
- uint64 cipher_mode_;
+ uint64_t cipher_mode_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings);
};
@@ -291,45 +307,158 @@ class ContentEncoding {
// Sets the content encryption id. Copies |length| bytes from |id| to
// |enc_key_id_|. Returns true on success.
- bool SetEncryptionID(const uint8* id, uint64 length);
+ bool SetEncryptionID(const uint8_t* id, uint64_t length);
// Returns the size in bytes for the ContentEncoding element.
- uint64 Size() const;
+ uint64_t Size() const;
// Writes out the ContentEncoding element to |writer|. Returns true on
// success.
bool Write(IMkvWriter* writer) const;
- uint64 enc_algo() const { return enc_algo_; }
- uint64 encoding_order() const { return encoding_order_; }
- uint64 encoding_scope() const { return encoding_scope_; }
- uint64 encoding_type() const { return encoding_type_; }
+ uint64_t enc_algo() const { return enc_algo_; }
+ uint64_t encoding_order() const { return encoding_order_; }
+ uint64_t encoding_scope() const { return encoding_scope_; }
+ uint64_t encoding_type() const { return encoding_type_; }
ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; }
private:
// Returns the size in bytes for the encoding elements.
- uint64 EncodingSize(uint64 compresion_size, uint64 encryption_size) const;
+ uint64_t EncodingSize(uint64_t compresion_size,
+ uint64_t encryption_size) const;
// Returns the size in bytes for the encryption elements.
- uint64 EncryptionSize() const;
+ uint64_t EncryptionSize() const;
// Track element names
- uint64 enc_algo_;
- uint8* enc_key_id_;
- uint64 encoding_order_;
- uint64 encoding_scope_;
- uint64 encoding_type_;
+ uint64_t enc_algo_;
+ uint8_t* enc_key_id_;
+ uint64_t encoding_order_;
+ uint64_t encoding_scope_;
+ uint64_t encoding_type_;
// ContentEncAESSettings element.
ContentEncAESSettings enc_aes_settings_;
// Size of the ContentEncKeyID data in bytes.
- uint64 enc_key_id_length_;
+ uint64_t enc_key_id_length_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
};
///////////////////////////////////////////////////////////////
+// Colour element.
+struct PrimaryChromaticity {
+ PrimaryChromaticity(float x_val, float y_val) : x(x_val), y(y_val) {}
+ PrimaryChromaticity() : x(0), y(0) {}
+ ~PrimaryChromaticity() {}
+ uint64_t PrimaryChromaticityPayloadSize(libwebm::MkvId x_id,
+ libwebm::MkvId y_id) const;
+ bool Write(IMkvWriter* writer, libwebm::MkvId x_id,
+ libwebm::MkvId y_id) const;
+
+ float x;
+ float y;
+};
+
+class MasteringMetadata {
+ public:
+ static const float kValueNotPresent;
+
+ MasteringMetadata()
+ : luminance_max(kValueNotPresent),
+ luminance_min(kValueNotPresent),
+ r_(NULL),
+ g_(NULL),
+ b_(NULL),
+ white_point_(NULL) {}
+ ~MasteringMetadata() {
+ delete r_;
+ delete g_;
+ delete b_;
+ delete white_point_;
+ }
+
+ // Returns total size of the MasteringMetadata element.
+ uint64_t MasteringMetadataSize() const;
+ bool Write(IMkvWriter* writer) const;
+
+ // Copies non-null chromaticity.
+ bool SetChromaticity(const PrimaryChromaticity* r,
+ const PrimaryChromaticity* g,
+ const PrimaryChromaticity* b,
+ const PrimaryChromaticity* white_point);
+ const PrimaryChromaticity* r() const { return r_; }
+ const PrimaryChromaticity* g() const { return g_; }
+ const PrimaryChromaticity* b() const { return b_; }
+ const PrimaryChromaticity* white_point() const { return white_point_; }
+
+ float luminance_max;
+ float luminance_min;
+
+ private:
+ // Returns size of MasteringMetadata child elements.
+ uint64_t PayloadSize() const;
+
+ PrimaryChromaticity* r_;
+ PrimaryChromaticity* g_;
+ PrimaryChromaticity* b_;
+ PrimaryChromaticity* white_point_;
+};
+
+class Colour {
+ public:
+ static const uint64_t kValueNotPresent;
+ Colour()
+ : matrix_coefficients(kValueNotPresent),
+ bits_per_channel(kValueNotPresent),
+ chroma_subsampling_horz(kValueNotPresent),
+ chroma_subsampling_vert(kValueNotPresent),
+ cb_subsampling_horz(kValueNotPresent),
+ cb_subsampling_vert(kValueNotPresent),
+ chroma_siting_horz(kValueNotPresent),
+ chroma_siting_vert(kValueNotPresent),
+ range(kValueNotPresent),
+ transfer_characteristics(kValueNotPresent),
+ primaries(kValueNotPresent),
+ max_cll(kValueNotPresent),
+ max_fall(kValueNotPresent),
+ mastering_metadata_(NULL) {}
+ ~Colour() { delete mastering_metadata_; }
+
+ // Returns total size of the Colour element.
+ uint64_t ColourSize() const;
+ bool Write(IMkvWriter* writer) const;
+
+ // Deep copies |mastering_metadata|.
+ bool SetMasteringMetadata(const MasteringMetadata& mastering_metadata);
+
+ const MasteringMetadata* mastering_metadata() const {
+ return mastering_metadata_;
+ }
+
+ uint64_t matrix_coefficients;
+ uint64_t bits_per_channel;
+ uint64_t chroma_subsampling_horz;
+ uint64_t chroma_subsampling_vert;
+ uint64_t cb_subsampling_horz;
+ uint64_t cb_subsampling_vert;
+ uint64_t chroma_siting_horz;
+ uint64_t chroma_siting_vert;
+ uint64_t range;
+ uint64_t transfer_characteristics;
+ uint64_t primaries;
+ uint64_t max_cll;
+ uint64_t max_fall;
+
+ private:
+ // Returns size of Colour child elements.
+ uint64_t PayloadSize() const;
+
+ MasteringMetadata* mastering_metadata_;
+};
+
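A sketch of populating the new colour metadata (the chromaticity and luminance numbers are illustrative HDR-style values; |segment| and |video_track| are assumed, and VideoTrack::SetColour() is declared further below):

    mkvmuxer::PrimaryChromaticity red(0.708f, 0.292f);
    mkvmuxer::PrimaryChromaticity green(0.170f, 0.797f);
    mkvmuxer::PrimaryChromaticity blue(0.131f, 0.046f);
    mkvmuxer::PrimaryChromaticity white(0.3127f, 0.3290f);

    mkvmuxer::MasteringMetadata mm;
    mm.luminance_max = 1000.0f;
    mm.luminance_min = 0.01f;
    if (!mm.SetChromaticity(&red, &green, &blue, &white))
      return false;

    mkvmuxer::Colour colour;
    colour.max_cll = 1000;
    colour.max_fall = 400;
    if (!colour.SetMasteringMetadata(mm))
      return false;

    mkvmuxer::VideoTrack* const video = static_cast<mkvmuxer::VideoTrack*>(
        segment.GetTrackByNumber(video_track));
    if (!video || !video->SetColour(colour))
      return false;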
+///////////////////////////////////////////////////////////////
// Track element.
class Track {
public:
@@ -342,76 +471,76 @@ class Track {
// Returns the ContentEncoding by index. Returns NULL if there is no
// ContentEncoding match.
- ContentEncoding* GetContentEncodingByIndex(uint32 index) const;
+ ContentEncoding* GetContentEncodingByIndex(uint32_t index) const;
// Returns the size in bytes for the payload of the Track element.
- virtual uint64 PayloadSize() const;
+ virtual uint64_t PayloadSize() const;
// Returns the size in bytes of the Track element.
- virtual uint64 Size() const;
+ virtual uint64_t Size() const;
// Output the Track element to the writer. Returns true on success.
virtual bool Write(IMkvWriter* writer) const;
// Sets the CodecPrivate element of the Track element. Copies |length|
// bytes from |codec_private| to |codec_private_|. Returns true on success.
- bool SetCodecPrivate(const uint8* codec_private, uint64 length);
+ bool SetCodecPrivate(const uint8_t* codec_private, uint64_t length);
void set_codec_id(const char* codec_id);
const char* codec_id() const { return codec_id_; }
- const uint8* codec_private() const { return codec_private_; }
+ const uint8_t* codec_private() const { return codec_private_; }
void set_language(const char* language);
const char* language() const { return language_; }
- void set_max_block_additional_id(uint64 max_block_additional_id) {
+ void set_max_block_additional_id(uint64_t max_block_additional_id) {
max_block_additional_id_ = max_block_additional_id;
}
- uint64 max_block_additional_id() const { return max_block_additional_id_; }
+ uint64_t max_block_additional_id() const { return max_block_additional_id_; }
void set_name(const char* name);
const char* name() const { return name_; }
- void set_number(uint64 number) { number_ = number; }
- uint64 number() const { return number_; }
- void set_type(uint64 type) { type_ = type; }
- uint64 type() const { return type_; }
- void set_uid(uint64 uid) { uid_ = uid; }
- uint64 uid() const { return uid_; }
- void set_codec_delay(uint64 codec_delay) { codec_delay_ = codec_delay; }
- uint64 codec_delay() const { return codec_delay_; }
- void set_seek_pre_roll(uint64 seek_pre_roll) {
+ void set_number(uint64_t number) { number_ = number; }
+ uint64_t number() const { return number_; }
+ void set_type(uint64_t type) { type_ = type; }
+ uint64_t type() const { return type_; }
+ void set_uid(uint64_t uid) { uid_ = uid; }
+ uint64_t uid() const { return uid_; }
+ void set_codec_delay(uint64_t codec_delay) { codec_delay_ = codec_delay; }
+ uint64_t codec_delay() const { return codec_delay_; }
+ void set_seek_pre_roll(uint64_t seek_pre_roll) {
seek_pre_roll_ = seek_pre_roll;
}
- uint64 seek_pre_roll() const { return seek_pre_roll_; }
- void set_default_duration(uint64 default_duration) {
+ uint64_t seek_pre_roll() const { return seek_pre_roll_; }
+ void set_default_duration(uint64_t default_duration) {
default_duration_ = default_duration;
}
- uint64 default_duration() const { return default_duration_; }
+ uint64_t default_duration() const { return default_duration_; }
- uint64 codec_private_length() const { return codec_private_length_; }
- uint32 content_encoding_entries_size() const {
+ uint64_t codec_private_length() const { return codec_private_length_; }
+ uint32_t content_encoding_entries_size() const {
return content_encoding_entries_size_;
}
private:
// Track element names.
char* codec_id_;
- uint8* codec_private_;
+ uint8_t* codec_private_;
char* language_;
- uint64 max_block_additional_id_;
+ uint64_t max_block_additional_id_;
char* name_;
- uint64 number_;
- uint64 type_;
- uint64 uid_;
- uint64 codec_delay_;
- uint64 seek_pre_roll_;
- uint64 default_duration_;
+ uint64_t number_;
+ uint64_t type_;
+ uint64_t uid_;
+ uint64_t codec_delay_;
+ uint64_t seek_pre_roll_;
+ uint64_t default_duration_;
// Size of the CodecPrivate data in bytes.
- uint64 codec_private_length_;
+ uint64_t codec_private_length_;
// ContentEncoding element list.
ContentEncoding** content_encoding_entries_;
// Number of ContentEncoding elements added.
- uint32 content_encoding_entries_size_;
+ uint32_t content_encoding_entries_size_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track);
};
@@ -437,56 +566,63 @@ class VideoTrack : public Track {
// Returns the size in bytes for the payload of the Track element plus the
// video specific elements.
- virtual uint64 PayloadSize() const;
+ virtual uint64_t PayloadSize() const;
// Output the VideoTrack element to the writer. Returns true on success.
virtual bool Write(IMkvWriter* writer) const;
// Sets the video's stereo mode. Returns true on success.
- bool SetStereoMode(uint64 stereo_mode);
+ bool SetStereoMode(uint64_t stereo_mode);
// Sets the video's alpha mode. Returns true on success.
- bool SetAlphaMode(uint64 alpha_mode);
-
- void set_display_height(uint64 height) { display_height_ = height; }
- uint64 display_height() const { return display_height_; }
- void set_display_width(uint64 width) { display_width_ = width; }
- uint64 display_width() const { return display_width_; }
-
- void set_crop_left(uint64 crop_left) { crop_left_ = crop_left; }
- uint64 crop_left() const { return crop_left_; }
- void set_crop_right(uint64 crop_right) { crop_right_ = crop_right; }
- uint64 crop_right() const { return crop_right_; }
- void set_crop_top(uint64 crop_top) { crop_top_ = crop_top; }
- uint64 crop_top() const { return crop_top_; }
- void set_crop_bottom(uint64 crop_bottom) { crop_bottom_ = crop_bottom; }
- uint64 crop_bottom() const { return crop_bottom_; }
+ bool SetAlphaMode(uint64_t alpha_mode);
+
+ void set_display_height(uint64_t height) { display_height_ = height; }
+ uint64_t display_height() const { return display_height_; }
+ void set_display_width(uint64_t width) { display_width_ = width; }
+ uint64_t display_width() const { return display_width_; }
+
+ void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; }
+ uint64_t crop_left() const { return crop_left_; }
+ void set_crop_right(uint64_t crop_right) { crop_right_ = crop_right; }
+ uint64_t crop_right() const { return crop_right_; }
+ void set_crop_top(uint64_t crop_top) { crop_top_ = crop_top; }
+ uint64_t crop_top() const { return crop_top_; }
+ void set_crop_bottom(uint64_t crop_bottom) { crop_bottom_ = crop_bottom; }
+ uint64_t crop_bottom() const { return crop_bottom_; }
void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; }
double frame_rate() const { return frame_rate_; }
- void set_height(uint64 height) { height_ = height; }
- uint64 height() const { return height_; }
- uint64 stereo_mode() { return stereo_mode_; }
- uint64 alpha_mode() { return alpha_mode_; }
- void set_width(uint64 width) { width_ = width; }
- uint64 width() const { return width_; }
+ void set_height(uint64_t height) { height_ = height; }
+ uint64_t height() const { return height_; }
+ uint64_t stereo_mode() { return stereo_mode_; }
+ uint64_t alpha_mode() { return alpha_mode_; }
+ void set_width(uint64_t width) { width_ = width; }
+ uint64_t width() const { return width_; }
+
+ Colour* colour() { return colour_; }
+
+ // Deep copies |colour|.
+ bool SetColour(const Colour& colour);
private:
// Returns the size in bytes of the Video element.
- uint64 VideoPayloadSize() const;
+ uint64_t VideoPayloadSize() const;
// Video track element names.
- uint64 display_height_;
- uint64 display_width_;
- uint64 crop_left_;
- uint64 crop_right_;
- uint64 crop_top_;
- uint64 crop_bottom_;
+ uint64_t display_height_;
+ uint64_t display_width_;
+ uint64_t crop_left_;
+ uint64_t crop_right_;
+ uint64_t crop_top_;
+ uint64_t crop_bottom_;
double frame_rate_;
- uint64 height_;
- uint64 stereo_mode_;
- uint64 alpha_mode_;
- uint64 width_;
+ uint64_t height_;
+ uint64_t stereo_mode_;
+ uint64_t alpha_mode_;
+ uint64_t width_;
+
+ Colour* colour_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack);
};
@@ -501,22 +637,22 @@ class AudioTrack : public Track {
// Returns the size in bytes for the payload of the Track element plus the
// audio specific elements.
- virtual uint64 PayloadSize() const;
+ virtual uint64_t PayloadSize() const;
// Output the AudioTrack element to the writer. Returns true on success.
virtual bool Write(IMkvWriter* writer) const;
- void set_bit_depth(uint64 bit_depth) { bit_depth_ = bit_depth; }
- uint64 bit_depth() const { return bit_depth_; }
- void set_channels(uint64 channels) { channels_ = channels; }
- uint64 channels() const { return channels_; }
+ void set_bit_depth(uint64_t bit_depth) { bit_depth_ = bit_depth; }
+ uint64_t bit_depth() const { return bit_depth_; }
+ void set_channels(uint64_t channels) { channels_ = channels; }
+ uint64_t channels() const { return channels_; }
void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; }
double sample_rate() const { return sample_rate_; }
private:
// Audio track element names.
- uint64 bit_depth_;
- uint64 channels_;
+ uint64_t bit_depth_;
+ uint64_t channels_;
double sample_rate_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack);
@@ -542,32 +678,35 @@ class Tracks {
// deleted by the Tracks object. Returns true on success. |number| is the
// number to use for the track. |number| must be >= 0. If |number| == 0
// then the muxer will decide on the track number.
- bool AddTrack(Track* track, int32 number);
+ bool AddTrack(Track* track, int32_t number);
// Returns the track by index. Returns NULL if there is no track match.
- const Track* GetTrackByIndex(uint32 idx) const;
+ const Track* GetTrackByIndex(uint32_t idx) const;
// Search the Tracks and return the track that matches |tn|. Returns NULL
// if there is no track match.
- Track* GetTrackByNumber(uint64 track_number) const;
+ Track* GetTrackByNumber(uint64_t track_number) const;
// Returns true if the track number is an audio track.
- bool TrackIsAudio(uint64 track_number) const;
+ bool TrackIsAudio(uint64_t track_number) const;
// Returns true if the track number is a video track.
- bool TrackIsVideo(uint64 track_number) const;
+ bool TrackIsVideo(uint64_t track_number) const;
// Output the Tracks element to the writer. Returns true on success.
bool Write(IMkvWriter* writer) const;
- uint32 track_entries_size() const { return track_entries_size_; }
+ uint32_t track_entries_size() const { return track_entries_size_; }
private:
// Track element list.
Track** track_entries_;
// Number of Track elements added.
- uint32 track_entries_size_;
+ uint32_t track_entries_size_;
+
+ // Whether or not Tracks element has already been written via IMkvWriter.
+ mutable bool wrote_tracks_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks);
};
@@ -585,12 +724,12 @@ class Chapter {
// Converts the nanosecond start and stop times of this chapter to
// their corresponding timecode values, and stores them that way.
- void set_time(const Segment& segment, uint64 start_time_ns,
- uint64 end_time_ns);
+ void set_time(const Segment& segment, uint64_t start_time_ns,
+ uint64_t end_time_ns);
// Sets the uid for this chapter. Primarily used to enable
// deterministic output from the muxer.
- void set_uid(const uint64 uid) { uid_ = uid; }
+ void set_uid(const uint64_t uid) { uid_ = uid; }
// Add a title string to this chapter, per the semantics described
// here:
@@ -637,7 +776,7 @@ class Chapter {
// If |writer| is non-NULL, serialize the Display sub-element of
// the Atom into the stream. Returns the Display element size on
// success, 0 if error.
- uint64 WriteDisplay(IMkvWriter* writer) const;
+ uint64_t WriteDisplay(IMkvWriter* writer) const;
private:
char* title_;
@@ -670,20 +809,20 @@ class Chapter {
// If |writer| is non-NULL, serialize the Atom sub-element into the
// stream. Returns the total size of the element on success, 0 if
// error.
- uint64 WriteAtom(IMkvWriter* writer) const;
+ uint64_t WriteAtom(IMkvWriter* writer) const;
// The string identifier for this chapter (corresponds to WebVTT cue
// identifier).
char* id_;
// Start timecode of the chapter.
- uint64 start_timecode_;
+ uint64_t start_timecode_;
// Stop timecode of the chapter.
- uint64 end_timecode_;
+ uint64_t end_timecode_;
// The binary identifier for this chapter.
- uint64 uid_;
+ uint64_t uid_;
// The Atom element can contain multiple Display sub-elements, as
// the same logical title can be rendered in different languages.
@@ -723,7 +862,7 @@ class Chapters {
// If |writer| is non-NULL, serialize the Edition sub-element of the
// Chapters element into the stream. Returns the Edition element
// size on success, 0 if error.
- uint64 WriteEdition(IMkvWriter* writer) const;
+ uint64_t WriteEdition(IMkvWriter* writer) const;
// Total length of the chapters_ array.
int chapters_size_;
@@ -768,7 +907,7 @@ class Tag {
// If |writer| is non-NULL, serialize the SimpleTag sub-element of
// the Atom into the stream. Returns the SimpleTag element size on
// success, 0 if error.
- uint64 Write(IMkvWriter* writer) const;
+ uint64_t Write(IMkvWriter* writer) const;
private:
char* tag_name_;
@@ -795,7 +934,7 @@ class Tag {
// If |writer| is non-NULL, serialize the Tag sub-element into the
// stream. Returns the total size of the element on success, 0 if
// error.
- uint64 Write(IMkvWriter* writer) const;
+ uint64_t Write(IMkvWriter* writer) const;
// The Atom element can contain multiple SimpleTag sub-elements
SimpleTag* simple_tags_;
@@ -853,7 +992,9 @@ class Cluster {
// |timecode| is the absolute timecode of the cluster. |cues_pos| is the
// position for the cluster within the segment that should be written in
// the cues element. |timecode_scale| is the timecode scale of the segment.
- Cluster(uint64 timecode, int64 cues_pos, uint64 timecode_scale);
+ Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale,
+ bool write_last_frame_with_duration = false,
+ bool fixed_size_timecode = false);
~Cluster();
bool Init(IMkvWriter* ptr_writer);
@@ -872,8 +1013,8 @@ class Cluster {
// timecode: Absolute (not relative to cluster) timestamp of the
// frame, expressed in timecode units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrame(const uint8* data, uint64 length, uint64 track_number,
- uint64 timecode, // timecode units (absolute)
+ bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number,
+ uint64_t timecode, // timecode units (absolute)
bool is_key);
// Adds a frame to be output in the file. The frame is written out through
@@ -889,10 +1030,11 @@ class Cluster {
// abs_timecode: Absolute (not relative to cluster) timestamp of the
// frame, expressed in timecode units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithAdditional(const uint8* data, uint64 length,
- const uint8* additional, uint64 additional_length,
- uint64 add_id, uint64 track_number,
- uint64 abs_timecode, bool is_key);
+ bool AddFrameWithAdditional(const uint8_t* data, uint64_t length,
+ const uint8_t* additional,
+ uint64_t additional_length, uint64_t add_id,
+ uint64_t track_number, uint64_t abs_timecode,
+ bool is_key);
// Adds a frame to be output in the file. The frame is written out through
// |writer_| if successful. Returns true on success.
@@ -905,9 +1047,10 @@ class Cluster {
// abs_timecode: Absolute (not relative to cluster) timestamp of the
// frame, expressed in timecode units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithDiscardPadding(const uint8* data, uint64 length,
- int64 discard_padding, uint64 track_number,
- uint64 abs_timecode, bool is_key);
+ bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
+ int64_t discard_padding,
+ uint64_t track_number, uint64_t abs_timecode,
+ bool is_key);
// Writes a frame of metadata to the output medium; returns true on
// success.
@@ -923,31 +1066,53 @@ class Cluster {
// The metadata frame is written as a block group, with a duration
// sub-element but no reference time sub-elements (indicating that
// it is considered a keyframe, per Matroska semantics).
- bool AddMetadata(const uint8* data, uint64 length, uint64 track_number,
- uint64 timecode, uint64 duration);
+ bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number,
+ uint64_t timecode, uint64_t duration);
// Increments the size of the cluster's data in bytes.
- void AddPayloadSize(uint64 size);
+ void AddPayloadSize(uint64_t size);
// Closes the cluster so no more data can be written to it. Will update the
- // cluster's size if |writer_| is seekable. Returns true on success.
+ // cluster's size if |writer_| is seekable. Returns true on success. This
+ // variant of Finalize() fails when |write_last_frame_with_duration_| is set
+ // to true.
bool Finalize();
+ // Closes the cluster so no more data can be written to it. Will update the
+ // cluster's size if |writer_| is seekable. Returns true on success.
+ // Inputs:
+ // set_last_frame_duration: Boolean indicating whether or not the duration
+ // of the last frame should be set. If set to
+ // false, the |duration| value is ignored and
+ // |write_last_frame_with_duration_| will not be
+ // honored.
+ // duration: Duration of the Cluster in timecode scale.
+ bool Finalize(bool set_last_frame_duration, uint64_t duration);
+
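The mkvmuxer.cc hunks above show how the two variants divide the work:

    old_cluster->Finalize(true, frame_timestamp_ns);  // cluster boundary: the
                                                      // next frame's timestamp
                                                      // bounds the old cluster
    old_cluster->Finalize(false, 0);                  // very last cluster: no
                                                      // later timestamp exists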
// Returns the size in bytes for the entire Cluster element.
- uint64 Size() const;
+ uint64_t Size() const;
// Given |abs_timecode|, calculates timecode relative to most recent timecode.
// Returns -1 on failure, or a relative timecode.
- int64 GetRelativeTimecode(int64 abs_timecode) const;
-
- int64 size_position() const { return size_position_; }
- int32 blocks_added() const { return blocks_added_; }
- uint64 payload_size() const { return payload_size_; }
- int64 position_for_cues() const { return position_for_cues_; }
- uint64 timecode() const { return timecode_; }
- uint64 timecode_scale() const { return timecode_scale_; }
+ int64_t GetRelativeTimecode(int64_t abs_timecode) const;
+
+ int64_t size_position() const { return size_position_; }
+ int32_t blocks_added() const { return blocks_added_; }
+ uint64_t payload_size() const { return payload_size_; }
+ int64_t position_for_cues() const { return position_for_cues_; }
+ uint64_t timecode() const { return timecode_; }
+ uint64_t timecode_scale() const { return timecode_scale_; }
+ void set_write_last_frame_with_duration(bool write_last_frame_with_duration) {
+ write_last_frame_with_duration_ = write_last_frame_with_duration;
+ }
+ bool write_last_frame_with_duration() const {
+ return write_last_frame_with_duration_;
+ }
private:
+ // Iterator type for the |stored_frames_| map.
+ typedef std::map<uint64_t, std::list<Frame*> >::iterator FrameMapIterator;
+
// Utility method that confirms that blocks can still be added, and that the
// cluster header has been written. Used by |DoWriteFrame*|. Returns true
// when successful.
@@ -955,37 +1120,58 @@ class Cluster {
// Utility method used by the |DoWriteFrame*| methods that handles the
// bookkeeping required after each block is written.
- void PostWriteBlock(uint64 element_size);
+ void PostWriteBlock(uint64_t element_size);
// Does some verification and calls WriteFrame.
bool DoWriteFrame(const Frame* const frame);
+ // Either holds back the given frame, or writes it out depending on whether or
+ // not |write_last_frame_with_duration_| is set.
+ bool QueueOrWriteFrame(const Frame* const frame);
+
// Outputs the Cluster header to |writer_|. Returns true on success.
bool WriteClusterHeader();
// Number of blocks added to the cluster.
- int32 blocks_added_;
+ int32_t blocks_added_;
// Flag telling if the cluster has been closed.
bool finalized_;
+ // Flag indicating whether the cluster's timecode will always be written out
+ // using 8 bytes.
+ bool fixed_size_timecode_;
+
// Flag telling if the cluster's header has been written.
bool header_written_;
// The size of the cluster elements in bytes.
- uint64 payload_size_;
+ uint64_t payload_size_;
// The file position used for cue points.
- const int64 position_for_cues_;
+ const int64_t position_for_cues_;
// The file position of the cluster's size element.
- int64 size_position_;
+ int64_t size_position_;
// The absolute timecode of the cluster.
- const uint64 timecode_;
+ const uint64_t timecode_;
// The timecode scale of the Segment containing the cluster.
- const uint64 timecode_scale_;
+ const uint64_t timecode_scale_;
+
+ // Flag indicating whether the last frame of the cluster should be written as
+ // a Block with Duration. If set to true, then it will result in holding back
+ // of frames and the parameterized version of Finalize() must be called to
+ // finish writing the Cluster.
+ bool write_last_frame_with_duration_;
+
+ // Map used to hold back frames, if required. Track number is the key.
+ std::map<uint64_t, std::list<Frame*> > stored_frames_;
+
+ // Map from track number to the timestamp of the last block written for that
+ // track.
+ std::map<uint64_t, uint64_t> last_block_timestamp_;
// Pointer to the writer object. Not owned by this class.
IMkvWriter* writer_;
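The |stored_frames_| and |last_block_timestamp_| members added above implement the hold-back behind |write_last_frame_with_duration_|: each track's newest frame is queued until the next frame on that track arrives, at which point the held frame's duration is known and it can be written. Below is a minimal sketch of that idea; FrameStub and HoldBackQueue are hypothetical stand-ins for illustration, not libwebm types.

#include <cstdint>
#include <list>
#include <map>

// Hypothetical stand-in for mkvmuxer::Frame, illustration only.
struct FrameStub {
  uint64_t track;
  uint64_t timestamp_ns;
  uint64_t duration_ns;  // unknown until the next frame on this track arrives
};

class HoldBackQueue {
 public:
  // Queues |frame|; any frame previously held for the same track now has a
  // known end time, so it is moved to |ready| with its duration filled in.
  void Queue(const FrameStub& frame, std::list<FrameStub>* ready) {
    std::list<FrameStub>& held = stored_frames_[frame.track];
    while (!held.empty()) {
      FrameStub done = held.front();
      held.pop_front();
      done.duration_ns = frame.timestamp_ns - done.timestamp_ns;
      ready->push_back(done);
    }
    held.push_back(frame);
  }

 private:
  // Track number is the key, mirroring |stored_frames_| above.
  std::map<uint64_t, std::list<FrameStub> > stored_frames_;
};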
@@ -1006,42 +1192,42 @@ class SeekHead {
// Adds a seek entry to be written out when the element is finalized. |id|
// must be the coded mkv element id. |pos| is the file position of the
// element. Returns true on success.
- bool AddSeekEntry(uint32 id, uint64 pos);
+ bool AddSeekEntry(uint32_t id, uint64_t pos);
// Writes out SeekHead and SeekEntry elements. Returns true on success.
bool Finalize(IMkvWriter* writer) const;
// Returns the id of the Seek Entry at the given index. Returns -1 if index is
// out of range.
- uint32 GetId(int index) const;
+ uint32_t GetId(int index) const;
// Returns the position of the Seek Entry at the given index. Returns -1 if
// index is out of range.
- uint64 GetPosition(int index) const;
+ uint64_t GetPosition(int index) const;
// Sets the Seek Entry id and position at given index.
// Returns true on success.
- bool SetSeekEntry(int index, uint32 id, uint64 position);
+ bool SetSeekEntry(int index, uint32_t id, uint64_t position);
// Reserves space by writing out a Void element which will be updated with
// a SeekHead element later. Returns true on success.
bool Write(IMkvWriter* writer);
// We are going to put a cap on the number of Seek Entries.
- const static int32 kSeekEntryCount = 5;
+ const static int32_t kSeekEntryCount = 5;
private:
// Returns the maximum size in bytes of one seek entry.
- uint64 MaxEntrySize() const;
+ uint64_t MaxEntrySize() const;
// Seek entry id element list.
- uint32 seek_entry_id_[kSeekEntryCount];
+ uint32_t seek_entry_id_[kSeekEntryCount];
// Seek entry pos element list.
- uint64 seek_entry_pos_[kSeekEntryCount];
+ uint64_t seek_entry_pos_[kSeekEntryCount];
// The file position of SeekHead element.
- int64 start_pos_;
+ int64_t start_pos_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead);
};
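SeekHead works in two passes: Write() reserves space up front with a Void element, and Finalize() later rewrites that region once the element positions are known. The snippet below sketches that reserve-then-patch pattern in isolation with plain stdio rather than IMkvWriter; the 64-byte reservation and the raw ID bytes are illustrative only.

#include <cstdio>
#include <vector>

int main() {
  std::FILE* f = std::fopen("out.bin", "wb+");
  if (!f) return 1;
  const long reserved_pos = std::ftell(f);       // where the Void begins
  const std::vector<unsigned char> hole(64, 0);  // reserve 64 bytes up front
  std::fwrite(hole.data(), 1, hole.size(), f);
  // ... write the rest of the file, recording element positions as they land ...
  std::fseek(f, reserved_pos, SEEK_SET);         // come back and patch in place
  const unsigned char seek_head_id[] = {0x11, 0x4D, 0x9B, 0x74};  // SeekHead ID
  std::fwrite(seek_head_id, 1, sizeof(seek_head_id), f);
  std::fclose(f);
  return 0;
}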
@@ -1067,12 +1253,12 @@ class SegmentInfo {
double duration() const { return duration_; }
void set_muxing_app(const char* app);
const char* muxing_app() const { return muxing_app_; }
- void set_timecode_scale(uint64 scale) { timecode_scale_ = scale; }
- uint64 timecode_scale() const { return timecode_scale_; }
+ void set_timecode_scale(uint64_t scale) { timecode_scale_ = scale; }
+ uint64_t timecode_scale() const { return timecode_scale_; }
void set_writing_app(const char* app);
const char* writing_app() const { return writing_app_; }
- void set_date_utc(int64 date_utc) { date_utc_ = date_utc; }
- int64 date_utc() const { return date_utc_; }
+ void set_date_utc(int64_t date_utc) { date_utc_ = date_utc; }
+ int64_t date_utc() const { return date_utc_; }
private:
// Segment Information element names.
@@ -1081,14 +1267,14 @@ class SegmentInfo {
double duration_;
// Set to libwebm-%d.%d.%d.%d, major, minor, build, revision.
char* muxing_app_;
- uint64 timecode_scale_;
+ uint64_t timecode_scale_;
// Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision.
char* writing_app_;
// LLONG_MIN when DateUTC is not set.
- int64 date_utc_;
+ int64_t date_utc_;
// The file position of the duration element.
- int64 duration_pos_;
+ int64_t duration_pos_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo);
};
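With the accessors now on stdint types, configuring the element is otherwise unchanged. A minimal sketch, assuming Segment exposes GetSegmentInfo() as in upstream libwebm (that accessor is not shown in this diff):

#include "mkvmuxer/mkvmuxer.h"

// Hedged sketch: configure SegmentInfo on an already-initialized Segment.
void ConfigureInfo(mkvmuxer::Segment* segment) {
  mkvmuxer::SegmentInfo* const info = segment->GetSegmentInfo();
  info->set_timecode_scale(1000000);     // timestamp ticks of 1 ms, value in ns
  info->set_writing_app("demo-writer");  // hypothetical application name
  info->set_date_utc(0);                 // ns since 2001-01-01T00:00:00 UTC
}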
@@ -1108,8 +1294,8 @@ class Segment {
kBeforeClusters = 0x1 // Position Cues before Clusters
};
- const static uint32 kDefaultDocTypeVersion = 2;
- const static uint64 kDefaultMaxClusterDuration = 30000000000ULL;
+ const static uint32_t kDefaultDocTypeVersion = 2;
+ const static uint64_t kDefaultMaxClusterDuration = 30000000000ULL;
Segment();
~Segment();
@@ -1123,13 +1309,13 @@ class Segment {
// error. |number| is the number to use for the track. |number|
// must be >= 0. If |number| == 0 then the muxer will decide on the
// track number.
- Track* AddTrack(int32 number);
+ Track* AddTrack(int32_t number);
// Adds a Vorbis audio track to the segment. Returns the number of the track
// on success, 0 on error. |number| is the number to use for the audio track.
// |number| must be >= 0. If |number| == 0 then the muxer will decide on
// the track number.
- uint64 AddAudioTrack(int32 sample_rate, int32 channels, int32 number);
+ uint64_t AddAudioTrack(int32_t sample_rate, int32_t channels, int32_t number);
// Adds an empty chapter to the chapters of this segment. Returns
// non-NULL on success. After adding the chapter, the caller should
@@ -1145,7 +1331,7 @@ class Segment {
// nanoseconds of the cue's time. |track| is the Track of the Cue. This
// function must be called after AddFrame to calculate the correct
// BlockNumber for the CuePoint. Returns true on success.
- bool AddCuePoint(uint64 timestamp, uint64 track);
+ bool AddCuePoint(uint64_t timestamp, uint64_t track);
// Adds a frame to be output in the file. Returns true on success.
// Inputs:
@@ -1155,8 +1341,8 @@ class Segment {
// functions.
// timestamp: Timestamp of the frame in nanoseconds from 0.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrame(const uint8* data, uint64 length, uint64 track_number,
- uint64 timestamp_ns, bool is_key);
+ bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number,
+ uint64_t timestamp_ns, bool is_key);
// Writes a frame of metadata to the output medium; returns true on
// success.
@@ -1172,8 +1358,8 @@ class Segment {
// The metadata frame is written as a block group, with a duration
// sub-element but no reference time sub-elements (indicating that
// it is considered a keyframe, per Matroska semantics).
- bool AddMetadata(const uint8* data, uint64 length, uint64 track_number,
- uint64 timestamp_ns, uint64 duration_ns);
+ bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number,
+ uint64_t timestamp_ns, uint64_t duration_ns);
// Writes a frame with additional data to the output medium; returns true on
// success.
@@ -1188,10 +1374,11 @@ class Segment {
// timestamp: Absolute timestamp of the frame, expressed in nanosecond
// units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithAdditional(const uint8* data, uint64 length,
- const uint8* additional, uint64 additional_length,
- uint64 add_id, uint64 track_number,
- uint64 timestamp, bool is_key);
+ bool AddFrameWithAdditional(const uint8_t* data, uint64_t length,
+ const uint8_t* additional,
+ uint64_t additional_length, uint64_t add_id,
+ uint64_t track_number, uint64_t timestamp,
+ bool is_key);
// Writes a frame with DiscardPadding to the output medium; returns true on
// success.
@@ -1204,9 +1391,10 @@ class Segment {
// timestamp: Absolute timestamp of the frame, expressed in nanosecond
// units.
// is_key: Flag telling whether or not this frame is a key frame.
- bool AddFrameWithDiscardPadding(const uint8* data, uint64 length,
- int64 discard_padding, uint64 track_number,
- uint64 timestamp, bool is_key);
+ bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
+ int64_t discard_padding,
+ uint64_t track_number, uint64_t timestamp,
+ bool is_key);
// Writes a Frame to the output medium. Chooses the correct way of writing
// the frame (Block vs SimpleBlock) based on the parameters passed.
@@ -1218,7 +1406,7 @@ class Segment {
// success, 0 on error. |number| is the number to use for the video track.
// |number| must be >= 0. If |number| == 0 then the muxer will decide on
// the track number.
- uint64 AddVideoTrack(int32 width, int32 height, int32 number);
+ uint64_t AddVideoTrack(int32_t width, int32_t height, int32_t number);
// This function must be called after Finalize() if you need a copy of the
// output with Cues written before the Clusters. It will return false if the
@@ -1237,7 +1425,7 @@ class Segment {
// Sets which track to use for the Cues element. Must have added the track
// before calling this function. Returns true on success. |track_number| is
// returned by the Add track functions.
- bool CuesTrack(uint64 track_number);
+ bool CuesTrack(uint64_t track_number);
// This will force the muxer to create a new Cluster when the next frame is
// added.
@@ -1257,11 +1445,17 @@ class Segment {
// Search the Tracks and return the track that matches |track_number|.
// Returns NULL if there is no track match.
- Track* GetTrackByNumber(uint64 track_number) const;
+ Track* GetTrackByNumber(uint64_t track_number) const;
// Toggles whether to output a cues element.
void OutputCues(bool output_cues);
+ // Toggles whether to write the last frame in each Cluster with Duration.
+ void AccurateClusterDuration(bool accurate_cluster_duration);
+
+ // Toggles whether to write the Cluster Timecode using exactly 8 bytes.
+ void UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode);
+
// Sets if the muxer will output files in chunks or not. |chunking| is a
// flag telling whether or not to turn on chunking. |filename| is the base
// filename for the chunk files. The header chunk file will be named
@@ -1274,15 +1468,15 @@ class Segment {
bool SetChunking(bool chunking, const char* filename);
bool chunking() const { return chunking_; }
- uint64 cues_track() const { return cues_track_; }
- void set_max_cluster_duration(uint64 max_cluster_duration) {
+ uint64_t cues_track() const { return cues_track_; }
+ void set_max_cluster_duration(uint64_t max_cluster_duration) {
max_cluster_duration_ = max_cluster_duration;
}
- uint64 max_cluster_duration() const { return max_cluster_duration_; }
- void set_max_cluster_size(uint64 max_cluster_size) {
+ uint64_t max_cluster_duration() const { return max_cluster_duration_; }
+ void set_max_cluster_size(uint64_t max_cluster_size) {
max_cluster_size_ = max_cluster_size;
}
- uint64 max_cluster_size() const { return max_cluster_size_; }
+ uint64_t max_cluster_size() const { return max_cluster_size_; }
void set_mode(Mode mode) { mode_ = mode; }
Mode mode() const { return mode_; }
CuesPosition cues_position() const { return cues_position_; }
@@ -1306,7 +1500,7 @@ class Segment {
// Returns the maximum offset within the segment's payload. When chunking
// this function is needed to determine offsets of elements within the
// chunked files. Returns -1 on error.
- int64 MaxOffset();
+ int64_t MaxOffset();
// Adds the frame to our frame array.
bool QueueFrame(Frame* frame);
@@ -1318,7 +1512,7 @@ class Segment {
// Output all frames that are queued that have an end time that is less
// than |timestamp|. Returns true on success and if there are no frames
// queued.
- bool WriteFramesLessThan(uint64 timestamp);
+ bool WriteFramesLessThan(uint64_t timestamp);
// Outputs the segment header, Segment Information element, SeekHead element,
// and Tracks element to |writer_|.
@@ -1332,16 +1526,17 @@ class Segment {
// 0 = do not create a new cluster, and write frame to the existing cluster
// 1 = create a new cluster, and write frame to that new cluster
// 2 = create a new cluster, and re-run test
- int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const;
+ int TestFrame(uint64_t track_num, uint64_t timestamp_ns, bool key) const;
// Create a new cluster, using the earlier of the first enqueued
// frame, or the indicated time. Returns true on success.
- bool MakeNewCluster(uint64 timestamp_ns);
+ bool MakeNewCluster(uint64_t timestamp_ns);
// Checks whether a new cluster needs to be created, and if so
// creates a new cluster. Returns false if creation of a new cluster
// was necessary but creation was not successful.
- bool DoNewClusterProcessing(uint64 track_num, uint64 timestamp_ns, bool key);
+ bool DoNewClusterProcessing(uint64_t track_num, uint64_t timestamp_ns,
+ bool key);
// Adjusts Cue Point values (to place Cues before Clusters) so that they
// reflect the correct offsets.
@@ -1355,7 +1550,8 @@ class Segment {
// accounted for.
// index - index in the list of Cues which is currently being adjusted.
// cue_size - sum of size of all the CuePoint elements.
- void MoveCuesBeforeClustersHelper(uint64 diff, int index, uint64* cue_size);
+ void MoveCuesBeforeClustersHelper(uint64_t diff, int index,
+ uint64_t* cue_size);
// Seeds the random number generator used to make UIDs.
unsigned int seed_;
@@ -1394,22 +1590,22 @@ class Segment {
char* chunking_base_name_;
// File position offset where the Clusters end.
- int64 cluster_end_offset_;
+ int64_t cluster_end_offset_;
// List of clusters.
Cluster** cluster_list_;
// Number of cluster pointers allocated in the cluster list.
- int32 cluster_list_capacity_;
+ int32_t cluster_list_capacity_;
// Number of clusters in the cluster list.
- int32 cluster_list_size_;
+ int32_t cluster_list_size_;
// Indicates whether Cues should be written before or after Clusters
CuesPosition cues_position_;
// Track number that is associated with the cues element for this segment.
- uint64 cues_track_;
+ uint64_t cues_track_;
// Tells the muxer to force a new cluster on the next Block.
bool force_new_cluster_;
@@ -1421,10 +1617,10 @@ class Segment {
Frame** frames_;
// Number of frame pointers allocated in the frame list.
- int32 frames_capacity_;
+ int32_t frames_capacity_;
// Number of frames in the frame list.
- int32 frames_size_;
+ int32_t frames_size_;
// Flag telling if a video track has been added to the segment.
bool has_video_;
@@ -1433,23 +1629,23 @@ class Segment {
bool header_written_;
// Duration of the last block in nanoseconds.
- uint64 last_block_duration_;
+ uint64_t last_block_duration_;
// Last timestamp in nanoseconds added to a cluster.
- uint64 last_timestamp_;
+ uint64_t last_timestamp_;
// Last timestamp in nanoseconds by track number added to a cluster.
- uint64 last_track_timestamp_[kMaxTrackNumber];
+ uint64_t last_track_timestamp_[kMaxTrackNumber];
// Maximum time in nanoseconds for a cluster duration. This variable is a
// guideline and some clusters may have a longer duration. Default is 30
// seconds.
- uint64 max_cluster_duration_;
+ uint64_t max_cluster_duration_;
// Maximum size in bytes for a cluster. This variable is a guideline and
// some clusters may have a larger size. Default is 0 which signifies that
// the muxer will decide the size.
- uint64 max_cluster_size_;
+ uint64_t max_cluster_size_;
// The mode that segment is in. If set to |kLive| the writer must not
// seek backwards.
@@ -1462,22 +1658,29 @@ class Segment {
// Flag whether or not the muxer should output a Cues element.
bool output_cues_;
+ // Flag whether or not the last frame in each Cluster will have a Duration
+ // element in it.
+ bool accurate_cluster_duration_;
+
+ // Flag whether or not to write the Cluster Timecode using exactly 8 bytes.
+ bool fixed_size_cluster_timecode_;
+
// The size of the EBML header, used to validate the header if
// WriteEbmlHeader() is called more than once.
- int32 ebml_header_size_;
+ int32_t ebml_header_size_;
// The file position of the segment's payload.
- int64 payload_pos_;
+ int64_t payload_pos_;
// The file position of the element's size.
- int64 size_position_;
+ int64_t size_position_;
// Current DocTypeVersion (|doc_type_version_|) and that written in
// WriteSegmentHeader().
// WriteEbmlHeader() will be called from Finalize() if |doc_type_version_|
// differs from |doc_type_version_written_|.
- uint32 doc_type_version_;
- uint32 doc_type_version_written_;
+ uint32_t doc_type_version_;
+ uint32_t doc_type_version_written_;
// Pointer to the writer objects. Not owned by this class.
IMkvWriter* writer_cluster_;
@@ -1487,6 +1690,6 @@ class Segment {
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment);
};
-} // end namespace mkvmuxer
+} // namespace mkvmuxer
-#endif // MKVMUXER_HPP
+#endif // MKVMUXER_MKVMUXER_H_
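Taken together, the converted Segment API is driven the same way as before, only on <stdint.h> types. A hedged end-to-end sketch using the declarations above; Segment::Init() and MkvWriter::Open()/Close() are assumed from the parts of the headers this diff does not show in full, and the frame data and timestamp are placeholders.

#include "mkvmuxer/mkvmuxer.h"
#include "mkvmuxer/mkvwriter.h"

// Minimal sketch: mux one key frame into out.webm. Error handling is
// reduced to early returns.
bool WriteOneFrameWebm(const uint8_t* data, uint64_t length) {
  mkvmuxer::MkvWriter writer;
  if (!writer.Open("out.webm"))
    return false;
  mkvmuxer::Segment segment;
  if (!segment.Init(&writer))  // Init() as in upstream libwebm
    return false;
  segment.AccurateClusterDuration(true);     // new toggle from this update
  segment.UseFixedSizeClusterTimecode(true); // new: 8-byte cluster timecodes
  const uint64_t video_track = segment.AddVideoTrack(640, 480, 0);
  if (video_track == 0)  // 0 signals failure per the comment above
    return false;
  if (!segment.AddFrame(data, length, video_track, 0 /* timestamp_ns */,
                        true /* is_key */))
    return false;
  const bool ok = segment.Finalize();
  writer.Close();
  return ok;
}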
diff --git a/libvpx/third_party/libwebm/mkvmuxertypes.hpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h
index d0fc9fec8..e5db12160 100644
--- a/libvpx/third_party/libwebm/mkvmuxertypes.hpp
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxertypes.h
@@ -6,25 +6,23 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#ifndef MKVMUXERTYPES_HPP
-#define MKVMUXERTYPES_HPP
-
-// Copied from Chromium basictypes.h
-// A macro to disallow the copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
+#ifndef MKVMUXER_MKVMUXERTYPES_H_
+#define MKVMUXER_MKVMUXERTYPES_H_
namespace mkvmuxer {
-
typedef unsigned char uint8;
typedef short int16;
typedef int int32;
typedef unsigned int uint32;
typedef long long int64;
typedef unsigned long long uint64;
+} // namespace mkvmuxer
-} // end namespace mkvmuxer
+// Copied from Chromium basictypes.h
+// A macro to disallow the copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
-#endif // MKVMUXERTYPES_HPP
+#endif // MKVMUXER_MKVMUXERTYPES_H_
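Moving the macro below the typedefs does not change how it is used: declaring, but never defining, the copy constructor and operator= in a private section makes a class non-copyable. For example:

#include "mkvmuxer/mkvmuxertypes.h"

// Illustrative class only: copy construction and assignment are declared
// private and left undefined, so any attempted copy fails to compile (or
// to link, inside the class's own members).
class ScopedThing {
 public:
  ScopedThing() {}

 private:
  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ScopedThing);
};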
diff --git a/libvpx/third_party/libwebm/mkvmuxerutil.cpp b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
index 27ab15d51..3562b8ab8 100644
--- a/libvpx/third_party/libwebm/mkvmuxerutil.cpp
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
@@ -6,7 +6,7 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#include "mkvmuxerutil.hpp"
+#include "mkvmuxer/mkvmuxerutil.h"
#ifdef __ANDROID__
#include <fcntl.h>
@@ -20,13 +20,9 @@
#include <ctime>
#include <new>
-#include "mkvwriter.hpp"
-#include "webmids.hpp"
-
-#ifdef _MSC_VER
-// Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable : 4996)
-#endif
+#include "common/webmids.h"
+#include "mkvmuxer/mkvmuxer.h"
+#include "mkvmuxer/mkvwriter.h"
namespace mkvmuxer {
@@ -35,64 +31,68 @@ namespace {
// Date elements are always 8 octets in size.
const int kDateElementSize = 8;
-uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
- uint64 timecode_scale) {
- uint64 block_additional_elem_size = 0;
- uint64 block_addid_elem_size = 0;
- uint64 block_more_payload_size = 0;
- uint64 block_more_elem_size = 0;
- uint64 block_additions_payload_size = 0;
- uint64 block_additions_elem_size = 0;
+uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
+ int64_t timecode, uint64_t timecode_scale) {
+ uint64_t block_additional_elem_size = 0;
+ uint64_t block_addid_elem_size = 0;
+ uint64_t block_more_payload_size = 0;
+ uint64_t block_more_elem_size = 0;
+ uint64_t block_additions_payload_size = 0;
+ uint64_t block_additions_elem_size = 0;
if (frame->additional()) {
- block_additional_elem_size = EbmlElementSize(
- kMkvBlockAdditional, frame->additional(), frame->additional_length());
- block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, frame->add_id());
+ block_additional_elem_size =
+ EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(),
+ frame->additional_length());
+ block_addid_elem_size =
+ EbmlElementSize(libwebm::kMkvBlockAddID, frame->add_id());
block_more_payload_size =
block_addid_elem_size + block_additional_elem_size;
block_more_elem_size =
- EbmlMasterElementSize(kMkvBlockMore, block_more_payload_size) +
+ EbmlMasterElementSize(libwebm::kMkvBlockMore, block_more_payload_size) +
block_more_payload_size;
block_additions_payload_size = block_more_elem_size;
block_additions_elem_size =
- EbmlMasterElementSize(kMkvBlockAdditions,
+ EbmlMasterElementSize(libwebm::kMkvBlockAdditions,
block_additions_payload_size) +
block_additions_payload_size;
}
- uint64 discard_padding_elem_size = 0;
+ uint64_t discard_padding_elem_size = 0;
if (frame->discard_padding() != 0) {
discard_padding_elem_size =
- EbmlElementSize(kMkvDiscardPadding, frame->discard_padding());
+ EbmlElementSize(libwebm::kMkvDiscardPadding, frame->discard_padding());
}
- const uint64 reference_block_timestamp =
+ const uint64_t reference_block_timestamp =
frame->reference_block_timestamp() / timecode_scale;
- uint64 reference_block_elem_size = 0;
+ uint64_t reference_block_elem_size = 0;
if (!frame->is_key()) {
reference_block_elem_size =
- EbmlElementSize(kMkvReferenceBlock, reference_block_timestamp);
+ EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp);
}
- const uint64 duration = frame->duration() / timecode_scale;
- uint64 block_duration_elem_size = 0;
+ const uint64_t duration = frame->duration() / timecode_scale;
+ uint64_t block_duration_elem_size = 0;
if (duration > 0)
- block_duration_elem_size = EbmlElementSize(kMkvBlockDuration, duration);
+ block_duration_elem_size =
+ EbmlElementSize(libwebm::kMkvBlockDuration, duration);
- const uint64 block_payload_size = 4 + frame->length();
- const uint64 block_elem_size =
- EbmlMasterElementSize(kMkvBlock, block_payload_size) + block_payload_size;
+ const uint64_t block_payload_size = 4 + frame->length();
+ const uint64_t block_elem_size =
+ EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) +
+ block_payload_size;
- const uint64 block_group_payload_size =
+ const uint64_t block_group_payload_size =
block_elem_size + block_additions_elem_size + block_duration_elem_size +
discard_padding_elem_size + reference_block_elem_size;
- if (!WriteEbmlMasterElement(writer, kMkvBlockGroup,
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockGroup,
block_group_payload_size)) {
return 0;
}
- if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlock, block_payload_size))
return 0;
if (WriteUInt(writer, frame->track_number()))
@@ -105,77 +105,81 @@ uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
if (SerializeInt(writer, 0, 1))
return 0;
- if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
+ if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length())))
return 0;
if (frame->additional()) {
- if (!WriteEbmlMasterElement(writer, kMkvBlockAdditions,
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockAdditions,
block_additions_payload_size)) {
return 0;
}
- if (!WriteEbmlMasterElement(writer, kMkvBlockMore, block_more_payload_size))
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockMore,
+ block_more_payload_size))
return 0;
- if (!WriteEbmlElement(writer, kMkvBlockAddID, frame->add_id()))
+ if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID, frame->add_id()))
return 0;
- if (!WriteEbmlElement(writer, kMkvBlockAdditional, frame->additional(),
- frame->additional_length())) {
+ if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional,
+ frame->additional(), frame->additional_length())) {
return 0;
}
}
if (frame->discard_padding() != 0 &&
- !WriteEbmlElement(writer, kMkvDiscardPadding, frame->discard_padding())) {
+ !WriteEbmlElement(writer, libwebm::kMkvDiscardPadding,
+ frame->discard_padding())) {
return false;
}
if (!frame->is_key() &&
- !WriteEbmlElement(writer, kMkvReferenceBlock,
+ !WriteEbmlElement(writer, libwebm::kMkvReferenceBlock,
reference_block_timestamp)) {
return false;
}
- if (duration > 0 && !WriteEbmlElement(writer, kMkvBlockDuration, duration)) {
+ if (duration > 0 &&
+ !WriteEbmlElement(writer, libwebm::kMkvBlockDuration, duration)) {
return false;
}
- return EbmlMasterElementSize(kMkvBlockGroup, block_group_payload_size) +
+ return EbmlMasterElementSize(libwebm::kMkvBlockGroup,
+ block_group_payload_size) +
block_group_payload_size;
}
-uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
- int64 timecode) {
- if (WriteID(writer, kMkvSimpleBlock))
+uint64_t WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
+ int64_t timecode) {
+ if (WriteID(writer, libwebm::kMkvSimpleBlock))
return 0;
- const int32 size = static_cast<int32>(frame->length()) + 4;
+ const int32_t size = static_cast<int32_t>(frame->length()) + 4;
if (WriteUInt(writer, size))
return 0;
- if (WriteUInt(writer, static_cast<uint64>(frame->track_number())))
+ if (WriteUInt(writer, static_cast<uint64_t>(frame->track_number())))
return 0;
if (SerializeInt(writer, timecode, 2))
return 0;
- uint64 flags = 0;
+ uint64_t flags = 0;
if (frame->is_key())
flags |= 0x80;
if (SerializeInt(writer, flags, 1))
return 0;
- if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
+ if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length())))
return 0;
- return GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 +
- frame->length();
+ return static_cast<uint64_t>(GetUIntSize(libwebm::kMkvSimpleBlock) +
+ GetCodedUIntSize(size) + 4 + frame->length());
}
} // namespace
-int32 GetCodedUIntSize(uint64 value) {
+int32_t GetCodedUIntSize(uint64_t value) {
if (value < 0x000000000000007FULL)
return 1;
else if (value < 0x0000000000003FFFULL)
@@ -193,7 +197,7 @@ int32 GetCodedUIntSize(uint64 value) {
return 8;
}
-int32 GetUIntSize(uint64 value) {
+int32_t GetUIntSize(uint64_t value) {
if (value < 0x0000000000000100ULL)
return 1;
else if (value < 0x0000000000010000ULL)
@@ -211,26 +215,26 @@ int32 GetUIntSize(uint64 value) {
return 8;
}
-int32 GetIntSize(int64 value) {
+int32_t GetIntSize(int64_t value) {
// Doubling the requested value ensures positive values with their high bit
// set are written with 0-padding to avoid flipping the signedness.
- const uint64 v = (value < 0) ? value ^ -1LL : value;
+ const uint64_t v = (value < 0) ? value ^ -1LL : value;
return GetUIntSize(2 * v);
}
-uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
+uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value) {
// Size of EBML ID
- int32 ebml_size = GetUIntSize(type);
+ int32_t ebml_size = GetUIntSize(type);
// Datasize
ebml_size += GetCodedUIntSize(value);
- return ebml_size;
+ return static_cast<uint64_t>(ebml_size);
}
-uint64 EbmlElementSize(uint64 type, int64 value) {
+uint64_t EbmlElementSize(uint64_t type, int64_t value) {
// Size of EBML ID
- int32 ebml_size = GetUIntSize(type);
+ int32_t ebml_size = GetUIntSize(type);
// Datasize
ebml_size += GetIntSize(value);
@@ -238,15 +242,20 @@ uint64 EbmlElementSize(uint64 type, int64 value) {
// Size of Datasize
ebml_size++;
- return ebml_size;
+ return static_cast<uint64_t>(ebml_size);
+}
+
+uint64_t EbmlElementSize(uint64_t type, uint64_t value) {
+ return EbmlElementSize(type, value, 0);
}
-uint64 EbmlElementSize(uint64 type, uint64 value) {
+uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size) {
// Size of EBML ID
- int32 ebml_size = GetUIntSize(type);
+ uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
// Datasize
- ebml_size += GetUIntSize(value);
+ ebml_size +=
+ (fixed_size > 0) ? fixed_size : static_cast<uint64_t>(GetUIntSize(value));
// Size of Datasize
ebml_size++;
@@ -254,9 +263,9 @@ uint64 EbmlElementSize(uint64 type, uint64 value) {
return ebml_size;
}
-uint64 EbmlElementSize(uint64 type, float /* value */) {
+uint64_t EbmlElementSize(uint64_t type, float /* value */) {
// Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
+ uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
// Datasize
ebml_size += sizeof(float);
@@ -267,12 +276,12 @@ uint64 EbmlElementSize(uint64 type, float /* value */) {
return ebml_size;
}
-uint64 EbmlElementSize(uint64 type, const char* value) {
+uint64_t EbmlElementSize(uint64_t type, const char* value) {
if (!value)
return 0;
// Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
+ uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
// Datasize
ebml_size += strlen(value);
@@ -283,12 +292,12 @@ uint64 EbmlElementSize(uint64 type, const char* value) {
return ebml_size;
}
-uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
+uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size) {
if (!value)
return 0;
// Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
+ uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
// Datasize
ebml_size += size;
@@ -299,9 +308,9 @@ uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
return ebml_size;
}
-uint64 EbmlDateElementSize(uint64 type) {
+uint64_t EbmlDateElementSize(uint64_t type) {
// Size of EBML ID
- uint64 ebml_size = GetUIntSize(type);
+ uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
// Datasize
ebml_size += kDateElementSize;
@@ -312,18 +321,18 @@ uint64 EbmlDateElementSize(uint64 type) {
return ebml_size;
}
-int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
+int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size) {
if (!writer || size < 1 || size > 8)
return -1;
- for (int32 i = 1; i <= size; ++i) {
- const int32 byte_count = size - i;
- const int32 bit_count = byte_count * 8;
+ for (int32_t i = 1; i <= size; ++i) {
+ const int32_t byte_count = size - i;
+ const int32_t bit_count = byte_count * 8;
- const int64 bb = value >> bit_count;
- const uint8 b = static_cast<uint8>(bb);
+ const int64_t bb = value >> bit_count;
+ const uint8_t b = static_cast<uint8_t>(bb);
- const int32 status = writer->Write(&b, 1);
+ const int32_t status = writer->Write(&b, 1);
if (status < 0)
return status;
@@ -332,26 +341,26 @@ int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
return 0;
}
-int32 SerializeFloat(IMkvWriter* writer, float f) {
+int32_t SerializeFloat(IMkvWriter* writer, float f) {
if (!writer)
return -1;
- assert(sizeof(uint32) == sizeof(float));
+ assert(sizeof(uint32_t) == sizeof(float));
// This union is merely used to avoid a reinterpret_cast from float& to
// uint32& which will result in violation of strict aliasing.
union U32 {
- uint32 u32;
+ uint32_t u32;
float f;
} value;
value.f = f;
- for (int32 i = 1; i <= 4; ++i) {
- const int32 byte_count = 4 - i;
- const int32 bit_count = byte_count * 8;
+ for (int32_t i = 1; i <= 4; ++i) {
+ const int32_t byte_count = 4 - i;
+ const int32_t bit_count = byte_count * 8;
- const uint8 byte = static_cast<uint8>(value.u32 >> bit_count);
+ const uint8_t byte = static_cast<uint8_t>(value.u32 >> bit_count);
- const int32 status = writer->Write(&byte, 1);
+ const int32_t status = writer->Write(&byte, 1);
if (status < 0)
return status;
@@ -360,21 +369,21 @@ int32 SerializeFloat(IMkvWriter* writer, float f) {
return 0;
}
-int32 WriteUInt(IMkvWriter* writer, uint64 value) {
+int32_t WriteUInt(IMkvWriter* writer, uint64_t value) {
if (!writer)
return -1;
- int32 size = GetCodedUIntSize(value);
+ int32_t size = GetCodedUIntSize(value);
return WriteUIntSize(writer, value, size);
}
-int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) {
+int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
if (!writer || size < 0 || size > 8)
return -1;
if (size > 0) {
- const uint64 bit = 1LL << (size * 7);
+ const uint64_t bit = 1LL << (size * 7);
if (value > (bit - 2))
return -1;
@@ -382,11 +391,11 @@ int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) {
value |= bit;
} else {
size = 1;
- int64 bit;
+ int64_t bit;
for (;;) {
bit = 1LL << (size * 7);
- const uint64 max = bit - 2;
+ const uint64_t max = bit - 2;
if (value <= max)
break;
@@ -403,18 +412,18 @@ int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) {
return SerializeInt(writer, value, size);
}
-int32 WriteID(IMkvWriter* writer, uint64 type) {
+int32_t WriteID(IMkvWriter* writer, uint64_t type) {
if (!writer)
return -1;
writer->ElementStartNotify(type, writer->Position());
- const int32 size = GetUIntSize(type);
+ const int32_t size = GetUIntSize(type);
return SerializeInt(writer, type, size);
}
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) {
+bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t type, uint64_t size) {
if (!writer)
return false;
@@ -427,41 +436,51 @@ bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) {
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value) {
+ return WriteEbmlElement(writer, type, value, 0);
+}
+
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
+ uint64_t fixed_size) {
if (!writer)
return false;
if (WriteID(writer, type))
return false;
- const uint64 size = GetUIntSize(value);
+ uint64_t size = static_cast<uint64_t>(GetUIntSize(value));
+ if (fixed_size > 0) {
+ if (size > fixed_size)
+ return false;
+ size = fixed_size;
+ }
if (WriteUInt(writer, size))
return false;
- if (SerializeInt(writer, value, static_cast<int32>(size)))
+ if (SerializeInt(writer, value, static_cast<int32_t>(size)))
return false;
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value) {
if (!writer)
return false;
if (WriteID(writer, type))
return 0;
- const uint64 size = GetIntSize(value);
+ const uint64_t size = GetIntSize(value);
if (WriteUInt(writer, size))
return false;
- if (SerializeInt(writer, value, static_cast<int32>(size)))
+ if (SerializeInt(writer, value, static_cast<int32_t>(size)))
return false;
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value) {
if (!writer)
return false;
@@ -477,25 +496,25 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) {
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value) {
if (!writer || !value)
return false;
if (WriteID(writer, type))
return false;
- const uint64 length = strlen(value);
+ const uint64_t length = strlen(value);
if (WriteUInt(writer, length))
return false;
- if (writer->Write(value, static_cast<const uint32>(length)))
+ if (writer->Write(value, static_cast<const uint32_t>(length)))
return false;
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
- uint64 size) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
+ uint64_t size) {
if (!writer || !value || size < 1)
return false;
@@ -505,13 +524,13 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
if (WriteUInt(writer, size))
return false;
- if (writer->Write(value, static_cast<uint32>(size)))
+ if (writer->Write(value, static_cast<uint32_t>(size)))
return false;
return true;
}
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value) {
if (!writer)
return false;
@@ -527,8 +546,8 @@ bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
return true;
}
-uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
- Cluster* cluster) {
+uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
+ Cluster* cluster) {
if (!writer || !frame || !frame->IsValid() || !cluster ||
!cluster->timecode_scale())
return 0;
@@ -537,7 +556,7 @@ uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
// timecode for the cluster itself (remember that block timecode
// is a signed, 16-bit integer). However, as a simplification we
// only permit non-negative cluster-relative timecodes for blocks.
- const int64 relative_timecode = cluster->GetRelativeTimecode(
+ const int64_t relative_timecode = cluster->GetRelativeTimecode(
frame->timestamp() / cluster->timecode_scale());
if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode)
return 0;
@@ -548,53 +567,53 @@ uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
cluster->timecode_scale());
}
-uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) {
+uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size) {
if (!writer)
return false;
// Subtract one for the void ID and the coded size.
- uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
- uint64 void_size =
- EbmlMasterElementSize(kMkvVoid, void_entry_size) + void_entry_size;
+ uint64_t void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
+ uint64_t void_size =
+ EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) +
+ void_entry_size;
if (void_size != size)
return 0;
- const int64 payload_position = writer->Position();
+ const int64_t payload_position = writer->Position();
if (payload_position < 0)
return 0;
- if (WriteID(writer, kMkvVoid))
+ if (WriteID(writer, libwebm::kMkvVoid))
return 0;
if (WriteUInt(writer, void_entry_size))
return 0;
- const uint8 value = 0;
- for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) {
+ const uint8_t value = 0;
+ for (int32_t i = 0; i < static_cast<int32_t>(void_entry_size); ++i) {
if (writer->Write(&value, 1))
return 0;
}
- const int64 stop_position = writer->Position();
+ const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64>(void_size))
+ stop_position - payload_position != static_cast<int64_t>(void_size))
return 0;
return void_size;
}
-void GetVersion(int32* major, int32* minor, int32* build, int32* revision) {
+void GetVersion(int32_t* major, int32_t* minor, int32_t* build,
+ int32_t* revision) {
*major = 0;
*minor = 2;
*build = 1;
*revision = 0;
}
-} // namespace mkvmuxer
-
-mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) {
- uint64 uid = 0;
+uint64_t MakeUID(unsigned int* seed) {
+ uint64_t uid = 0;
#ifdef __MINGW32__
srand(*seed);
@@ -606,24 +625,26 @@ mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) {
// TODO(fgalligan): Move random number generation to platform specific code.
#ifdef _MSC_VER
(void)seed;
- const int32 nn = rand();
+ const int32_t nn = rand();
#elif __ANDROID__
- int32 temp_num = 1;
+ int32_t temp_num = 1;
int fd = open("/dev/urandom", O_RDONLY);
if (fd != -1) {
- read(fd, &temp_num, sizeof(int32));
+ read(fd, &temp_num, sizeof(temp_num));
close(fd);
}
- const int32 nn = temp_num;
+ const int32_t nn = temp_num;
#elif defined __MINGW32__
- const int32 nn = rand();
+ const int32_t nn = rand();
#else
- const int32 nn = rand_r(seed);
+ const int32_t nn = rand_r(seed);
#endif
- const int32 n = 0xFF & (nn >> 4); // throw away low-order bits
+ const int32_t n = 0xFF & (nn >> 4); // throw away low-order bits
uid |= n;
}
return uid;
}
+
+} // namespace mkvmuxer
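GetCodedUIntSize() and WriteUIntSize() above implement EBML variable-length integers: an n-byte code stores the value in its low n*7 bits and sets a marker bit at position n*7, with the all-ones payload reserved for "unknown size" (hence the `bit - 2` bound in WriteUIntSize). A standalone sketch of the same rule:

#include <cstdint>

// Returns how many bytes an EBML-coded unsigned integer needs, mirroring
// the thresholds in GetCodedUIntSize(): up to 0x7E fits in 1 byte, up to
// 0x3FFE in 2, and so on; the all-ones pattern per width is reserved.
int CodedSizeSketch(uint64_t value) {
  int size = 1;
  while (size < 8 && value > (1ULL << (size * 7)) - 2)
    ++size;
  return size;
}

// Produces the coded form: the length-marker bit is ORed in, and the result
// would then be serialized big-endian over |size| bytes (as SerializeInt does).
uint64_t EncodeCodedUIntSketch(uint64_t value) {
  const int size = CodedSizeSketch(value);
  return value | (1ULL << (size * 7));
}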
diff --git a/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
new file mode 100644
index 000000000..0e21a2dcb
--- /dev/null
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
@@ -0,0 +1,95 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#ifndef MKVMUXER_MKVMUXERUTIL_H_
+#define MKVMUXER_MKVMUXERUTIL_H_
+
+#include <stdint.h>
+
+namespace mkvmuxer {
+class Cluster;
+class Frame;
+class IMkvWriter;
+
+const uint64_t kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
+const int64_t kMaxBlockTimecode = 0x07FFFLL;
+
+// Writes out |value| in Big Endian order. Returns 0 on success.
+int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size);
+
+// Returns the size in bytes of the element.
+int32_t GetUIntSize(uint64_t value);
+int32_t GetIntSize(int64_t value);
+int32_t GetCodedUIntSize(uint64_t value);
+uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value);
+uint64_t EbmlElementSize(uint64_t type, int64_t value);
+uint64_t EbmlElementSize(uint64_t type, uint64_t value);
+uint64_t EbmlElementSize(uint64_t type, float value);
+uint64_t EbmlElementSize(uint64_t type, const char* value);
+uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size);
+uint64_t EbmlDateElementSize(uint64_t type);
+
+// Returns the size in bytes of the element assuming that the element was
+// written using |fixed_size| bytes. If |fixed_size| is set to zero, then it
+// computes the necessary number of bytes based on |value|.
+uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size);
+
+// Creates an EBML coded number from |value| and writes it out. The size of
+// the coded number is determined by the value of |value|. |value| must not
+// be in a coded form. Returns 0 on success.
+int32_t WriteUInt(IMkvWriter* writer, uint64_t value);
+
+// Creates an EBML coded number from |value| and writes it out. The size of
+// the coded number is determined by the value of |size|. |value| must not
+// be in a coded form. Returns 0 on success.
+int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size);
+
+// Output an Mkv master element. Returns true if the element was written.
+bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t value, uint64_t size);
+
+// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the
+// ID to |SerializeInt|. Returns 0 on success.
+int32_t WriteID(IMkvWriter* writer, uint64_t type);
+
+// Output an Mkv non-master element. Returns true if the element was written.
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
+ uint64_t size);
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value);
+
+// Output an Mkv non-master element using fixed size. The element will be
+// written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero
+// then it computes the necessary number of bytes based on |value|. Returns true
+// if the element was written.
+bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
+ uint64_t fixed_size);
+
+// Output a Mkv Frame. It decides the correct element to write (Block vs
+// SimpleBlock) based on the parameters of the Frame.
+uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
+ Cluster* cluster);
+
+// Output a void element. |size| must be the entire size in bytes that will be
+// void. The function will calculate the size of the void header and subtract
+// it from |size|.
+uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size);
+
+// Returns the version number of the muxer in |major|, |minor|, |build|,
+// and |revision|.
+void GetVersion(int32_t* major, int32_t* minor, int32_t* build,
+ int32_t* revision);
+
+// Returns a random number to be used for UID, using |seed| to seed
+// the random-number generator (see POSIX rand_r() for semantics).
+uint64_t MakeUID(unsigned int* seed);
+
+} // namespace mkvmuxer
+
+#endif // MKVMUXER_MKVMUXERUTIL_H_
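The new fixed_size overloads exist so elements such as the Cluster Timecode can be written with a constant width and later patched in place without resizing. A hedged sketch using the declarations above; libwebm::kMkvTimecode is assumed from common/webmids.h, and the 8-byte width matches the |fixed_size_timecode_| flag added to Cluster.

#include "common/webmids.h"
#include "mkvmuxer/mkvmuxerutil.h"

// Writes a Timecode element using exactly 8 data bytes. |writer| may be any
// seekable IMkvWriter implementation.
bool WriteFixedTimecode(mkvmuxer::IMkvWriter* writer, uint64_t timecode) {
  return mkvmuxer::WriteEbmlElement(writer, libwebm::kMkvTimecode, timecode,
                                    8 /* fixed_size */);
}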
diff --git a/libvpx/third_party/libwebm/mkvwriter.cpp b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc
index 75d4350c7..ca48e149c 100644
--- a/libvpx/third_party/libwebm/mkvwriter.cpp
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc
@@ -6,14 +6,12 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#include "mkvwriter.hpp"
+#include "mkvmuxer/mkvwriter.h"
#ifdef _MSC_VER
#include <share.h> // for _SH_DENYWR
#endif
-#include <new>
-
namespace mkvmuxer {
MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {}
diff --git a/libvpx/third_party/libwebm/mkvwriter.hpp b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h
index 684560c92..4227c6374 100644
--- a/libvpx/third_party/libwebm/mkvwriter.hpp
+++ b/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.h
@@ -6,13 +6,13 @@
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-#ifndef MKVWRITER_HPP
-#define MKVWRITER_HPP
+#ifndef MKVMUXER_MKVWRITER_H_
+#define MKVMUXER_MKVWRITER_H_
#include <stdio.h>
-#include "mkvmuxer.hpp"
-#include "mkvmuxertypes.hpp"
+#include "mkvmuxer/mkvmuxer.h"
+#include "mkvmuxer/mkvmuxertypes.h"
namespace mkvmuxer {
@@ -46,6 +46,6 @@ class MkvWriter : public IMkvWriter {
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter);
};
-} // end namespace mkvmuxer
+} // namespace mkvmuxer
-#endif // MKVWRITER_HPP
+#endif // MKVMUXER_MKVWRITER_H_
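MkvWriter is the FILE*-backed IMkvWriter; other sinks can implement the interface directly. A hedged sketch of an in-memory writer follows, with the IMkvWriter virtuals (Write, the Position getter/setter, Seekable, ElementStartNotify) assumed from upstream libwebm's mkvmuxer.h, which this diff does not show in full.

#include <algorithm>
#include <cstdint>
#include <vector>

#include "mkvmuxer/mkvmuxer.h"

// Minimal in-memory writer sketch. Grows the buffer on demand and supports
// seeking so placeholder elements can be patched.
class MemoryMkvWriter : public mkvmuxer::IMkvWriter {
 public:
  MemoryMkvWriter() : pos_(0) {}

  virtual mkvmuxer::int32 Write(const void* buf, mkvmuxer::uint32 len) {
    const uint8_t* const p = static_cast<const uint8_t*>(buf);
    if (pos_ + len > buf_.size())
      buf_.resize(pos_ + len);
    std::copy(p, p + len, buf_.begin() + pos_);
    pos_ += len;
    return 0;  // 0 signals success to the muxer
  }
  virtual mkvmuxer::int64 Position() const {
    return static_cast<mkvmuxer::int64>(pos_);
  }
  virtual mkvmuxer::int32 Position(mkvmuxer::int64 position) {
    pos_ = static_cast<size_t>(position);
    return 0;
  }
  virtual bool Seekable() const { return true; }
  virtual void ElementStartNotify(mkvmuxer::uint64 /* id */,
                                  mkvmuxer::int64 /* position */) {}

 private:
  std::vector<uint8_t> buf_;
  size_t pos_;
};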
diff --git a/libvpx/third_party/libwebm/mkvmuxerutil.hpp b/libvpx/third_party/libwebm/mkvmuxerutil.hpp
deleted file mode 100644
index e31857694..000000000
--- a/libvpx/third_party/libwebm/mkvmuxerutil.hpp
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-//
-// Use of this source code is governed by a BSD-style license
-// that can be found in the LICENSE file in the root of the source
-// tree. An additional intellectual property rights grant can be found
-// in the file PATENTS. All contributing project authors may
-// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVMUXERUTIL_HPP
-#define MKVMUXERUTIL_HPP
-
-#include "mkvmuxer.hpp"
-#include "mkvmuxertypes.hpp"
-
-namespace mkvmuxer {
-
-class IMkvWriter;
-
-const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
-const int64 kMaxBlockTimecode = 0x07FFFLL;
-
-// Writes out |value| in Big Endian order. Returns 0 on success.
-int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size);
-
-// Returns the size in bytes of the element.
-int32 GetUIntSize(uint64 value);
-int32 GetIntSize(int64 value);
-int32 GetCodedUIntSize(uint64 value);
-uint64 EbmlMasterElementSize(uint64 type, uint64 value);
-uint64 EbmlElementSize(uint64 type, int64 value);
-uint64 EbmlElementSize(uint64 type, uint64 value);
-uint64 EbmlElementSize(uint64 type, float value);
-uint64 EbmlElementSize(uint64 type, const char* value);
-uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
-uint64 EbmlDateElementSize(uint64 type);
-
-// Creates an EBML coded number from |value| and writes it out. The size of
-// the coded number is determined by the value of |value|. |value| must not
-// be in a coded form. Returns 0 on success.
-int32 WriteUInt(IMkvWriter* writer, uint64 value);
-
-// Creates an EBML coded number from |value| and writes it out. The size of
-// the coded number is determined by the value of |size|. |value| must not
-// be in a coded form. Returns 0 on success.
-int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size);
-
-// Output an Mkv master element. Returns true if the element was written.
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size);
-
-// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the
-// ID to |SerializeInt|. Returns 0 on success.
-int32 WriteID(IMkvWriter* writer, uint64 type);
-
-// Output an Mkv non-master element. Returns true if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
- uint64 size);
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);
-
-// Output a Mkv Frame. It decides the correct element to write (Block vs
-// SimpleBlock) based on the parameters of the Frame.
-uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
- Cluster* cluster);
-
-// Output a void element. |size| must be the entire size in bytes that will be
-// void. The function will calculate the size of the void header and subtract
-// it from |size|.
-uint64 WriteVoidElement(IMkvWriter* writer, uint64 size);
-
-// Returns the version number of the muxer in |major|, |minor|, |build|,
-// and |revision|.
-void GetVersion(int32* major, int32* minor, int32* build, int32* revision);
-
-// Returns a random number to be used for UID, using |seed| to seed
-// the random-number generator (see POSIX rand_r() for semantics).
-uint64 MakeUID(unsigned int* seed);
-
-} // end namespace mkvmuxer
-
-#endif // MKVMUXERUTIL_HPP
diff --git a/libvpx/third_party/libwebm/mkvparser.cpp b/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
index f2855d506..21801154d 100644
--- a/libvpx/third_party/libwebm/mkvparser.cpp
+++ b/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
@@ -5,8 +5,7 @@
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-
-#include "mkvparser.hpp"
+#include "mkvparser/mkvparser.h"
#if defined(_MSC_VER) && _MSC_VER < 1800
#include <float.h> // _isnan() / _finite()
@@ -14,19 +13,18 @@
#endif
#include <cassert>
+#include <cfloat>
#include <climits>
#include <cmath>
#include <cstring>
+#include <memory>
#include <new>
-#include "webmids.hpp"
-
-#ifdef _MSC_VER
-// Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable : 4996)
-#endif
+#include "common/webmids.h"
namespace mkvparser {
+const float MasteringMetadata::kValueNotPresent = FLT_MAX;
+const long long Colour::kValueNotPresent = LLONG_MAX;
#ifdef MSC_COMPAT
inline bool isnan(double val) { return !!_isnan(val); }
@@ -38,8 +36,9 @@ inline bool isinf(double val) { return std::isinf(val); }
IMkvReader::~IMkvReader() {}
-template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
- unsigned long long element_size) {
+template <typename Type>
+Type* SafeArrayAlloc(unsigned long long num_elements,
+ unsigned long long element_size) {
if (num_elements == 0 || element_size == 0)
return NULL;
@@ -350,9 +349,8 @@ long UnserializeString(IMkvReader* pReader, long long pos, long long size,
return 0;
}
-long ParseElementHeader(IMkvReader* pReader, long long& pos,
- long long stop, long long& id,
- long long& size) {
+long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop,
+ long long& id, long long& size) {
if (stop >= 0 && pos >= stop)
return E_FILE_FORMAT_INVALID;
@@ -386,7 +384,7 @@ long ParseElementHeader(IMkvReader* pReader, long long& pos,
// pos now designates payload
- if (stop >= 0 && pos >= stop)
+ if (stop >= 0 && pos > stop)
return E_FILE_FORMAT_INVALID;
return 0; // success
@@ -520,7 +518,6 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
return status;
pos = 0;
- long long end = (available >= 1024) ? 1024 : available;
// Scan until we find what looks like the first byte of the EBML header.
const long long kMaxScanBytes = (available >= 1024) ? 1024 : available;
@@ -544,8 +541,10 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
long len = 0;
const long long ebml_id = ReadID(pReader, pos, len);
- // TODO(tomfinegan): Move Matroska ID constants into a common namespace.
- if (len != 4 || ebml_id != mkvmuxer::kMkvEBML)
+ if (ebml_id == E_BUFFER_NOT_FULL)
+ return E_BUFFER_NOT_FULL;
+
+ if (len != 4 || ebml_id != libwebm::kMkvEBML)
return E_FILE_FORMAT_INVALID;
// Move read pos forward to the EBML header size field.
@@ -584,7 +583,7 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
if ((available - pos) < result)
return pos + result;
- end = pos + result;
+ const long long end = pos + result;
Init();
@@ -599,27 +598,27 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
if (size == 0)
return E_FILE_FORMAT_INVALID;
- if (id == mkvmuxer::kMkvEBMLVersion) {
+ if (id == libwebm::kMkvEBMLVersion) {
m_version = UnserializeUInt(pReader, pos, size);
if (m_version <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvEBMLReadVersion) {
+ } else if (id == libwebm::kMkvEBMLReadVersion) {
m_readVersion = UnserializeUInt(pReader, pos, size);
if (m_readVersion <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvEBMLMaxIDLength) {
+ } else if (id == libwebm::kMkvEBMLMaxIDLength) {
m_maxIdLength = UnserializeUInt(pReader, pos, size);
if (m_maxIdLength <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvEBMLMaxSizeLength) {
+ } else if (id == libwebm::kMkvEBMLMaxSizeLength) {
m_maxSizeLength = UnserializeUInt(pReader, pos, size);
if (m_maxSizeLength <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvDocType) {
+ } else if (id == libwebm::kMkvDocType) {
if (m_docType)
return E_FILE_FORMAT_INVALID;
@@ -627,12 +626,12 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
if (status) // error
return status;
- } else if (id == mkvmuxer::kMkvDocTypeVersion) {
+ } else if (id == libwebm::kMkvDocTypeVersion) {
m_docTypeVersion = UnserializeUInt(pReader, pos, size);
if (m_docTypeVersion <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvDocTypeReadVersion) {
+ } else if (id == libwebm::kMkvDocTypeReadVersion) {
m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
if (m_docTypeReadVersion <= 0)
@@ -650,8 +649,8 @@ long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
return E_FILE_FORMAT_INVALID;
// Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid.
- if (m_maxIdLength <= 0 || m_maxIdLength > 4 ||
- m_maxSizeLength <= 0 || m_maxSizeLength > 8)
+ if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 ||
+ m_maxSizeLength > 8)
return E_FILE_FORMAT_INVALID;
return 0;
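EBMLHeader::Parse() returns 0 on success, a negative parser error code (including the E_BUFFER_NOT_FULL propagation added above), or a positive byte offset when more data must be made available. A hedged sketch of a simple file-based caller, assuming mkvparser::MkvReader from mkvreader.h, which is not part of this diff:

#include "mkvparser/mkvparser.h"
#include "mkvparser/mkvreader.h"

// Returns true if |path| starts with a valid EBML header.
bool ProbeWebmFile(const char* path) {
  mkvparser::MkvReader reader;
  if (reader.Open(path) != 0)
    return false;
  mkvparser::EBMLHeader header;
  long long pos = 0;
  const long long status = header.Parse(&reader, pos);
  reader.Close();
  return status == 0;  // negative: parse error; positive: need more data
}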
@@ -786,7 +785,7 @@ long long Segment::CreateInstance(IMkvReader* pReader, long long pos,
// Handle "unknown size" for live streaming of webm files.
const long long unknown_size = (1LL << (7 * len)) - 1;
- if (id == mkvmuxer::kMkvSegment) {
+ if (id == libwebm::kMkvSegment) {
if (size == unknown_size)
size = -1;
@@ -878,7 +877,7 @@ long long Segment::ParseHeaders() {
if (id < 0)
return E_FILE_FORMAT_INVALID;
- if (id == mkvmuxer::kMkvCluster)
+ if (id == libwebm::kMkvCluster)
break;
pos += len; // consume ID
@@ -930,7 +929,7 @@ long long Segment::ParseHeaders() {
if ((pos + size) > available)
return pos + size;
- if (id == mkvmuxer::kMkvInfo) {
+ if (id == libwebm::kMkvInfo) {
if (m_pInfo)
return E_FILE_FORMAT_INVALID;
@@ -944,7 +943,7 @@ long long Segment::ParseHeaders() {
if (status)
return status;
- } else if (id == mkvmuxer::kMkvTracks) {
+ } else if (id == libwebm::kMkvTracks) {
if (m_pTracks)
return E_FILE_FORMAT_INVALID;
@@ -958,7 +957,7 @@ long long Segment::ParseHeaders() {
if (status)
return status;
- } else if (id == mkvmuxer::kMkvCues) {
+ } else if (id == libwebm::kMkvCues) {
if (m_pCues == NULL) {
m_pCues = new (std::nothrow)
Cues(this, pos, size, element_start, element_size);
@@ -966,7 +965,7 @@ long long Segment::ParseHeaders() {
if (m_pCues == NULL)
return -1;
}
- } else if (id == mkvmuxer::kMkvSeekHead) {
+ } else if (id == libwebm::kMkvSeekHead) {
if (m_pSeekHead == NULL) {
m_pSeekHead = new (std::nothrow)
SeekHead(this, pos, size, element_start, element_size);
@@ -979,7 +978,7 @@ long long Segment::ParseHeaders() {
if (status)
return status;
}
- } else if (id == mkvmuxer::kMkvChapters) {
+ } else if (id == libwebm::kMkvChapters) {
if (m_pChapters == NULL) {
m_pChapters = new (std::nothrow)
Chapters(this, pos, size, element_start, element_size);
@@ -992,7 +991,7 @@ long long Segment::ParseHeaders() {
if (status)
return status;
}
- } else if (id == mkvmuxer::kMkvTags) {
+ } else if (id == libwebm::kMkvTags) {
if (m_pTags == NULL) {
m_pTags = new (std::nothrow)
Tags(this, pos, size, element_start, element_size);
@@ -1131,7 +1130,7 @@ long Segment::DoLoadCluster(long long& pos, long& len) {
return E_FILE_FORMAT_INVALID;
}
- if (id == mkvmuxer::kMkvCues) {
+ if (id == libwebm::kMkvCues) {
if (size == unknown_size) {
// Cues element of unknown size: Not supported.
return E_FILE_FORMAT_INVALID;
@@ -1149,7 +1148,7 @@ long Segment::DoLoadCluster(long long& pos, long& len) {
continue;
}
- if (id != mkvmuxer::kMkvCluster) {
+ if (id != libwebm::kMkvCluster) {
// Besides the Segment, Libwebm allows only cluster elements of unknown
// size. Fail the parse upon encountering a non-cluster element reporting
// unknown size.
@@ -1466,7 +1465,7 @@ long Segment::Load() {
return E_FILE_FORMAT_INVALID;
for (;;) {
- const int status = LoadCluster();
+ const long status = LoadCluster();
if (status < 0) // error
return status;
@@ -1512,9 +1511,9 @@ long SeekHead::Parse() {
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvSeek)
+ if (id == libwebm::kMkvSeek)
++entry_count;
- else if (id == mkvmuxer::kMkvVoid)
+ else if (id == libwebm::kMkvVoid)
++void_element_count;
pos += size; // consume payload
@@ -1553,14 +1552,14 @@ long SeekHead::Parse() {
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvSeek) {
+ if (id == libwebm::kMkvSeek) {
if (ParseEntry(pReader, pos, size, pEntry)) {
Entry& e = *pEntry++;
e.element_start = idpos;
e.element_size = (pos + size) - idpos;
}
- } else if (id == mkvmuxer::kMkvVoid) {
+ } else if (id == libwebm::kMkvVoid) {
VoidElement& e = *pVoidElement++;
e.element_start = idpos;
@@ -1664,7 +1663,7 @@ long Segment::ParseCues(long long off, long long& pos, long& len) {
const long long id = ReadID(m_pReader, idpos, len);
- if (id != mkvmuxer::kMkvCues)
+ if (id != libwebm::kMkvCues)
return E_FILE_FORMAT_INVALID;
pos += len; // consume ID
@@ -1746,7 +1745,7 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
if (seekIdId < 0)
return false;
- if (seekIdId != mkvmuxer::kMkvSeekID)
+ if (seekIdId != libwebm::kMkvSeekID)
return false;
if ((pos + len) > stop)
@@ -1790,7 +1789,7 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
const long long seekPosId = ReadID(pReader, pos, len);
- if (seekPosId != mkvmuxer::kMkvSeekPosition)
+ if (seekPosId != libwebm::kMkvSeekPosition)
return false;
if ((pos + len) > stop)
@@ -1900,7 +1899,7 @@ bool Cues::Init() const {
return false;
}
- if (id == mkvmuxer::kMkvCuePoint) {
+ if (id == libwebm::kMkvCuePoint) {
if (!PreloadCuePoint(cue_points_size, idpos))
return false;
}
@@ -1975,7 +1974,7 @@ bool Cues::LoadCuePoint() const {
if ((m_pos + size) > stop)
return false;
- if (id != mkvmuxer::kMkvCuePoint) {
+ if (id != libwebm::kMkvCuePoint) {
m_pos += size; // consume payload
if (m_pos > stop)
return false;
@@ -2105,8 +2104,8 @@ const CuePoint* Cues::GetLast() const {
}
const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
- if (pCurr == NULL || pCurr->GetTimeCode() < 0 ||
- m_cue_points == NULL || m_count < 1) {
+ if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL ||
+ m_count < 1) {
return NULL;
}
@@ -2286,7 +2285,7 @@ bool CuePoint::Load(IMkvReader* pReader) {
long len;
const long long id = ReadID(pReader, pos_, len);
- if (id != mkvmuxer::kMkvCuePoint)
+ if (id != libwebm::kMkvCuePoint)
return false;
pos_ += len; // consume ID
@@ -2326,10 +2325,10 @@ bool CuePoint::Load(IMkvReader* pReader) {
return false;
}
- if (id == mkvmuxer::kMkvCueTime)
+ if (id == libwebm::kMkvCueTime)
m_timecode = UnserializeUInt(pReader, pos, size);
- else if (id == mkvmuxer::kMkvCueTrackPositions)
+ else if (id == libwebm::kMkvCueTrackPositions)
++m_track_positions_count;
pos += size; // consume payload
@@ -2368,7 +2367,7 @@ bool CuePoint::Load(IMkvReader* pReader) {
pos += len; // consume Size field
assert((pos + size) <= stop);
- if (id == mkvmuxer::kMkvCueTrackPositions) {
+ if (id == libwebm::kMkvCueTrackPositions) {
TrackPosition& tp = *p++;
if (!tp.Parse(pReader, pos, size)) {
return false;
@@ -2417,11 +2416,11 @@ bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_,
return false;
}
- if (id == mkvmuxer::kMkvCueTrack)
+ if (id == libwebm::kMkvCueTrack)
m_track = UnserializeUInt(pReader, pos, size);
- else if (id == mkvmuxer::kMkvCueClusterPosition)
+ else if (id == libwebm::kMkvCueClusterPosition)
m_pos = UnserializeUInt(pReader, pos, size);
- else if (id == mkvmuxer::kMkvCueBlockNumber)
+ else if (id == libwebm::kMkvCueBlockNumber)
m_block = UnserializeUInt(pReader, pos, size);
pos += size; // consume payload
@@ -2555,7 +2554,7 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) {
return NULL;
const long long id = ReadID(m_pReader, pos, len);
- if (id != mkvmuxer::kMkvCluster)
+ if (id != libwebm::kMkvCluster)
return NULL;
pos += len; // consume ID
@@ -2612,7 +2611,7 @@ const Cluster* Segment::GetNext(const Cluster* pCurr) {
if (size == 0) // weird
continue;
- if (id == mkvmuxer::kMkvCluster) {
+ if (id == libwebm::kMkvCluster) {
const long long off_next_ = idpos - m_start;
long long pos_;
@@ -2762,7 +2761,7 @@ long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult,
const long long id = ReadUInt(m_pReader, pos, len);
- if (id != mkvmuxer::kMkvCluster)
+ if (id != libwebm::kMkvCluster)
return -1;
pos += len; // consume ID
@@ -2927,7 +2926,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
return E_FILE_FORMAT_INVALID;
}
- if (id == mkvmuxer::kMkvCues) {
+ if (id == libwebm::kMkvCues) {
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
@@ -2953,7 +2952,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
continue;
}
- if (id != mkvmuxer::kMkvCluster) { // not a Cluster ID
+ if (id != libwebm::kMkvCluster) { // not a Cluster ID
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
@@ -3091,7 +3090,7 @@ long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
// that we have exhausted the sub-elements inside the cluster
// whose ID we parsed earlier.
- if (id == mkvmuxer::kMkvCluster || id == mkvmuxer::kMkvCues)
+ if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues)
break;
pos += len; // consume ID (of sub-element)
@@ -3259,7 +3258,7 @@ long Chapters::Parse() {
if (size == 0) // weird
continue;
- if (id == mkvmuxer::kMkvEditionEntry) {
+ if (id == libwebm::kMkvEditionEntry) {
status = ParseEdition(pos, size);
if (status < 0) // error
@@ -3375,7 +3374,7 @@ long Chapters::Edition::Parse(IMkvReader* pReader, long long pos,
if (size == 0)
continue;
- if (id == mkvmuxer::kMkvChapterAtom) {
+ if (id == libwebm::kMkvChapterAtom) {
status = ParseAtom(pReader, pos, size);
if (status < 0) // error
@@ -3508,17 +3507,17 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) {
if (size == 0) // 0 length payload, skip.
continue;
- if (id == mkvmuxer::kMkvChapterDisplay) {
+ if (id == libwebm::kMkvChapterDisplay) {
status = ParseDisplay(pReader, pos, size);
if (status < 0) // error
return status;
- } else if (id == mkvmuxer::kMkvChapterStringUID) {
+ } else if (id == libwebm::kMkvChapterStringUID) {
status = UnserializeString(pReader, pos, size, m_string_uid);
if (status < 0) // error
return status;
- } else if (id == mkvmuxer::kMkvChapterUID) {
+ } else if (id == libwebm::kMkvChapterUID) {
long long val;
status = UnserializeInt(pReader, pos, size, val);
@@ -3526,14 +3525,14 @@ long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) {
return status;
m_uid = static_cast<unsigned long long>(val);
- } else if (id == mkvmuxer::kMkvChapterTimeStart) {
+ } else if (id == libwebm::kMkvChapterTimeStart) {
const long long val = UnserializeUInt(pReader, pos, size);
if (val < 0) // error
return static_cast<long>(val);
m_start_timecode = val;
- } else if (id == mkvmuxer::kMkvChapterTimeEnd) {
+ } else if (id == libwebm::kMkvChapterTimeEnd) {
const long long val = UnserializeUInt(pReader, pos, size);
if (val < 0) // error
@@ -3661,17 +3660,17 @@ long Chapters::Display::Parse(IMkvReader* pReader, long long pos,
if (size == 0) // No payload.
continue;
- if (id == mkvmuxer::kMkvChapString) {
+ if (id == libwebm::kMkvChapString) {
status = UnserializeString(pReader, pos, size, m_string);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvChapLanguage) {
+ } else if (id == libwebm::kMkvChapLanguage) {
status = UnserializeString(pReader, pos, size, m_language);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvChapCountry) {
+ } else if (id == libwebm::kMkvChapCountry) {
status = UnserializeString(pReader, pos, size, m_country);
if (status)
@@ -3724,7 +3723,7 @@ long Tags::Parse() {
if (size == 0) // 0 length tag, read another
continue;
- if (id == mkvmuxer::kMkvTag) {
+ if (id == libwebm::kMkvTag) {
status = ParseTag(pos, size);
if (status < 0)
@@ -3840,7 +3839,7 @@ long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) {
if (size == 0) // 0 length tag, read another
continue;
- if (id == mkvmuxer::kMkvSimpleTag) {
+ if (id == libwebm::kMkvSimpleTag) {
status = ParseSimpleTag(pReader, pos, size);
if (status < 0)
@@ -3931,12 +3930,12 @@ long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos,
if (size == 0) // weird
continue;
- if (id == mkvmuxer::kMkvTagName) {
+ if (id == libwebm::kMkvTagName) {
status = UnserializeString(pReader, pos, size, m_tag_name);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvTagString) {
+ } else if (id == libwebm::kMkvTagString) {
status = UnserializeString(pReader, pos, size, m_tag_string);
if (status)
@@ -3996,12 +3995,12 @@ long SegmentInfo::Parse() {
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvTimecodeScale) {
+ if (id == libwebm::kMkvTimecodeScale) {
m_timecodeScale = UnserializeUInt(pReader, pos, size);
if (m_timecodeScale <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvDuration) {
+ } else if (id == libwebm::kMkvDuration) {
const long status = UnserializeFloat(pReader, pos, size, m_duration);
if (status < 0)
@@ -4009,19 +4008,19 @@ long SegmentInfo::Parse() {
if (m_duration < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvMuxingApp) {
+ } else if (id == libwebm::kMkvMuxingApp) {
const long status =
UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvWritingApp) {
+ } else if (id == libwebm::kMkvWritingApp) {
const long status =
UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvTitle) {
+ } else if (id == libwebm::kMkvTitle) {
const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8);
if (status)
@@ -4176,7 +4175,7 @@ long ContentEncoding::ParseContentEncAESSettingsEntry(
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvAESSettingsCipherMode) {
+ if (id == libwebm::kMkvAESSettingsCipherMode) {
aes->cipher_mode = UnserializeUInt(pReader, pos, size);
if (aes->cipher_mode != 1)
return E_FILE_FORMAT_INVALID;
@@ -4207,10 +4206,10 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvContentCompression)
+ if (id == libwebm::kMkvContentCompression)
++compression_count;
- if (id == mkvmuxer::kMkvContentEncryption)
+ if (id == libwebm::kMkvContentEncryption)
++encryption_count;
pos += size; // consume payload
@@ -4246,15 +4245,15 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvContentEncodingOrder) {
+ if (id == libwebm::kMkvContentEncodingOrder) {
encoding_order_ = UnserializeUInt(pReader, pos, size);
- } else if (id == mkvmuxer::kMkvContentEncodingScope) {
+ } else if (id == libwebm::kMkvContentEncodingScope) {
encoding_scope_ = UnserializeUInt(pReader, pos, size);
if (encoding_scope_ < 1)
return -1;
- } else if (id == mkvmuxer::kMkvContentEncodingType) {
+ } else if (id == libwebm::kMkvContentEncodingType) {
encoding_type_ = UnserializeUInt(pReader, pos, size);
- } else if (id == mkvmuxer::kMkvContentCompression) {
+ } else if (id == libwebm::kMkvContentCompression) {
ContentCompression* const compression =
new (std::nothrow) ContentCompression();
if (!compression)
@@ -4266,7 +4265,7 @@ long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
return status;
}
*compression_entries_end_++ = compression;
- } else if (id == mkvmuxer::kMkvContentEncryption) {
+ } else if (id == libwebm::kMkvContentEncryption) {
ContentEncryption* const encryption =
new (std::nothrow) ContentEncryption();
if (!encryption)
@@ -4307,13 +4306,13 @@ long ContentEncoding::ParseCompressionEntry(long long start, long long size,
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvContentCompAlgo) {
+ if (id == libwebm::kMkvContentCompAlgo) {
long long algo = UnserializeUInt(pReader, pos, size);
if (algo < 0)
return E_FILE_FORMAT_INVALID;
compression->algo = algo;
valid = true;
- } else if (id == mkvmuxer::kMkvContentCompSettings) {
+ } else if (id == libwebm::kMkvContentCompSettings) {
if (size <= 0)
return E_FILE_FORMAT_INVALID;
@@ -4360,11 +4359,11 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvContentEncAlgo) {
+ if (id == libwebm::kMkvContentEncAlgo) {
encryption->algo = UnserializeUInt(pReader, pos, size);
if (encryption->algo != 5)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvContentEncKeyID) {
+ } else if (id == libwebm::kMkvContentEncKeyID) {
delete[] encryption->key_id;
encryption->key_id = NULL;
encryption->key_id_len = 0;
@@ -4386,7 +4385,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
encryption->key_id = buf;
encryption->key_id_len = buflen;
- } else if (id == mkvmuxer::kMkvContentSignature) {
+ } else if (id == libwebm::kMkvContentSignature) {
delete[] encryption->signature;
encryption->signature = NULL;
encryption->signature_len = 0;
@@ -4408,7 +4407,7 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
encryption->signature = buf;
encryption->signature_len = buflen;
- } else if (id == mkvmuxer::kMkvContentSigKeyID) {
+ } else if (id == libwebm::kMkvContentSigKeyID) {
delete[] encryption->sig_key_id;
encryption->sig_key_id = NULL;
encryption->sig_key_id_len = 0;
@@ -4430,11 +4429,11 @@ long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
encryption->sig_key_id = buf;
encryption->sig_key_id_len = buflen;
- } else if (id == mkvmuxer::kMkvContentSigAlgo) {
+ } else if (id == libwebm::kMkvContentSigAlgo) {
encryption->sig_algo = UnserializeUInt(pReader, pos, size);
- } else if (id == mkvmuxer::kMkvContentSigHashAlgo) {
+ } else if (id == libwebm::kMkvContentSigHashAlgo) {
encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size);
- } else if (id == mkvmuxer::kMkvContentEncAESSettings) {
+ } else if (id == libwebm::kMkvContentEncAESSettings) {
const long status = ParseContentEncAESSettingsEntry(
pos, size, pReader, &encryption->aes_settings);
if (status)
@@ -4921,7 +4920,7 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) {
return status;
// pos now designates start of element
- if (id == mkvmuxer::kMkvContentEncoding)
+ if (id == libwebm::kMkvContentEncoding)
++count;
pos += size; // consume payload
@@ -4946,7 +4945,7 @@ long Track::ParseContentEncodingsEntry(long long start, long long size) {
return status;
// pos now designates start of element
- if (id == mkvmuxer::kMkvContentEncoding) {
+ if (id == libwebm::kMkvContentEncoding) {
ContentEncoding* const content_encoding =
new (std::nothrow) ContentEncoding();
if (!content_encoding)
@@ -4978,9 +4977,222 @@ BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; }
const Block* Track::EOSBlock::GetBlock() const { return NULL; }
+bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos,
+ long long value_size, bool is_x,
+ PrimaryChromaticity** chromaticity) {
+ if (!reader)
+ return false;
+
+ std::auto_ptr<PrimaryChromaticity> chromaticity_ptr;
+
+ if (!*chromaticity) {
+ chromaticity_ptr.reset(new PrimaryChromaticity());
+ } else {
+ chromaticity_ptr.reset(*chromaticity);
+ }
+
+ if (!chromaticity_ptr.get())
+ return false;
+
+ float* value = is_x ? &chromaticity_ptr->x : &chromaticity_ptr->y;
+
+ double parser_value = 0;
+ const long long value_parse_status =
+ UnserializeFloat(reader, read_pos, value_size, parser_value);
+
+ *value = static_cast<float>(parser_value);
+
+ if (value_parse_status < 0 || *value < 0.0 || *value > 1.0)
+ return false;
+
+ *chromaticity = chromaticity_ptr.release();
+ return true;
+}
+
+bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
+ long long mm_size, MasteringMetadata** mm) {
+ if (!reader || *mm)
+ return false;
+
+ std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+ if (!mm_ptr.get())
+ return false;
+
+ const long long mm_end = mm_start + mm_size;
+ long long read_pos = mm_start;
+
+ while (read_pos < mm_end) {
+ long long child_id = 0;
+ long long child_size = 0;
+
+ const long long status =
+ ParseElementHeader(reader, read_pos, mm_end, child_id, child_size);
+ if (status < 0)
+ return false;
+
+ if (child_id == libwebm::kMkvLuminanceMax) {
+ double value = 0;
+ const long long value_parse_status =
+ UnserializeFloat(reader, read_pos, child_size, value);
+ mm_ptr->luminance_max = static_cast<float>(value);
+ if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 ||
+ mm_ptr->luminance_max > 9999.99) {
+ return false;
+ }
+ } else if (child_id == libwebm::kMkvLuminanceMin) {
+ double value = 0;
+ const long long value_parse_status =
+ UnserializeFloat(reader, read_pos, child_size, value);
+ mm_ptr->luminance_min = static_cast<float>(value);
+ if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 ||
+ mm_ptr->luminance_min > 999.9999) {
+ return false;
+ }
+ } else {
+ bool is_x = false;
+ PrimaryChromaticity** chromaticity;
+ switch (child_id) {
+ case libwebm::kMkvPrimaryRChromaticityX:
+ case libwebm::kMkvPrimaryRChromaticityY:
+ is_x = child_id == libwebm::kMkvPrimaryRChromaticityX;
+ chromaticity = &mm_ptr->r;
+ break;
+ case libwebm::kMkvPrimaryGChromaticityX:
+ case libwebm::kMkvPrimaryGChromaticityY:
+ is_x = child_id == libwebm::kMkvPrimaryGChromaticityX;
+ chromaticity = &mm_ptr->g;
+ break;
+ case libwebm::kMkvPrimaryBChromaticityX:
+ case libwebm::kMkvPrimaryBChromaticityY:
+ is_x = child_id == libwebm::kMkvPrimaryBChromaticityX;
+ chromaticity = &mm_ptr->b;
+ break;
+ case libwebm::kMkvWhitePointChromaticityX:
+ case libwebm::kMkvWhitePointChromaticityY:
+ is_x = child_id == libwebm::kMkvWhitePointChromaticityX;
+ chromaticity = &mm_ptr->white_point;
+ break;
+ default:
+ return false;
+ }
+ const bool value_parse_status = PrimaryChromaticity::Parse(
+ reader, read_pos, child_size, is_x, chromaticity);
+ if (!value_parse_status)
+ return false;
+ }
+
+ read_pos += child_size;
+ if (read_pos > mm_end)
+ return false;
+ }
+
+ *mm = mm_ptr.release();
+ return true;
+}
+
+bool Colour::Parse(IMkvReader* reader, long long colour_start,
+ long long colour_size, Colour** colour) {
+ if (!reader || *colour)
+ return false;
+
+ std::auto_ptr<Colour> colour_ptr(new Colour());
+ if (!colour_ptr.get())
+ return false;
+
+ const long long colour_end = colour_start + colour_size;
+ long long read_pos = colour_start;
+
+ while (read_pos < colour_end) {
+ long long child_id = 0;
+ long long child_size = 0;
+
+ const long status =
+ ParseElementHeader(reader, read_pos, colour_end, child_id, child_size);
+ if (status < 0)
+ return false;
+
+ if (child_id == libwebm::kMkvMatrixCoefficients) {
+ colour_ptr->matrix_coefficients =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->matrix_coefficients < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvBitsPerChannel) {
+ colour_ptr->bits_per_channel =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->bits_per_channel < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) {
+ colour_ptr->chroma_subsampling_horz =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->chroma_subsampling_horz < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvChromaSubsamplingVert) {
+ colour_ptr->chroma_subsampling_vert =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->chroma_subsampling_vert < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvCbSubsamplingHorz) {
+ colour_ptr->cb_subsampling_horz =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->cb_subsampling_horz < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvCbSubsamplingVert) {
+ colour_ptr->cb_subsampling_vert =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->cb_subsampling_vert < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvChromaSitingHorz) {
+ colour_ptr->chroma_siting_horz =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->chroma_siting_horz < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvChromaSitingVert) {
+ colour_ptr->chroma_siting_vert =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->chroma_siting_vert < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvRange) {
+ colour_ptr->range = UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->range < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvTransferCharacteristics) {
+ colour_ptr->transfer_characteristics =
+ UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->transfer_characteristics < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvPrimaries) {
+ colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->primaries < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvMaxCLL) {
+ colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->max_cll < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvMaxFALL) {
+ colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size);
+ if (colour_ptr->max_fall < 0)
+ return false;
+ } else if (child_id == libwebm::kMkvMasteringMetadata) {
+ if (!MasteringMetadata::Parse(reader, read_pos, child_size,
+ &colour_ptr->mastering_metadata))
+ return false;
+ } else {
+ return false;
+ }
+
+ read_pos += child_size;
+ if (read_pos > colour_end)
+ return false;
+ }
+ *colour = colour_ptr.release();
+ return true;
+}
+
VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
long long element_size)
- : Track(pSegment, element_start, element_size) {}
+ : Track(pSegment, element_start, element_size), m_colour(NULL) {}
+
+VideoTrack::~VideoTrack() { delete m_colour; }
long VideoTrack::Parse(Segment* pSegment, const Info& info,
long long element_start, long long element_size,
@@ -5011,6 +5223,8 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
const long long stop = pos + s.size;
+ Colour* colour = NULL;
+
while (pos < stop) {
long long id, size;
@@ -5019,37 +5233,37 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvPixelWidth) {
+ if (id == libwebm::kMkvPixelWidth) {
width = UnserializeUInt(pReader, pos, size);
if (width <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvPixelHeight) {
+ } else if (id == libwebm::kMkvPixelHeight) {
height = UnserializeUInt(pReader, pos, size);
if (height <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvDisplayWidth) {
+ } else if (id == libwebm::kMkvDisplayWidth) {
display_width = UnserializeUInt(pReader, pos, size);
if (display_width <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvDisplayHeight) {
+ } else if (id == libwebm::kMkvDisplayHeight) {
display_height = UnserializeUInt(pReader, pos, size);
if (display_height <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvDisplayUnit) {
+ } else if (id == libwebm::kMkvDisplayUnit) {
display_unit = UnserializeUInt(pReader, pos, size);
if (display_unit < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvStereoMode) {
+ } else if (id == libwebm::kMkvStereoMode) {
stereo_mode = UnserializeUInt(pReader, pos, size);
if (stereo_mode < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvFrameRate) {
+ } else if (id == libwebm::kMkvFrameRate) {
const long status = UnserializeFloat(pReader, pos, size, rate);
if (status < 0)
@@ -5057,6 +5271,9 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
if (rate <= 0)
return E_FILE_FORMAT_INVALID;
+ } else if (id == libwebm::kMkvColour) {
+ if (!Colour::Parse(pReader, pos, size, &colour))
+ return E_FILE_FORMAT_INVALID;
}
pos += size; // consume payload
@@ -5087,6 +5304,7 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
pTrack->m_display_unit = display_unit;
pTrack->m_stereo_mode = stereo_mode;
pTrack->m_rate = rate;
+ pTrack->m_colour = colour;
pResult = pTrack;
return 0; // success
@@ -5185,6 +5403,8 @@ long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {
return 0;
}
+Colour* VideoTrack::GetColour() const { return m_colour; }
+
long long VideoTrack::GetWidth() const { return m_width; }
long long VideoTrack::GetHeight() const { return m_height; }
@@ -5239,7 +5459,7 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info,
if (status < 0) // error
return status;
- if (id == mkvmuxer::kMkvSamplingFrequency) {
+ if (id == libwebm::kMkvSamplingFrequency) {
status = UnserializeFloat(pReader, pos, size, rate);
if (status < 0)
@@ -5247,12 +5467,12 @@ long AudioTrack::Parse(Segment* pSegment, const Info& info,
if (rate <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvChannels) {
+ } else if (id == libwebm::kMkvChannels) {
channels = UnserializeUInt(pReader, pos, size);
if (channels <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvBitDepth) {
+ } else if (id == libwebm::kMkvBitDepth) {
bit_depth = UnserializeUInt(pReader, pos, size);
if (bit_depth <= 0)
@@ -5325,7 +5545,7 @@ long Tracks::Parse() {
if (size == 0) // weird
continue;
- if (id == mkvmuxer::kMkvTrackEntry)
+ if (id == libwebm::kMkvTrackEntry)
++count;
pos += size; // consume payload
@@ -5367,7 +5587,7 @@ long Tracks::Parse() {
const long long element_size = payload_stop - element_start;
- if (id == mkvmuxer::kMkvTrackEntry) {
+ if (id == libwebm::kMkvTrackEntry) {
Track*& pTrack = *m_trackEntriesEnd;
pTrack = NULL;
@@ -5443,16 +5663,16 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size,
const long long start = pos;
- if (id == mkvmuxer::kMkvVideo) {
+ if (id == libwebm::kMkvVideo) {
v.start = start;
v.size = size;
- } else if (id == mkvmuxer::kMkvAudio) {
+ } else if (id == libwebm::kMkvAudio) {
a.start = start;
a.size = size;
- } else if (id == mkvmuxer::kMkvContentEncodings) {
+ } else if (id == libwebm::kMkvContentEncodings) {
e.start = start;
e.size = size;
- } else if (id == mkvmuxer::kMkvTrackUID) {
+ } else if (id == libwebm::kMkvTrackUID) {
if (size > 8)
return E_FILE_FORMAT_INVALID;
@@ -5474,49 +5694,49 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size,
++pos_;
}
- } else if (id == mkvmuxer::kMkvTrackNumber) {
+ } else if (id == libwebm::kMkvTrackNumber) {
const long long num = UnserializeUInt(pReader, pos, size);
if ((num <= 0) || (num > 127))
return E_FILE_FORMAT_INVALID;
info.number = static_cast<long>(num);
- } else if (id == mkvmuxer::kMkvTrackType) {
+ } else if (id == libwebm::kMkvTrackType) {
const long long type = UnserializeUInt(pReader, pos, size);
if ((type <= 0) || (type > 254))
return E_FILE_FORMAT_INVALID;
info.type = static_cast<long>(type);
- } else if (id == mkvmuxer::kMkvName) {
+ } else if (id == libwebm::kMkvName) {
const long status =
UnserializeString(pReader, pos, size, info.nameAsUTF8);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvLanguage) {
+ } else if (id == libwebm::kMkvLanguage) {
const long status = UnserializeString(pReader, pos, size, info.language);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvDefaultDuration) {
+ } else if (id == libwebm::kMkvDefaultDuration) {
const long long duration = UnserializeUInt(pReader, pos, size);
if (duration < 0)
return E_FILE_FORMAT_INVALID;
info.defaultDuration = static_cast<unsigned long long>(duration);
- } else if (id == mkvmuxer::kMkvCodecID) {
+ } else if (id == libwebm::kMkvCodecID) {
const long status = UnserializeString(pReader, pos, size, info.codecId);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvFlagLacing) {
+ } else if (id == libwebm::kMkvFlagLacing) {
lacing = UnserializeUInt(pReader, pos, size);
if ((lacing < 0) || (lacing > 1))
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvCodecPrivate) {
+ } else if (id == libwebm::kMkvCodecPrivate) {
delete[] info.codecPrivate;
info.codecPrivate = NULL;
info.codecPrivateSize = 0;
@@ -5539,15 +5759,15 @@ long Tracks::ParseTrackEntry(long long track_start, long long track_size,
info.codecPrivate = buf;
info.codecPrivateSize = buflen;
}
- } else if (id == mkvmuxer::kMkvCodecName) {
+ } else if (id == libwebm::kMkvCodecName) {
const long status =
UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
if (status)
return status;
- } else if (id == mkvmuxer::kMkvCodecDelay) {
+ } else if (id == libwebm::kMkvCodecDelay) {
info.codecDelay = UnserializeUInt(pReader, pos, size);
- } else if (id == mkvmuxer::kMkvSeekPreRoll) {
+ } else if (id == libwebm::kMkvSeekPreRoll) {
info.seekPreRoll = UnserializeUInt(pReader, pos, size);
}
@@ -5730,7 +5950,7 @@ long Cluster::Load(long long& pos, long& len) const {
if (id_ < 0) // error
return static_cast<long>(id_);
- if (id_ != mkvmuxer::kMkvCluster)
+ if (id_ != libwebm::kMkvCluster)
return E_FILE_FORMAT_INVALID;
pos += len; // consume id
@@ -5812,10 +6032,10 @@ long Cluster::Load(long long& pos, long& len) const {
// that we have exhausted the sub-elements inside the cluster
// whose ID we parsed earlier.
- if (id == mkvmuxer::kMkvCluster)
+ if (id == libwebm::kMkvCluster)
break;
- if (id == mkvmuxer::kMkvCues)
+ if (id == libwebm::kMkvCues)
break;
pos += len; // consume ID field
@@ -5864,7 +6084,7 @@ long Cluster::Load(long long& pos, long& len) const {
if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
return E_FILE_FORMAT_INVALID;
- if (id == mkvmuxer::kMkvTimecode) {
+ if (id == libwebm::kMkvTimecode) {
len = static_cast<long>(size);
if ((pos + size) > avail)
@@ -5879,10 +6099,10 @@ long Cluster::Load(long long& pos, long& len) const {
if (bBlock)
break;
- } else if (id == mkvmuxer::kMkvBlockGroup) {
+ } else if (id == libwebm::kMkvBlockGroup) {
bBlock = true;
break;
- } else if (id == mkvmuxer::kMkvSimpleBlock) {
+ } else if (id == libwebm::kMkvSimpleBlock) {
bBlock = true;
break;
}
@@ -5980,7 +6200,7 @@ long Cluster::Parse(long long& pos, long& len) const {
// that we have exhausted the sub-elements inside the cluster
// whose ID we parsed earlier.
- if ((id == mkvmuxer::kMkvCluster) || (id == mkvmuxer::kMkvCues)) {
+ if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) {
if (m_element_size < 0)
m_element_size = pos - m_element_start;
@@ -6035,8 +6255,7 @@ long Cluster::Parse(long long& pos, long& len) const {
if (cluster_stop >= 0) {
if (block_stop > cluster_stop) {
- if (id == mkvmuxer::kMkvBlockGroup ||
- id == mkvmuxer::kMkvSimpleBlock) {
+ if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) {
return E_FILE_FORMAT_INVALID;
}
@@ -6054,10 +6273,10 @@ long Cluster::Parse(long long& pos, long& len) const {
Cluster* const this_ = const_cast<Cluster*>(this);
- if (id == mkvmuxer::kMkvBlockGroup)
+ if (id == libwebm::kMkvBlockGroup)
return this_->ParseBlockGroup(size, pos, len);
- if (id == mkvmuxer::kMkvSimpleBlock)
+ if (id == libwebm::kMkvSimpleBlock)
return this_->ParseSimpleBlock(size, pos, len);
pos += size; // consume payload
@@ -6188,8 +6407,7 @@ long Cluster::ParseSimpleBlock(long long block_size, long long& pos,
return E_BUFFER_NOT_FULL;
}
- status = CreateBlock(mkvmuxer::kMkvSimpleBlock,
- block_start, block_size,
+ status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size,
0); // DiscardPadding
if (status != 0)
@@ -6299,14 +6517,14 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
- if (id == mkvmuxer::kMkvDiscardPadding) {
+ if (id == libwebm::kMkvDiscardPadding) {
status = UnserializeInt(pReader, pos, size, discard_padding);
if (status < 0) // error
return status;
}
- if (id != mkvmuxer::kMkvBlock) {
+ if (id != libwebm::kMkvBlock) {
pos += size; // consume sub-part of block group
if (pos > payload_stop)
@@ -6399,8 +6617,8 @@ long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
if (pos != payload_stop)
return E_FILE_FORMAT_INVALID;
- status = CreateBlock(mkvmuxer::kMkvBlockGroup,
- payload_start, payload_size, discard_padding);
+ status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size,
+ discard_padding);
if (status != 0)
return status;
@@ -6565,7 +6783,7 @@ long Cluster::HasBlockEntries(
if (id < 0) // error
return static_cast<long>(id);
- if (id != mkvmuxer::kMkvCluster)
+ if (id != libwebm::kMkvCluster)
return E_PARSE_FAILED;
pos += len; // consume Cluster ID field
@@ -6653,10 +6871,10 @@ long Cluster::HasBlockEntries(
// that we have exhausted the sub-elements inside the cluster
// whose ID we parsed earlier.
- if (id == mkvmuxer::kMkvCluster)
+ if (id == libwebm::kMkvCluster)
return 0; // no entries found
- if (id == mkvmuxer::kMkvCues)
+ if (id == libwebm::kMkvCues)
return 0; // no entries found
pos += len; // consume id field
@@ -6708,10 +6926,10 @@ long Cluster::HasBlockEntries(
if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
return E_FILE_FORMAT_INVALID;
- if (id == mkvmuxer::kMkvBlockGroup)
+ if (id == libwebm::kMkvBlockGroup)
return 1; // have at least one entry
- if (id == mkvmuxer::kMkvSimpleBlock)
+ if (id == libwebm::kMkvSimpleBlock)
return 1; // have at least one entry
pos += size; // consume payload
@@ -6786,7 +7004,7 @@ long long Cluster::GetLastTime() const {
long Cluster::CreateBlock(long long id,
long long pos, // absolute pos of payload
long long size, long long discard_padding) {
- if (id != mkvmuxer::kMkvBlockGroup && id != mkvmuxer::kMkvSimpleBlock)
+ if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock)
return E_PARSE_FAILED;
if (m_entries_count < 0) { // haven't parsed anything yet
@@ -6826,7 +7044,7 @@ long Cluster::CreateBlock(long long id,
}
}
- if (id == mkvmuxer::kMkvBlockGroup)
+ if (id == libwebm::kMkvBlockGroup)
return CreateBlockGroup(pos, size, discard_padding);
else
return CreateSimpleBlock(pos, size);
@@ -6871,12 +7089,12 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size,
pos += len; // consume size
- if (id == mkvmuxer::kMkvBlock) {
+ if (id == libwebm::kMkvBlock) {
if (bpos < 0) { // Block ID
bpos = pos;
bsize = size;
}
- } else if (id == mkvmuxer::kMkvBlockDuration) {
+ } else if (id == libwebm::kMkvBlockDuration) {
if (size > 8)
return E_FILE_FORMAT_INVALID;
@@ -6884,7 +7102,7 @@ long Cluster::CreateBlockGroup(long long start_offset, long long size,
if (duration < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == mkvmuxer::kMkvReferenceBlock) {
+ } else if (id == libwebm::kMkvReferenceBlock) {
if (size > 8 || size <= 0)
return E_FILE_FORMAT_INVALID;
const long size_ = static_cast<long>(size);
@@ -7231,7 +7449,6 @@ const BlockEntry* Cluster::GetEntry(const CuePoint& cp,
BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {}
BlockEntry::~BlockEntry() {}
-bool BlockEntry::EOS() const { return (GetKind() == kBlockEOS); }
const Cluster* BlockEntry::GetCluster() const { return m_pCluster; }
long BlockEntry::GetIndex() const { return m_index; }
@@ -7555,7 +7772,6 @@ long Block::Parse(const Cluster* pCluster) {
if (pf >= pf_end)
return E_FILE_FORMAT_INVALID;
-
const Frame& prev = *pf++;
assert(prev.len == frame_size);
if (prev.len != frame_size)
@@ -7581,7 +7797,7 @@ long Block::Parse(const Cluster* pCluster) {
if (pos > stop)
return E_FILE_FORMAT_INVALID;
- const int exp = 7 * len - 1;
+ const long exp = 7 * len - 1;
const long long bias = (1LL << exp) - 1LL;
const long long delta_size = delta_size_ - bias;
@@ -7721,4 +7937,4 @@ long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const {
long long Block::GetDiscardPadding() const { return m_discard_padding; }
-} // end namespace mkvparser
+} // namespace mkvparser
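The new Parse helpers above (PrimaryChromaticity::Parse, MasteringMetadata::Parse, Colour::Parse) share one ownership pattern: build the result behind a scoped pointer, return false without leaking on any parse error, and release() to the out-parameter only on success; PrimaryChromaticity::Parse additionally adopts an object the caller already allocated. Below is a minimal sketch of that pattern, using std::unique_ptr in place of the C++98 std::auto_ptr the file targets — Thing, FillFrom, and ParseThing are hypothetical stand-ins, not libwebm API:

    #include <memory>
    #include <new>

    struct Thing { int field; Thing() : field(0) {} };

    // Stand-in for the real element parse; fails on bad input.
    static bool FillFrom(const char* src, Thing* out) {
      if (src == nullptr || out == nullptr) return false;
      out->field = *src;
      return true;
    }

    static bool ParseThing(const char* src, Thing** thing) {
      // Adopt the caller's object if one exists, else allocate a fresh one.
      std::unique_ptr<Thing> ptr(*thing ? *thing : new (std::nothrow) Thing());
      if (!ptr)
        return false;
      if (!FillFrom(src, ptr.get()))
        return false;  // failure: the scoped pointer frees the object, so a
                       // caller-supplied *thing must not be reused afterwards
      *thing = ptr.release();  // success: ownership goes back to the caller
      return true;
    }

Note the failure path mirrors the diff: an adopted object is destroyed by the scoped pointer, so callers must treat their pointer as invalid once Parse reports an error.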
diff --git a/libvpx/third_party/libwebm/mkvparser.hpp b/libvpx/third_party/libwebm/mkvparser/mkvparser.h
index 75ef69d76..42e6e88ab 100644
--- a/libvpx/third_party/libwebm/mkvparser.hpp
+++ b/libvpx/third_party/libwebm/mkvparser/mkvparser.h
@@ -5,13 +5,10 @@
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVPARSER_HPP
-#define MKVPARSER_HPP
+#ifndef MKVPARSER_MKVPARSER_H_
+#define MKVPARSER_MKVPARSER_H_
#include <cstddef>
-#include <cstdio>
-#include <cstdlib>
namespace mkvparser {
@@ -28,8 +25,9 @@ class IMkvReader {
virtual ~IMkvReader();
};
-template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
- unsigned long long element_size);
+template <typename Type>
+Type* SafeArrayAlloc(unsigned long long num_elements,
+ unsigned long long element_size);
long long GetUIntLength(IMkvReader*, long long, long&);
long long ReadUInt(IMkvReader*, long long, long&);
long long ReadID(IMkvReader* pReader, long long pos, long& len);
@@ -128,7 +126,7 @@ class BlockEntry {
public:
virtual ~BlockEntry();
- bool EOS() const;
+ bool EOS() const { return (GetKind() == kBlockEOS); }
const Cluster* GetCluster() const;
long GetIndex() const;
virtual const Block* GetBlock() const = 0;
@@ -391,6 +389,90 @@ class Track {
ContentEncoding** content_encoding_entries_end_;
};
+struct PrimaryChromaticity {
+ PrimaryChromaticity() : x(0), y(0) {}
+ ~PrimaryChromaticity() {}
+ static bool Parse(IMkvReader* reader, long long read_pos,
+ long long value_size, bool is_x,
+ PrimaryChromaticity** chromaticity);
+ float x;
+ float y;
+};
+
+struct MasteringMetadata {
+ static const float kValueNotPresent;
+
+ MasteringMetadata()
+ : r(NULL),
+ g(NULL),
+ b(NULL),
+ white_point(NULL),
+ luminance_max(kValueNotPresent),
+ luminance_min(kValueNotPresent) {}
+ ~MasteringMetadata() {
+ delete r;
+ delete g;
+ delete b;
+ delete white_point;
+ }
+
+ static bool Parse(IMkvReader* reader, long long element_start,
+ long long element_size,
+ MasteringMetadata** mastering_metadata);
+
+ PrimaryChromaticity* r;
+ PrimaryChromaticity* g;
+ PrimaryChromaticity* b;
+ PrimaryChromaticity* white_point;
+ float luminance_max;
+ float luminance_min;
+};
+
+struct Colour {
+ static const long long kValueNotPresent;
+
+ // Unless otherwise noted all values assigned upon construction are the
+ // equivalent of unspecified/default.
+ Colour()
+ : matrix_coefficients(kValueNotPresent),
+ bits_per_channel(kValueNotPresent),
+ chroma_subsampling_horz(kValueNotPresent),
+ chroma_subsampling_vert(kValueNotPresent),
+ cb_subsampling_horz(kValueNotPresent),
+ cb_subsampling_vert(kValueNotPresent),
+ chroma_siting_horz(kValueNotPresent),
+ chroma_siting_vert(kValueNotPresent),
+ range(kValueNotPresent),
+ transfer_characteristics(kValueNotPresent),
+ primaries(kValueNotPresent),
+ max_cll(kValueNotPresent),
+ max_fall(kValueNotPresent),
+ mastering_metadata(NULL) {}
+ ~Colour() {
+ delete mastering_metadata;
+ mastering_metadata = NULL;
+ }
+
+ static bool Parse(IMkvReader* reader, long long element_start,
+ long long element_size, Colour** colour);
+
+ long long matrix_coefficients;
+ long long bits_per_channel;
+ long long chroma_subsampling_horz;
+ long long chroma_subsampling_vert;
+ long long cb_subsampling_horz;
+ long long cb_subsampling_vert;
+ long long chroma_siting_horz;
+ long long chroma_siting_vert;
+ long long range;
+ long long transfer_characteristics;
+ long long primaries;
+ long long max_cll;
+ long long max_fall;
+
+ MasteringMetadata* mastering_metadata;
+};
+
class VideoTrack : public Track {
VideoTrack(const VideoTrack&);
VideoTrack& operator=(const VideoTrack&);
@@ -398,6 +480,7 @@ class VideoTrack : public Track {
VideoTrack(Segment*, long long element_start, long long element_size);
public:
+ virtual ~VideoTrack();
static long Parse(Segment*, const Info&, long long element_start,
long long element_size, VideoTrack*&);
@@ -412,6 +495,8 @@ class VideoTrack : public Track {
bool VetEntry(const BlockEntry*) const;
long Seek(long long time_ns, const BlockEntry*&) const;
+ Colour* GetColour() const;
+
private:
long long m_width;
long long m_height;
@@ -421,6 +506,8 @@ class VideoTrack : public Track {
long long m_stereo_mode;
double m_rate;
+
+ Colour* m_colour;
};
class AudioTrack : public Track {
@@ -1013,7 +1100,7 @@ class Segment {
const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&);
};
-} // end namespace mkvparser
+} // namespace mkvparser
inline long mkvparser::Segment::LoadCluster() {
long long pos;
@@ -1022,4 +1109,4 @@ inline long mkvparser::Segment::LoadCluster() {
return LoadCluster(pos, size);
}
-#endif // MKVPARSER_HPP
+#endif // MKVPARSER_MKVPARSER_H_
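With the header additions above, HDR signalling becomes reachable from application code through VideoTrack::GetColour(). A hedged usage sketch — not part of the diff — assuming `segment` is a fully loaded mkvparser::Segment*:

    const mkvparser::Tracks* const tracks = segment->GetTracks();
    const unsigned long count = tracks ? tracks->GetTracksCount() : 0;
    for (unsigned long i = 0; i < count; ++i) {
      const mkvparser::Track* const track = tracks->GetTrackByIndex(i);
      if (track == NULL || track->GetType() != mkvparser::Track::kVideo)
        continue;
      const mkvparser::VideoTrack* const video =
          static_cast<const mkvparser::VideoTrack*>(track);
      const mkvparser::Colour* const colour = video->GetColour();
      if (colour == NULL)
        continue;  // this track carried no Colour element
      // Every field defaults to Colour::kValueNotPresent; test before use.
      if (colour->transfer_characteristics !=
          mkvparser::Colour::kValueNotPresent) {
        // Matroska reuses the H.273 code points here (e.g. 16 is PQ).
      }
      if (colour->mastering_metadata != NULL) {
        const float max_nits = colour->mastering_metadata->luminance_max;
        (void)max_nits;  // MasteringMetadata::kValueNotPresent when absent
      }
    }

Lifetime note: the Colour object remains owned by the VideoTrack (it is deleted in the new ~VideoTrack shown above), so callers must not free it.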
diff --git a/libvpx/third_party/libwebm/mkvreader.cpp b/libvpx/third_party/libwebm/mkvparser/mkvreader.cc
index eaf9e0a79..9f90d8c4f 100644
--- a/libvpx/third_party/libwebm/mkvreader.cpp
+++ b/libvpx/third_party/libwebm/mkvparser/mkvreader.cc
@@ -5,8 +5,7 @@
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-
-#include "mkvreader.hpp"
+#include "mkvparser/mkvreader.h"
#include <cassert>
@@ -129,4 +128,4 @@ int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
return 0; // success
}
-} // end namespace mkvparser
+} // namespace mkvparser
\ No newline at end of file
diff --git a/libvpx/third_party/libwebm/mkvreader.hpp b/libvpx/third_party/libwebm/mkvparser/mkvreader.h
index 82ebad544..9831ecf64 100644
--- a/libvpx/third_party/libwebm/mkvreader.hpp
+++ b/libvpx/third_party/libwebm/mkvparser/mkvreader.h
@@ -5,13 +5,13 @@
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
+#ifndef MKVPARSER_MKVREADER_H_
+#define MKVPARSER_MKVREADER_H_
-#ifndef MKVREADER_HPP
-#define MKVREADER_HPP
-
-#include "mkvparser.hpp"
#include <cstdio>
+#include "mkvparser/mkvparser.h"
+
namespace mkvparser {
class MkvReader : public IMkvReader {
@@ -40,6 +40,6 @@ class MkvReader : public IMkvReader {
bool reader_owns_file_;
};
-} // end namespace mkvparser
+} // namespace mkvparser
-#endif // MKVREADER_HPP
+#endif // MKVPARSER_MKVREADER_H_
diff --git a/libvpx/third_party/x86inc/README.libvpx b/libvpx/third_party/x86inc/README.libvpx
index e91e305a2..8d3cd966d 100644
--- a/libvpx/third_party/x86inc/README.libvpx
+++ b/libvpx/third_party/x86inc/README.libvpx
@@ -1,5 +1,5 @@
-URL: http://git.videolan.org/?p=x264.git
-Version: a95584945dd9ce3acc66c6cd8f6796bc4404d40d
+URL: https://git.videolan.org/git/x264.git
+Version: d23d18655249944c1ca894b451e2c82c7a584c62
License: ISC
License File: LICENSE
@@ -13,12 +13,8 @@ Prefix functions with vpx by default.
Manage name mangling (prefixing with '_') manually because 'PREFIX' does not
exist in libvpx.
Expand PIC default to macho64 and respect CONFIG_PIC from libvpx
-Catch all elf formats for 'hidden' status and SECTION notes.
-Avoid 'amdnop' when building with nasm.
Set 'private_extern' visibility for macho targets.
Copy PIC 'GLOBAL' macros from x86_abi_support.asm
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout
Only use 'hidden' visibility with Chromium
-Move '%use smartalign' for nasm out of 'INIT_CPUFLAGS' and before
- 'ALIGNMODE'.
diff --git a/libvpx/third_party/x86inc/x86inc.asm b/libvpx/third_party/x86inc/x86inc.asm
index be59de311..b647dff2f 100644
--- a/libvpx/third_party/x86inc/x86inc.asm
+++ b/libvpx/third_party/x86inc/x86inc.asm
@@ -1,7 +1,7 @@
;*****************************************************************************
;* x86inc.asm: x264asm abstraction layer
;*****************************************************************************
-;* Copyright (C) 2005-2015 x264 project
+;* Copyright (C) 2005-2016 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;* Anton Mitrofanov <BugMaster@narod.ru>
@@ -66,16 +66,35 @@
%endif
%endif
-%ifidn __OUTPUT_FORMAT__,elf32
- %define mangle(x) x
+%define FORMAT_ELF 0
+%ifidn __OUTPUT_FORMAT__,elf
+ %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+ %define FORMAT_ELF 1
%elifidn __OUTPUT_FORMAT__,elf64
- %define mangle(x) x
-%elifidn __OUTPUT_FORMAT__,x64
- %define mangle(x) x
-%elifidn __OUTPUT_FORMAT__,win64
- %define mangle(x) x
+ %define FORMAT_ELF 1
+%endif
+
+%define FORMAT_MACHO 0
+%ifidn __OUTPUT_FORMAT__,macho32
+ %define FORMAT_MACHO 1
+%elifidn __OUTPUT_FORMAT__,macho64
+ %define FORMAT_MACHO 1
+%endif
+
+; Set PREFIX for libvpx builds.
+%if FORMAT_ELF
+ %undef PREFIX
+%elif WIN64
+ %undef PREFIX
%else
+ %define PREFIX
+%endif
+
+%ifdef PREFIX
%define mangle(x) _ %+ x
+%else
+ %define mangle(x) x
%endif
; In some instances macho32 tables get misaligned when using .rodata.
@@ -94,14 +113,6 @@
%endif
%endmacro
-%macro SECTION_TEXT 0-1 16
- %ifidn __OUTPUT_FORMAT__,aout
- SECTION .text
- %else
- SECTION .text align=%1
- %endif
-%endmacro
-
; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
; from original code is added in for 64bit.
%ifidn __OUTPUT_FORMAT__,elf32
@@ -119,7 +130,7 @@
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
- %define GET_GOT_SAVE_ARG 1
+ %define GET_GOT_DEFINED 1
%define WRT_PLT wrt ..plt
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
@@ -138,7 +149,7 @@
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
- %define GET_GOT_SAVE_ARG 1
+ %define GET_GOT_DEFINED 1
%macro GET_GOT 1
push %1
call %%get_got
@@ -149,6 +160,8 @@
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
+ %else
+ %define GET_GOT_DEFINED 0
%endif
%endif
@@ -186,8 +199,16 @@
%ifdef PIC
default rel
%endif
+
+%ifndef GET_GOT_DEFINED
+ %define GET_GOT_DEFINED 0
+%endif
; Done with PIC macros
+%ifdef __NASM_VER__
+ %use smartalign
+%endif
+
; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
@@ -235,6 +256,7 @@
%define r%1w %2w
%define r%1b %2b
%define r%1h %2h
+ %define %2q %2
%if %0 == 2
%define r%1m %2d
%define r%1mp %2
@@ -259,9 +281,9 @@
%define e%1h %3
%define r%1b %2
%define e%1b %2
-%if ARCH_X86_64 == 0
- %define r%1 e%1
-%endif
+ %if ARCH_X86_64 == 0
+ %define r%1 e%1
+ %endif
%endmacro
DECLARE_REG_SIZE ax, al, ah
@@ -371,7 +393,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%macro ASSERT 1
%if (%1) == 0
- %error assert failed
+ %error assertion ``%1'' failed
%endif
%endmacro
@@ -462,8 +484,10 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
%if %1 > 0
%assign regs_used (regs_used + 1)
- %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
- %warning "Stack pointer will overwrite register argument"
+ %endif
+ %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+ ; Ensure that we don't clobber any registers containing arguments
+ %assign regs_used 5 + UNIX64 * 3
%endif
%endif
%endif
@@ -577,9 +601,9 @@ DECLARE_REG 14, R15, 120
%macro RET 0
WIN64_RESTORE_XMM_INTERNAL rsp
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
- vzeroupper
-%endif
+ %if mmsize == 32
+ vzeroupper
+ %endif
AUTO_REP_RET
%endmacro
@@ -616,17 +640,17 @@ DECLARE_REG 14, R15, 72
%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0
%macro RET 0
-%if stack_size_padded > 0
-%if required_stack_alignment > STACK_ALIGNMENT
- mov rsp, rstkm
-%else
- add rsp, stack_size_padded
-%endif
-%endif
+ %if stack_size_padded > 0
+ %if required_stack_alignment > STACK_ALIGNMENT
+ mov rsp, rstkm
+ %else
+ add rsp, stack_size_padded
+ %endif
+ %endif
POP_IF_USED 14, 13, 12, 11, 10, 9
-%if mmsize == 32
- vzeroupper
-%endif
+ %if mmsize == 32
+ vzeroupper
+ %endif
AUTO_REP_RET
%endmacro
@@ -672,29 +696,29 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0
%macro RET 0
-%if stack_size_padded > 0
-%if required_stack_alignment > STACK_ALIGNMENT
- mov rsp, rstkm
-%else
- add rsp, stack_size_padded
-%endif
-%endif
+ %if stack_size_padded > 0
+ %if required_stack_alignment > STACK_ALIGNMENT
+ mov rsp, rstkm
+ %else
+ add rsp, stack_size_padded
+ %endif
+ %endif
POP_IF_USED 6, 5, 4, 3
-%if mmsize == 32
- vzeroupper
-%endif
+ %if mmsize == 32
+ vzeroupper
+ %endif
AUTO_REP_RET
%endmacro
%endif ;======================================================================
%if WIN64 == 0
-%macro WIN64_SPILL_XMM 1
-%endmacro
-%macro WIN64_RESTORE_XMM 1
-%endmacro
-%macro WIN64_PUSH_XMM 0
-%endmacro
+ %macro WIN64_SPILL_XMM 1
+ %endmacro
+ %macro WIN64_RESTORE_XMM 1
+ %endmacro
+ %macro WIN64_PUSH_XMM 0
+ %endmacro
%endif
; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
@@ -707,24 +731,26 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%else
rep ret
%endif
+ annotate_function_size
%endmacro
%define last_branch_adr $$
%macro AUTO_REP_RET 0
- %ifndef cpuflags
- times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ != last_branch_adr.
- %elif notcpuflag(ssse3)
- times ((last_branch_adr-$)>>31)+1 rep
+ %if notcpuflag(ssse3)
+ times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
%endif
ret
+ annotate_function_size
%endmacro
%macro BRANCH_INSTR 0-*
%rep %0
%macro %1 1-2 %1
%2 %1
- %%branch_instr:
- %xdefine last_branch_adr %%branch_instr
+ %if notcpuflag(ssse3)
+ %%branch_instr equ $
+ %xdefine last_branch_adr %%branch_instr
+ %endif
%endmacro
%rotate 1
%endrep
@@ -739,6 +765,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%elif %2
jmp %1
%endif
+ annotate_function_size
%endmacro
;=============================================================================
@@ -760,6 +787,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
cglobal_internal 0, %1 %+ SUFFIX, %2
%endmacro
%macro cglobal_internal 2-3+
+ annotate_function_size
%if %1
%xdefine %%FUNCTION_PREFIX private_prefix
; libvpx explicitly sets visibility in shared object builds. Avoid
@@ -780,17 +808,10 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
CAT_XDEFINE cglobaled_, %2, 1
%endif
%xdefine current_function %2
- %ifidn __OUTPUT_FORMAT__,elf32
- global %2:function %%VISIBILITY
- %elifidn __OUTPUT_FORMAT__,elf64
+ %xdefine current_function_section __SECT__
+ %if FORMAT_ELF
global %2:function %%VISIBILITY
- %elifidn __OUTPUT_FORMAT__,macho32
- %ifdef __NASM_VER__
- global %2
- %else
- global %2:private_extern
- %endif
- %elifidn __OUTPUT_FORMAT__,macho64
+ %elif FORMAT_MACHO
%ifdef __NASM_VER__
global %2
%else
@@ -820,16 +841,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
; like cextern, but without the prefix
%macro cextern_naked 1
- %xdefine %1 mangle(%1)
+ %ifdef PREFIX
+ %xdefine %1 mangle(%1)
+ %endif
CAT_XDEFINE cglobaled_, %1, 1
extern %1
%endmacro
%macro const 1-2+
%xdefine %1 mangle(private_prefix %+ _ %+ %1)
- %ifidn __OUTPUT_FORMAT__,elf32
- global %1:data hidden
- %elifidn __OUTPUT_FORMAT__,elf64
+ %if FORMAT_ELF
global %1:data hidden
%else
global %1
@@ -837,14 +858,29 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%1: %2
%endmacro
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf32
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
-%elifidn __OUTPUT_FORMAT__,elf64
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
+ [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
%endif
+; Tell debuggers how large the function was.
+; This may be invoked multiple times per function; we rely on later instances overriding earlier ones.
+; This is invoked by RET and similar macros, and also cglobal does it for the previous function,
+; but if the last function in a source file doesn't use any of the standard macros for its epilogue,
+; then its size might be unspecified.
+%macro annotate_function_size 0
+ %ifdef __YASM_VER__
+ %ifdef current_function
+ %if FORMAT_ELF
+ current_function_section
+ %%ecf equ $
+ size current_function %%ecf - current_function
+ __SECT__
+ %endif
+ %endif
+ %endif
+%endmacro
+
; cpuflags
%assign cpuflags_mmx (1<<0)
@@ -873,12 +909,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt
%assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1
-%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
-
-%ifdef __NASM_VER__
- %use smartalign
-%endif
+; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
+%define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
+%define notcpuflag(x) (cpuflag(x) ^ 1)
; Takes an arbitrary number of cpuflags from the above list.
; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
@@ -915,12 +948,18 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endif
%endif
- %ifdef __NASM_VER__
- ALIGNMODE k7
- %elif ARCH_X86_64 || cpuflag(sse2)
- CPU amdnop
+ %if ARCH_X86_64 || cpuflag(sse2)
+ %ifdef __NASM_VER__
+ ALIGNMODE k8
+ %else
+ CPU amdnop
+ %endif
%else
- CPU basicnop
+ %ifdef __NASM_VER__
+ ALIGNMODE nop
+ %else
+ CPU basicnop
+ %endif
%endif
%endmacro
@@ -949,14 +988,14 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define movnta movntq
%assign %%i 0
%rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nnmm, %%i, %%i
- %assign %%i %%i+1
+ CAT_XDEFINE m, %%i, mm %+ %%i
+ CAT_XDEFINE nnmm, %%i, %%i
+ %assign %%i %%i+1
%endrep
%rep 8
- CAT_UNDEF m, %%i
- CAT_UNDEF nnmm, %%i
- %assign %%i %%i+1
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nnmm, %%i
+ %assign %%i %%i+1
%endrep
INIT_CPUFLAGS %1
%endmacro
@@ -967,7 +1006,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define mmsize 16
%define num_mmregs 8
%if ARCH_X86_64
- %define num_mmregs 16
+ %define num_mmregs 16
%endif
%define mova movdqa
%define movu movdqu
@@ -975,9 +1014,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define movnta movntdq
%assign %%i 0
%rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nnxmm, %%i, %%i
- %assign %%i %%i+1
+ CAT_XDEFINE m, %%i, xmm %+ %%i
+ CAT_XDEFINE nnxmm, %%i, %%i
+ %assign %%i %%i+1
%endrep
INIT_CPUFLAGS %1
%endmacro
@@ -988,7 +1027,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define mmsize 32
%define num_mmregs 8
%if ARCH_X86_64
- %define num_mmregs 16
+ %define num_mmregs 16
%endif
%define mova movdqa
%define movu movdqu
@@ -996,9 +1035,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%define movnta movntdq
%assign %%i 0
%rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nnymm, %%i, %%i
- %assign %%i %%i+1
+ CAT_XDEFINE m, %%i, ymm %+ %%i
+ CAT_XDEFINE nnymm, %%i, %%i
+ %assign %%i %%i+1
%endrep
INIT_CPUFLAGS %1
%endmacro
@@ -1022,7 +1061,7 @@ INIT_XMM
%assign i 0
%rep 16
DECLARE_MMCAST i
-%assign i i+1
+ %assign i i+1
%endrep
; I often want to use macros that permute their arguments. e.g. there's no
@@ -1040,23 +1079,23 @@ INIT_XMM
; doesn't cost any cycles.
%macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
- %xdefine %%tmp%2 m%2
- %rotate 2
-%endrep
-%rep %0/2
- %xdefine m%1 %%tmp%2
- CAT_XDEFINE nn, m%1, %1
- %rotate 2
-%endrep
+ %rep %0/2
+ %xdefine %%tmp%2 m%2
+ %rotate 2
+ %endrep
+ %rep %0/2
+ %xdefine m%1 %%tmp%2
+ CAT_XDEFINE nn, m%1, %1
+ %rotate 2
+ %endrep
%endmacro
%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs)
-%ifnum %1 ; SWAP 0, 1, ...
- SWAP_INTERNAL_NUM %1, %2
-%else ; SWAP m0, m1, ...
- SWAP_INTERNAL_NAME %1, %2
-%endif
+ %ifnum %1 ; SWAP 0, 1, ...
+ SWAP_INTERNAL_NUM %1, %2
+ %else ; SWAP m0, m1, ...
+ SWAP_INTERNAL_NAME %1, %2
+ %endif
%endmacro
%macro SWAP_INTERNAL_NUM 2-*
@@ -1066,7 +1105,7 @@ INIT_XMM
%xdefine m%2 %%tmp
CAT_XDEFINE nn, m%1, %1
CAT_XDEFINE nn, m%2, %2
- %rotate 1
+ %rotate 1
%endrep
%endmacro
@@ -1074,7 +1113,7 @@ INIT_XMM
%xdefine %%args nn %+ %1
%rep %0-1
%xdefine %%args %%args, nn %+ %2
- %rotate 1
+ %rotate 1
%endrep
SWAP_INTERNAL_NUM %%args
%endmacro
@@ -1091,7 +1130,7 @@ INIT_XMM
%assign %%i 0
%rep num_mmregs
CAT_XDEFINE %%f, %%i, m %+ %%i
- %assign %%i %%i+1
+ %assign %%i %%i+1
%endrep
%endmacro
@@ -1101,20 +1140,20 @@ INIT_XMM
%rep num_mmregs
CAT_XDEFINE m, %%i, %1_m %+ %%i
CAT_XDEFINE nn, m %+ %%i, %%i
- %assign %%i %%i+1
+ %assign %%i %%i+1
%endrep
%endif
%endmacro
; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
%macro call 1
- call_internal %1, %1 %+ SUFFIX
+ call_internal %1 %+ SUFFIX, %1
%endmacro
%macro call_internal 2
- %xdefine %%i %1
- %ifndef cglobaled_%1
- %ifdef cglobaled_%2
- %xdefine %%i %2
+ %xdefine %%i %2
+ %ifndef cglobaled_%2
+ %ifdef cglobaled_%1
+ %xdefine %%i %1
%endif
%endif
call %%i
@@ -1157,7 +1196,7 @@ INIT_XMM
%endif
CAT_XDEFINE sizeofxmm, i, 16
CAT_XDEFINE sizeofymm, i, 32
-%assign i i+1
+ %assign i i+1
%endrep
%undef i
@@ -1534,7 +1573,7 @@ AVX_INSTR pfmul, 3dnow, 1, 0, 1
%else
CAT_XDEFINE q, j, i
%endif
-%assign i i+1
+ %assign i i+1
%endrep
%undef i
%undef j
@@ -1557,55 +1596,54 @@ FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
FMA_INSTR pmadcswd, pmaddwd, paddd
-; convert FMA4 to FMA3 if possible
-%macro FMA4_INSTR 4
- %macro %1 4-8 %1, %2, %3, %4
- %if cpuflag(fma4)
- v%5 %1, %2, %3, %4
- %elifidn %1, %2
- v%6 %1, %4, %3 ; %1 = %1 * %3 + %4
- %elifidn %1, %3
- v%7 %1, %2, %4 ; %1 = %2 * %1 + %4
- %elifidn %1, %4
- v%8 %1, %2, %3 ; %1 = %2 * %3 + %1
- %else
- %error fma3 emulation of ``%5 %1, %2, %3, %4'' is not supported
- %endif
- %endmacro
-%endmacro
-
-FMA4_INSTR fmaddpd, fmadd132pd, fmadd213pd, fmadd231pd
-FMA4_INSTR fmaddps, fmadd132ps, fmadd213ps, fmadd231ps
-FMA4_INSTR fmaddsd, fmadd132sd, fmadd213sd, fmadd231sd
-FMA4_INSTR fmaddss, fmadd132ss, fmadd213ss, fmadd231ss
-
-FMA4_INSTR fmaddsubpd, fmaddsub132pd, fmaddsub213pd, fmaddsub231pd
-FMA4_INSTR fmaddsubps, fmaddsub132ps, fmaddsub213ps, fmaddsub231ps
-FMA4_INSTR fmsubaddpd, fmsubadd132pd, fmsubadd213pd, fmsubadd231pd
-FMA4_INSTR fmsubaddps, fmsubadd132ps, fmsubadd213ps, fmsubadd231ps
-
-FMA4_INSTR fmsubpd, fmsub132pd, fmsub213pd, fmsub231pd
-FMA4_INSTR fmsubps, fmsub132ps, fmsub213ps, fmsub231ps
-FMA4_INSTR fmsubsd, fmsub132sd, fmsub213sd, fmsub231sd
-FMA4_INSTR fmsubss, fmsub132ss, fmsub213ss, fmsub231ss
-
-FMA4_INSTR fnmaddpd, fnmadd132pd, fnmadd213pd, fnmadd231pd
-FMA4_INSTR fnmaddps, fnmadd132ps, fnmadd213ps, fnmadd231ps
-FMA4_INSTR fnmaddsd, fnmadd132sd, fnmadd213sd, fnmadd231sd
-FMA4_INSTR fnmaddss, fnmadd132ss, fnmadd213ss, fnmadd231ss
-
-FMA4_INSTR fnmsubpd, fnmsub132pd, fnmsub213pd, fnmsub231pd
-FMA4_INSTR fnmsubps, fnmsub132ps, fnmsub213ps, fnmsub231ps
-FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd
-FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss
-
-; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug
-%if ARCH_X86_64 == 0
-%macro vpbroadcastq 2
-%if sizeof%1 == 16
- movddup %1, %2
-%else
- vbroadcastsd %1, %2
-%endif
-%endmacro
+; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
+; FMA3 is only possible if dst is the same as one of the src registers.
+; Either src2 or src3 can be a memory operand.
+%macro FMA4_INSTR 2-*
+ %push fma4_instr
+ %xdefine %$prefix %1
+ %rep %0 - 1
+ %macro %$prefix%2 4-6 %$prefix, %2
+ %if notcpuflag(fma3) && notcpuflag(fma4)
+ %error use of ``%5%6'' fma instruction in cpuname function: current_function
+ %elif cpuflag(fma4)
+ v%5%6 %1, %2, %3, %4
+ %elifidn %1, %2
+ ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
+ %ifid %3
+ v%{5}213%6 %2, %3, %4
+ %else
+ v%{5}132%6 %2, %4, %3
+ %endif
+ %elifidn %1, %3
+ v%{5}213%6 %3, %2, %4
+ %elifidn %1, %4
+ v%{5}231%6 %4, %2, %3
+ %else
+ %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported
+ %endif
+ %endmacro
+ %rotate 1
+ %endrep
+ %pop
+%endmacro
+
+FMA4_INSTR fmadd, pd, ps, sd, ss
+FMA4_INSTR fmaddsub, pd, ps
+FMA4_INSTR fmsub, pd, ps, sd, ss
+FMA4_INSTR fmsubadd, pd, ps
+FMA4_INSTR fnmadd, pd, ps, sd, ss
+FMA4_INSTR fnmsub, pd, ps, sd, ss
+
+; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
+%ifdef __YASM_VER__
+ %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+ %macro vpbroadcastq 2
+ %if sizeof%1 == 16
+ movddup %1, %2
+ %else
+ vbroadcastsd %1, %2
+ %endif
+ %endmacro
+ %endif
%endif
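
For reference, a hedged sketch (not part of the patch) of the constraint the consolidated FMA4_INSTR wrapper above works around: FMA4 has a separate destination, while FMA3 must overwrite one of its sources, which is what the 132/213/231 encodings express. The same contract is visible with compiler intrinsics (build with -mfma):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      __m128 a = _mm_set1_ps(2.0f), b = _mm_set1_ps(3.0f), c = _mm_set1_ps(1.0f);
      /* a*b + c; with FMA3 the result lands in one of the inputs, so the
       * wrapper above picks vfmadd132/213/231 based on which source the
       * destination aliases. */
      __m128 r = _mm_fmadd_ps(a, b, c);
      float out[4];
      _mm_storeu_ps(out, r);
      printf("%f\n", out[0]); /* 7.000000 */
      return 0;
    }
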
diff --git a/libvpx/tools/gen_authors.sh b/libvpx/tools/gen_authors.sh
index e1246f08a..4cfd81ec3 100755
--- a/libvpx/tools/gen_authors.sh
+++ b/libvpx/tools/gen_authors.sh
@@ -6,7 +6,7 @@ cat <<EOF
# This file is automatically generated from the git commit history
# by tools/gen_authors.sh.
-$(git log --pretty=format:"%aN <%aE>" | sort | uniq)
+$(git log --pretty=format:"%aN <%aE>" | sort | uniq | grep -v corp.google)
Google Inc.
The Mozilla Foundation
The Xiph.Org Foundation
diff --git a/libvpx/tools_common.c b/libvpx/tools_common.c
index 20b259ca9..17c0d44f5 100644
--- a/libvpx/tools_common.c
+++ b/libvpx/tools_common.c
@@ -16,11 +16,11 @@
#include "./tools_common.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
#endif
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
@@ -133,10 +133,6 @@ int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) {
#if CONFIG_ENCODERS
static const VpxInterface vpx_encoders[] = {
-#if CONFIG_VP10_ENCODER
- {"vp10", VP10_FOURCC, &vpx_codec_vp10_cx},
-#endif
-
#if CONFIG_VP8_ENCODER
{"vp8", VP8_FOURCC, &vpx_codec_vp8_cx},
#endif
@@ -178,10 +174,6 @@ static const VpxInterface vpx_decoders[] = {
#if CONFIG_VP9_DECODER
{"vp9", VP9_FOURCC, &vpx_codec_vp9_dx},
#endif
-
-#if CONFIG_VP10_DECODER
- {"vp10", VP10_FOURCC, &vpx_codec_vp10_dx},
-#endif
};
int get_vpx_decoder_count(void) {
diff --git a/libvpx/tools_common.h b/libvpx/tools_common.h
index 98347b6f2..310b5695f 100644
--- a/libvpx/tools_common.h
+++ b/libvpx/tools_common.h
@@ -62,7 +62,6 @@
#define VP8_FOURCC 0x30385056
#define VP9_FOURCC 0x30395056
-#define VP10_FOURCC 0x303a5056
enum VideoFileType {
FILE_TYPE_RAW,
diff --git a/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c b/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
index 9824a3193..bb6ea76ba 100644
--- a/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
+++ b/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c
@@ -21,114 +21,6 @@ static const uint8_t bifilter4_coeff[8][2] = {
{ 16, 112}
};
-void vp8_bilinear_predict4x4_neon(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
- uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
- uint8x16_t q1u8, q2u8;
- uint16x8_t q1u16, q2u16;
- uint16x8_t q7u16, q8u16, q9u16;
- uint64x2_t q4u64, q5u64;
- uint64x1_t d12u64;
- uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
-
- if (xoffset == 0) { // skip_1stpass_filter
- uint32x2_t d28u32 = vdup_n_u32(0);
- uint32x2_t d29u32 = vdup_n_u32(0);
- uint32x2_t d30u32 = vdup_n_u32(0);
-
- d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
- src_ptr += src_pixels_per_line;
- d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
- src_ptr += src_pixels_per_line;
- d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
- src_ptr += src_pixels_per_line;
- d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
- src_ptr += src_pixels_per_line;
- d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
- d28u8 = vreinterpret_u8_u32(d28u32);
- d29u8 = vreinterpret_u8_u32(d29u32);
- d30u8 = vreinterpret_u8_u32(d30u32);
- } else {
- d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
- d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
- d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
- d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
- d6u8 = vld1_u8(src_ptr);
-
- q1u8 = vcombine_u8(d2u8, d3u8);
- q2u8 = vcombine_u8(d4u8, d5u8);
-
- d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
- d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
-
- q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
- q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
- d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
- vreinterpret_u32_u8(vget_high_u8(q1u8)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
- vreinterpret_u32_u8(vget_high_u8(q2u8)));
- d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
- vreinterpret_u32_u64(vget_high_u64(q4u64)));
- d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
- vreinterpret_u32_u64(vget_high_u64(q5u64)));
-
- q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
- q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
- q9u16 = vmull_u8(d6u8, d0u8);
-
- q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
- q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
- q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
-
- d28u8 = vqrshrn_n_u16(q7u16, 7);
- d29u8 = vqrshrn_n_u16(q8u16, 7);
- d30u8 = vqrshrn_n_u16(q9u16, 7);
- }
-
- // secondpass_filter
- if (yoffset == 0) { // skip_2ndpass_filter
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
- } else {
- d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
- d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
-
- q1u16 = vmull_u8(d28u8, d0u8);
- q2u16 = vmull_u8(d29u8, d0u8);
-
- d26u8 = vext_u8(d28u8, d29u8, 4);
- d27u8 = vext_u8(d29u8, d30u8, 4);
-
- q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
- q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
-
- d2u8 = vqrshrn_n_u16(q1u16, 7);
- d3u8 = vqrshrn_n_u16(q2u16, 7);
-
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
- }
- return;
-}
-
void vp8_bilinear_predict8x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
diff --git a/libvpx/vp8/common/arm/neon/sixtappredict_neon.c b/libvpx/vp8/common/arm/neon/sixtappredict_neon.c
index 4c2efc92b..49d8d221f 100644
--- a/libvpx/vp8/common/arm/neon/sixtappredict_neon.c
+++ b/libvpx/vp8/common/arm/neon/sixtappredict_neon.c
@@ -22,383 +22,6 @@ static const int8_t vp8_sub_pel_filters[8][8] = {
{0, -1, 12, 123, -6, 0, 0, 0},
};
-void vp8_sixtap_predict4x4_neon(
- unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- unsigned char *dst_ptr,
- int dst_pitch) {
- unsigned char *src;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8;
- uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
- int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
- uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
- uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
- int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
- int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
- uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8;
- uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64;
- uint32x2x2_t d0u32x2, d1u32x2;
-
- if (xoffset == 0) { // secondpass_filter4x4_only
- uint32x2_t d27u32 = vdup_n_u32(0);
- uint32x2_t d28u32 = vdup_n_u32(0);
- uint32x2_t d29u32 = vdup_n_u32(0);
- uint32x2_t d30u32 = vdup_n_u32(0);
- uint32x2_t d31u32 = vdup_n_u32(0);
-
- // load second_pass filter
- dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
- d0s8 = vdup_lane_s8(dtmps8, 0);
- d1s8 = vdup_lane_s8(dtmps8, 1);
- d2s8 = vdup_lane_s8(dtmps8, 2);
- d3s8 = vdup_lane_s8(dtmps8, 3);
- d4s8 = vdup_lane_s8(dtmps8, 4);
- d5s8 = vdup_lane_s8(dtmps8, 5);
- d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
- d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
- d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
- d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
- d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
- d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
-
- // load src data
- src = src_ptr - src_pixels_per_line * 2;
- d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 0);
- src += src_pixels_per_line;
- d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1);
- src += src_pixels_per_line;
- d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 0);
- src += src_pixels_per_line;
- d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1);
- src += src_pixels_per_line;
- d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 0);
- src += src_pixels_per_line;
- d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1);
- src += src_pixels_per_line;
- d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 0);
- src += src_pixels_per_line;
- d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1);
- src += src_pixels_per_line;
- d31u32 = vld1_lane_u32((const uint32_t *)src, d31u32, 0);
-
- d27u8 = vreinterpret_u8_u32(d27u32);
- d28u8 = vreinterpret_u8_u32(d28u32);
- d29u8 = vreinterpret_u8_u32(d29u32);
- d30u8 = vreinterpret_u8_u32(d30u32);
- d31u8 = vreinterpret_u8_u32(d31u32);
-
- d23u8 = vext_u8(d27u8, d28u8, 4);
- d24u8 = vext_u8(d28u8, d29u8, 4);
- d25u8 = vext_u8(d29u8, d30u8, 4);
- d26u8 = vext_u8(d30u8, d31u8, 4);
-
- q3u16 = vmull_u8(d27u8, d0u8);
- q4u16 = vmull_u8(d28u8, d0u8);
- q5u16 = vmull_u8(d25u8, d5u8);
- q6u16 = vmull_u8(d26u8, d5u8);
-
- q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
- q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
- q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
- q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
-
- q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
- q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
- q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
- q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
-
- q3s16 = vreinterpretq_s16_u16(q3u16);
- q4s16 = vreinterpretq_s16_u16(q4u16);
- q5s16 = vreinterpretq_s16_u16(q5u16);
- q6s16 = vreinterpretq_s16_u16(q6u16);
-
- q5s16 = vqaddq_s16(q5s16, q3s16);
- q6s16 = vqaddq_s16(q6s16, q4s16);
-
- d3u8 = vqrshrun_n_s16(q5s16, 7);
- d4u8 = vqrshrun_n_s16(q6s16, 7);
-
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
- return;
- }
-
- // load first_pass filter
- dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
- d0s8 = vdup_lane_s8(dtmps8, 0);
- d1s8 = vdup_lane_s8(dtmps8, 1);
- d2s8 = vdup_lane_s8(dtmps8, 2);
- d3s8 = vdup_lane_s8(dtmps8, 3);
- d4s8 = vdup_lane_s8(dtmps8, 4);
- d5s8 = vdup_lane_s8(dtmps8, 5);
- d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
- d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
- d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
- d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
- d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
- d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
-
- // First pass: output_height lines x output_width columns (9x4)
-
- if (yoffset == 0) // firstpass_filter4x4_only
- src = src_ptr - 2;
- else
- src = src_ptr - 2 - (src_pixels_per_line * 2);
-
- q3u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q4u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q5u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q6u8 = vld1q_u8(src);
- src += src_pixels_per_line;
-
- d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
- d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
- d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
- d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
-
- // vswp here
- q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
- q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
- vreinterpret_u32_u8(d19u8));
- d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
- vreinterpret_u32_u8(d21u8));
- q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
- q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
-
- // keep original src data in q4 q6
- q4u64 = vreinterpretq_u64_u8(q3u8);
- q6u64 = vreinterpretq_u64_u8(q5u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
- vreinterpret_u32_u8(vget_high_u8(q3u8)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
- vreinterpret_u32_u8(vget_high_u8(q5u8)));
- q9u64 = vshrq_n_u64(q4u64, 8);
- q10u64 = vshrq_n_u64(q6u64, 8);
- q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
- q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
- vreinterpret_u32_u64(vget_high_u64(q9u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
- vreinterpret_u32_u64(vget_high_u64(q10u64)));
- q3u64 = vshrq_n_u64(q4u64, 32);
- q5u64 = vshrq_n_u64(q6u64, 32);
- q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
- q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
- vreinterpret_u32_u64(vget_high_u64(q3u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
- vreinterpret_u32_u64(vget_high_u64(q5u64)));
- q9u64 = vshrq_n_u64(q4u64, 16);
- q10u64 = vshrq_n_u64(q6u64, 16);
- q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
- q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
- vreinterpret_u32_u64(vget_high_u64(q9u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
- vreinterpret_u32_u64(vget_high_u64(q10u64)));
- q3u64 = vshrq_n_u64(q4u64, 24);
- q5u64 = vshrq_n_u64(q6u64, 24);
- q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
- q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
- vreinterpret_u32_u64(vget_high_u64(q3u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
- vreinterpret_u32_u64(vget_high_u64(q5u64)));
- q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
- q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
-
- q7s16 = vreinterpretq_s16_u16(q7u16);
- q8s16 = vreinterpretq_s16_u16(q8u16);
- q9s16 = vreinterpretq_s16_u16(q9u16);
- q10s16 = vreinterpretq_s16_u16(q10u16);
- q7s16 = vqaddq_s16(q7s16, q9s16);
- q8s16 = vqaddq_s16(q8s16, q10s16);
-
- d27u8 = vqrshrun_n_s16(q7s16, 7);
- d28u8 = vqrshrun_n_s16(q8s16, 7);
-
- if (yoffset == 0) { // firstpass_filter4x4_only
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
- return;
- }
-
- // First Pass on rest 5-line data
- q3u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q4u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q5u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q6u8 = vld1q_u8(src);
- src += src_pixels_per_line;
- q11u8 = vld1q_u8(src);
-
- d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
- d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
- d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
- d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
-
- // vswp here
- q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
- q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
- vreinterpret_u32_u8(d19u8));
- d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
- vreinterpret_u32_u8(d21u8));
- d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5);
- q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
- q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
- q12u16 = vmull_u8(d31u8, d5u8);
-
- q4u64 = vreinterpretq_u64_u8(q3u8);
- q6u64 = vreinterpretq_u64_u8(q5u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
- vreinterpret_u32_u8(vget_high_u8(q3u8)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
- vreinterpret_u32_u8(vget_high_u8(q5u8)));
- q9u64 = vshrq_n_u64(q4u64, 8);
- q10u64 = vshrq_n_u64(q6u64, 8);
- q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
- q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
- q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
- vreinterpret_u32_u64(vget_high_u64(q9u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
- vreinterpret_u32_u64(vget_high_u64(q10u64)));
- q3u64 = vshrq_n_u64(q4u64, 32);
- q5u64 = vshrq_n_u64(q6u64, 32);
- d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1);
- q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
- q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
- q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
- vreinterpret_u32_u64(vget_high_u64(q3u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
- vreinterpret_u32_u64(vget_high_u64(q5u64)));
- q9u64 = vshrq_n_u64(q4u64, 16);
- q10u64 = vshrq_n_u64(q6u64, 16);
- d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4);
- q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
- q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
- q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
- vreinterpret_u32_u64(vget_high_u64(q9u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
- vreinterpret_u32_u64(vget_high_u64(q10u64)));
- q3u64 = vshrq_n_u64(q4u64, 24);
- q5u64 = vshrq_n_u64(q6u64, 24);
- d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2);
- q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
- q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
- q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
-
- d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
- vreinterpret_u32_u64(vget_high_u64(q3u64)));
- d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
- vreinterpret_u32_u64(vget_high_u64(q5u64)));
- d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3);
- q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
- q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
- q11u16 = vmull_u8(d31u8, d3u8);
-
- q7s16 = vreinterpretq_s16_u16(q7u16);
- q8s16 = vreinterpretq_s16_u16(q8u16);
- q9s16 = vreinterpretq_s16_u16(q9u16);
- q10s16 = vreinterpretq_s16_u16(q10u16);
- q11s16 = vreinterpretq_s16_u16(q11u16);
- q12s16 = vreinterpretq_s16_u16(q12u16);
- q7s16 = vqaddq_s16(q7s16, q9s16);
- q8s16 = vqaddq_s16(q8s16, q10s16);
- q12s16 = vqaddq_s16(q12s16, q11s16);
-
- d29u8 = vqrshrun_n_s16(q7s16, 7);
- d30u8 = vqrshrun_n_s16(q8s16, 7);
- d31u8 = vqrshrun_n_s16(q12s16, 7);
-
- // Second pass: 4x4
- dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
- d0s8 = vdup_lane_s8(dtmps8, 0);
- d1s8 = vdup_lane_s8(dtmps8, 1);
- d2s8 = vdup_lane_s8(dtmps8, 2);
- d3s8 = vdup_lane_s8(dtmps8, 3);
- d4s8 = vdup_lane_s8(dtmps8, 4);
- d5s8 = vdup_lane_s8(dtmps8, 5);
- d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
- d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
- d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
- d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
- d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
- d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
-
- d23u8 = vext_u8(d27u8, d28u8, 4);
- d24u8 = vext_u8(d28u8, d29u8, 4);
- d25u8 = vext_u8(d29u8, d30u8, 4);
- d26u8 = vext_u8(d30u8, d31u8, 4);
-
- q3u16 = vmull_u8(d27u8, d0u8);
- q4u16 = vmull_u8(d28u8, d0u8);
- q5u16 = vmull_u8(d25u8, d5u8);
- q6u16 = vmull_u8(d26u8, d5u8);
-
- q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
- q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
- q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
- q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
-
- q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
- q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
- q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
- q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
-
- q3s16 = vreinterpretq_s16_u16(q3u16);
- q4s16 = vreinterpretq_s16_u16(q4u16);
- q5s16 = vreinterpretq_s16_u16(q5u16);
- q6s16 = vreinterpretq_s16_u16(q6u16);
-
- q5s16 = vqaddq_s16(q5s16, q3s16);
- q6s16 = vqaddq_s16(q6s16, q4s16);
-
- d3u8 = vqrshrun_n_s16(q5s16, 7);
- d4u8 = vqrshrun_n_s16(q6s16, 7);
-
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
- dst_ptr += dst_pitch;
- vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
- return;
-}
-
void vp8_sixtap_predict8x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
diff --git a/libvpx/vp8/common/findnearmv.h b/libvpx/vp8/common/findnearmv.h
index 155847ca2..472a7b5d8 100644
--- a/libvpx/vp8/common/findnearmv.h
+++ b/libvpx/vp8/common/findnearmv.h
@@ -104,7 +104,7 @@ vp8_prob *vp8_mv_ref_probs(
extern const unsigned char vp8_mbsplit_offset[4][16];
-static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
+static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
@@ -119,7 +119,8 @@ static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
return (cur_mb->bmi + b - 1)->mv.as_int;
}
-static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
+static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
+ int mi_stride)
{
if (!(b >> 2))
{
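
The int-to-uint32_t return-type change above reflects that block motion vectors are read through a 32-bit union view. A hedged sketch of that layout (field names are assumptions based on typical vp8 headers, not quoted from this patch):

    #include <stdint.h>

    typedef struct { int16_t row, col; } MV;
    typedef union { uint32_t as_int; MV as_mv; } int_mv; /* whole-MV view */

    int main(void) {
      int_mv a = { 0 }, b = { 0 };
      a.as_mv.row = -3; a.as_mv.col = 7;
      b.as_int = a.as_int;            /* copy the whole MV in one move */
      return !(a.as_int == b.as_int); /* equality via the unsigned view */
    }
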
diff --git a/libvpx/vp8/common/generic/systemdependent.c b/libvpx/vp8/common/generic/systemdependent.c
index 28dc262ae..6d5f302d7 100644
--- a/libvpx/vp8/common/generic/systemdependent.c
+++ b/libvpx/vp8/common/generic/systemdependent.c
@@ -94,6 +94,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
{
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
+#else
+ (void)ctx;
#endif /* CONFIG_MULTITHREAD */
#if ARCH_ARM
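
The (void)ctx addition above is the usual way to keep a parameter "used" when its only real use is compiled out. A minimal sketch, with CONFIG_MULTITHREAD standing in for the build flag:

    #include <stdio.h>

    #define CONFIG_MULTITHREAD 0 /* stand-in for the real config flag */

    static void machine_specific_config(int *core_count) {
    #if CONFIG_MULTITHREAD
      printf("cores: %d\n", *core_count);
    #else
      (void)core_count; /* referenced so -Wunused-parameter stays quiet */
    #endif
    }

    int main(void) { int n = 4; machine_specific_config(&n); return 0; }
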
diff --git a/libvpx/vp8/common/mips/msa/postproc_msa.c b/libvpx/vp8/common/mips/msa/postproc_msa.c
index c88f30238..23dcde2eb 100644
--- a/libvpx/vp8/common/mips/msa/postproc_msa.c
+++ b/libvpx/vp8/common/mips/msa/postproc_msa.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static const int16_t vp8_rv_msa[] =
@@ -798,54 +799,3 @@ void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
}
}
}
-
-void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
- char blackclamp[16], char whiteclamp[16],
- char bothclamp[16],
- uint32_t width, uint32_t height,
- int32_t pitch)
-{
- uint32_t i, j;
-
- for (i = 0; i < height / 2; ++i)
- {
- uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
- int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
- uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
- int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
- for (j = width / 16; j--;)
- {
- v16i8 temp00_s, temp01_s;
- v16u8 temp00, temp01, black_clamp, white_clamp;
- v16u8 pos0, ref0, pos1, ref1;
- v16i8 const127 = __msa_ldi_b(127);
-
- pos0 = LD_UB(pos0_ptr);
- ref0 = LD_UB(ref0_ptr);
- pos1 = LD_UB(pos1_ptr);
- ref1 = LD_UB(ref1_ptr);
- black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
- white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
- temp00 = (pos0 < black_clamp);
- pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
- temp01 = (pos1 < black_clamp);
- pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
- XORI_B2_128_UB(pos0, pos1);
- temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
- temp00 = (v16u8)(temp00_s < pos0);
- pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
- temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
- temp01 = (temp01_s < pos1);
- pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
- XORI_B2_128_UB(pos0, pos1);
- pos0 += ref0;
- ST_UB(pos0, pos0_ptr);
- pos1 += ref1;
- ST_UB(pos1, pos1_ptr);
- pos0_ptr += 16;
- pos1_ptr += 16;
- ref0_ptr += 16;
- ref1_ptr += 16;
- }
- }
-}
diff --git a/libvpx/vp8/common/postproc.c b/libvpx/vp8/common/postproc.c
index 322b61383..6baf00f1e 100644
--- a/libvpx/vp8/common/postproc.c
+++ b/libvpx/vp8/common/postproc.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
@@ -490,54 +491,6 @@ static void fillrd(struct postproc_state *state, int q, int a)
state->last_noise = a;
}
-/****************************************************************************
- *
- * ROUTINE : plane_add_noise_c
- *
- * INPUTS : unsigned char *Start starting address of buffer to add gaussian
- * noise to
- * unsigned int Width width of plane
- * unsigned int Height height of plane
- * int Pitch distance between subsequent lines of frame
- * int q quantizer used to determine amount of noise
- * to add
- *
- * OUTPUTS : None.
- *
- * RETURNS : void.
- *
- * FUNCTION : adds gaussian noise to a plane of pixels
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
-void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
- char blackclamp[16],
- char whiteclamp[16],
- char bothclamp[16],
- unsigned int Width, unsigned int Height, int Pitch)
-{
- unsigned int i, j;
- (void)bothclamp;
-
- for (i = 0; i < Height; i++)
- {
- unsigned char *Pos = Start + i * Pitch;
- char *Ref = (char *)(noise + (rand() & 0xff));
-
- for (j = 0; j < Width; j++)
- {
- if (Pos[j] < blackclamp[0])
- Pos[j] = blackclamp[0];
-
- if (Pos[j] > 255 + whiteclamp[0])
- Pos[j] = 255 + whiteclamp[0];
-
- Pos[j] += Ref[j];
- }
- }
-}
-
/* Blend the macro block with a solid colored square. Leave the
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
@@ -828,7 +781,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
fillrd(&oci->postproc_state, 63 - q, noise_level);
}
- vp8_plane_add_noise
+ vpx_plane_add_noise
(oci->post_proc_buffer.y_buffer,
oci->postproc_state.noise,
oci->postproc_state.blackclamp,
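
The call site above now goes through the shared vpx_plane_add_noise. A hedged C sketch of the logic the removed vp8_plane_add_noise_c implemented (signatures simplified here; the vpx_dsp version differs in detail): clamp each pixel so the dither cannot wrap, then add a byte from a random window into the noise table.

    #include <stdlib.h>

    /* noise must hold at least width + 255 bytes so the window fits. */
    static void plane_add_noise(unsigned char *start, const char *noise,
                                char black, char white,
                                unsigned int width, unsigned int height,
                                int pitch) {
      unsigned int i, j;
      for (i = 0; i < height; ++i) {
        unsigned char *pos = start + i * pitch;
        const char *ref = noise + (rand() & 0xff); /* random window */
        for (j = 0; j < width; ++j) {
          if (pos[j] < black) pos[j] = black;             /* dark clamp */
          if (pos[j] > 255 + white) pos[j] = 255 + white; /* bright clamp */
          pos[j] += ref[j];                               /* add dither */
        }
      }
    }

    int main(void) {
      unsigned char plane[8 * 8] = { 0 };
      char noise[511] = { 0 };
      plane_add_noise(plane, noise, 8, -8, 8, 8, 8);
      return 0;
    }
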
diff --git a/libvpx/vp8/common/reconintra4x4.h b/libvpx/vp8/common/reconintra4x4.h
index 869841ee3..5dc5d13a5 100644
--- a/libvpx/vp8/common/reconintra4x4.h
+++ b/libvpx/vp8/common/reconintra4x4.h
@@ -17,8 +17,8 @@
extern "C" {
#endif
-static void intra_prediction_down_copy(MACROBLOCKD *xd,
- unsigned char *above_right_src)
+static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd,
+ unsigned char *above_right_src)
{
int dst_stride = xd->dst.y_stride;
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
diff --git a/libvpx/vp8/common/rtcd_defs.pl b/libvpx/vp8/common/rtcd_defs.pl
index 6799c2787..856ede189 100644
--- a/libvpx/vp8/common/rtcd_defs.pl
+++ b/libvpx/vp8/common/rtcd_defs.pl
@@ -167,10 +167,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
- add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
- specialize qw/vp8_plane_add_noise mmx sse2 msa/;
- $vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
-
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
@@ -209,7 +205,6 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
-#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/;
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
@@ -227,7 +222,6 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/;
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
-#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize qw/vp8_bilinear_predict4x4 mmx media msa/;
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
diff --git a/libvpx/vp8/common/threading.h b/libvpx/vp8/common/threading.h
index 01c82dbb8..183b49b8f 100644
--- a/libvpx/vp8/common/threading.h
+++ b/libvpx/vp8/common/threading.h
@@ -12,6 +12,8 @@
#ifndef VP8_COMMON_THREADING_H_
#define VP8_COMMON_THREADING_H_
+#include "./vpx_config.h"
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -19,17 +21,15 @@ extern "C" {
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
/* Thread management macros */
-#ifdef _WIN32
+#if defined(_WIN32) && !HAVE_PTHREAD_H
/* Win32 */
#include <process.h>
#include <windows.h>
-#define THREAD_FUNCTION DWORD WINAPI
+#define THREAD_FUNCTION unsigned int __stdcall
#define THREAD_FUNCTION_RETURN DWORD
#define THREAD_SPECIFIC_INDEX DWORD
#define pthread_t HANDLE
#define pthread_attr_t DWORD
-#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
-#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
#define thread_sleep(nms) Sleep(nms)
#define pthread_cancel(thread) terminate_thread(thread,0)
@@ -44,14 +44,11 @@ extern "C" {
#include <os2.h>
#include <stdlib.h>
-#define THREAD_FUNCTION void
-#define THREAD_FUNCTION_RETURN void
+#define THREAD_FUNCTION void *
+#define THREAD_FUNCTION_RETURN void *
#define THREAD_SPECIFIC_INDEX PULONG
#define pthread_t TID
#define pthread_attr_t ULONG
-#define pthread_create(thhandle,attr,thfunc,tharg) \
- ((int)((*(thhandle)=_beginthread(thfunc,NULL,1024*1024,tharg))==-1))
-#define pthread_join(thread, result) ((int)DosWaitThread(&(thread),0))
#define pthread_detach(thread) 0
#define thread_sleep(nms) DosSleep(nms)
#define pthread_cancel(thread) DosKillThread(thread)
@@ -81,8 +78,8 @@ extern "C" {
#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
#endif
-/* Syncrhronization macros: Win32 and Pthreads */
-#ifdef _WIN32
+/* Synchronization macros: Win32 and Pthreads */
+#if defined(_WIN32) && !HAVE_PTHREAD_H
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
@@ -185,6 +182,47 @@ static inline int sem_destroy(sem_t * sem)
#define x86_pause_hint()
#endif
+#include "vpx_util/vpx_thread.h"
+
+static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
+ const int kMaxTryLocks = 4000;
+ int locked = 0;
+ int i;
+
+ for (i = 0; i < kMaxTryLocks; ++i) {
+ if (!pthread_mutex_trylock(mutex)) {
+ locked = 1;
+ break;
+ }
+ }
+
+ if (!locked)
+ pthread_mutex_lock(mutex);
+}
+
+static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {
+ int ret;
+ mutex_lock(mutex);
+ ret = *p;
+ pthread_mutex_unlock(mutex);
+ return ret;
+}
+
+static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,
+ const int *last_row_current_mb_col,
+ const int nsync) {
+ while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
+ x86_pause_hint();
+ thread_sleep(0);
+ }
+}
+
+static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {
+ mutex_lock(mutex);
+ *p = v;
+ pthread_mutex_unlock(mutex);
+}
+
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
#ifdef __cplusplus
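
A hedged sketch of how these helpers are meant to be used (the decoder and encoder row loops further down follow this shape; the row-loop names here are illustrative): each row publishes its last finished column under its own mutex, and the row below waits via sync_read before getting within nsync columns of it.

    #include <pthread.h>

    /* Provided by the threading.h additions above. */
    void sync_read(pthread_mutex_t *mutex, int mb_col,
                   const int *last_row_current_mb_col, int nsync);
    void protected_write(pthread_mutex_t *mutex, int *p, int v);

    void decode_row(pthread_mutex_t *my_mutex, int *my_col,
                    pthread_mutex_t *above_mutex, const int *above_col,
                    int mb_cols, int nsync) {
      int mb_col;
      for (mb_col = 0; mb_col < mb_cols; ++mb_col) {
        /* Block until the row above is at least nsync columns ahead. */
        sync_read(above_mutex, mb_col, above_col, nsync);
        /* ... decode macroblock (this_row, mb_col) ... */
        protected_write(my_mutex, my_col, mb_col); /* publish progress */
      }
    }

The real loops only publish every nsync columns to keep lock traffic down, rather than on every macroblock as shown here.
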
diff --git a/libvpx/vp8/common/vp8_loopfilter.c b/libvpx/vp8/common/vp8_loopfilter.c
index 8b55dff92..756ad488f 100644
--- a/libvpx/vp8/common/vp8_loopfilter.c
+++ b/libvpx/vp8/common/vp8_loopfilter.c
@@ -141,8 +141,8 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm,
else /* Delta Value */
{
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
- lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
+ lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
if (!mbd->mode_ref_lf_delta_enabled)
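
The hunk above hoists the [0, 63] clamp out of the delta branch so absolute segment values get clamped too; the QIndex change in decodeframe.c below applies the same fix. A minimal sketch of the pattern (names illustrative):

    /* Before: the clamp ran only when a delta was applied, so an absolute
     * segment value could leave the legal range. After: clamp both paths. */
    static int seg_lf_level(int base, int feature_data, int abs_values) {
      int lvl_seg = base;
      if (abs_values)
        lvl_seg = feature_data;     /* absolute value */
      else
        lvl_seg += feature_data;    /* delta value */
      if (lvl_seg < 0) lvl_seg = 0; /* clamp applies either way now */
      if (lvl_seg > 63) lvl_seg = 63;
      return lvl_seg;
    }
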
diff --git a/libvpx/vp8/common/x86/postproc_mmx.asm b/libvpx/vp8/common/x86/postproc_mmx.asm
index a2b16327f..1a89e7ead 100644
--- a/libvpx/vp8/common/x86/postproc_mmx.asm
+++ b/libvpx/vp8/common/x86/postproc_mmx.asm
@@ -241,68 +241,6 @@ sym(vp8_mbpost_proc_down_mmx):
%undef flimit2
-;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int Width, unsigned int Height, int Pitch)
-global sym(vp8_plane_add_noise_mmx) PRIVATE
-sym(vp8_plane_add_noise_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movq mm1,[rsi+rax] ; get the source
-
- psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb mm1, [rdx+32] ;bothclamp
- psubusb mm1, [rdx+16] ;whiteclamp
-
- movq mm2,[rdi+rax] ; get the noise for this line
- paddb mm1,mm2 ; add it in
- movq [rsi+rax],mm1 ; store the result
-
- add rax,8 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
Blur:
diff --git a/libvpx/vp8/common/x86/postproc_sse2.asm b/libvpx/vp8/common/x86/postproc_sse2.asm
index fed4ee5cc..de17afa5c 100644
--- a/libvpx/vp8/common/x86/postproc_sse2.asm
+++ b/libvpx/vp8/common/x86/postproc_sse2.asm
@@ -655,68 +655,6 @@ sym(vp8_mbpost_proc_across_ip_xmm):
%undef flimit4
-;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int Width, unsigned int Height, int Pitch)
-global sym(vp8_plane_add_noise_wmt) PRIVATE
-sym(vp8_plane_add_noise_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movdqu xmm1,[rsi+rax] ; get the source
-
- psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb xmm1, [rdx+32] ;bothclamp
- psubusb xmm1, [rdx+16] ;whiteclamp
-
- movdqu xmm2,[rdi+rax] ; get the noise for this line
- paddb xmm1,xmm2 ; add it in
- movdqu [rsi+rax],xmm1 ; store the result
-
- add rax,16 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
four8s:
diff --git a/libvpx/vp8/decoder/dboolhuff.c b/libvpx/vp8/decoder/dboolhuff.c
index 8a7e33205..5cdd2a249 100644
--- a/libvpx/vp8/decoder/dboolhuff.c
+++ b/libvpx/vp8/decoder/dboolhuff.c
@@ -44,7 +44,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
size_t bytes_left = br->user_buffer_end - bufptr;
size_t bits_left = bytes_left * CHAR_BIT;
- int x = (int)(shift + CHAR_BIT - bits_left);
+ int x = shift + CHAR_BIT - (int)bits_left;
int loop_end = 0;
unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];
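
The dboolhuff.c change above moves the int cast onto the size_t operand so the subtraction stays signed. A minimal sketch of the difference: the wrapped intermediate is well defined for unsigned types, but it trips conversion warnings and integer sanitizers, and truncating it back to int is implementation-defined.

    #include <stddef.h>
    #include <stdio.h>

    int main(void) {
      int shift = 8;
      size_t bits_left = 100;
      /* Old form: the int operands convert to size_t, 16 - 100 wraps to a
       * huge unsigned value, and only the final cast truncates it back. */
      int old_x = (int)(shift + 8 - bits_left);
      /* New form: the arithmetic itself is signed, no wraparound occurs. */
      int new_x = shift + 8 - (int)bits_left;
      printf("%d %d\n", old_x, new_x); /* typically both -84, but only the
                                          second avoids the unsigned wrap */
      return 0;
    }
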
diff --git a/libvpx/vp8/decoder/dboolhuff.h b/libvpx/vp8/decoder/dboolhuff.h
index cc9eaaf43..1b1bbf868 100644
--- a/libvpx/vp8/decoder/dboolhuff.h
+++ b/libvpx/vp8/decoder/dboolhuff.h
@@ -83,7 +83,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}
{
- register unsigned int shift = vp8_norm[range];
+ register int shift = vp8_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
diff --git a/libvpx/vp8/decoder/decodeframe.c b/libvpx/vp8/decoder/decodeframe.c
index f0d760373..51acdbb9c 100644
--- a/libvpx/vp8/decoder/decodeframe.c
+++ b/libvpx/vp8/decoder/decodeframe.c
@@ -73,10 +73,9 @@ void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
/* Delta Value */
else
- {
QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
- }
+
+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
}
else
QIndex = pc->base_qindex;
@@ -145,8 +144,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
*/
pbi->frame_corrupt_residual = 1;
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
- vp8_conceal_corrupt_mb(xd);
-
corruption_detected = 1;
@@ -626,8 +623,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)
*/
vp8_interpolate_motion(xd,
mb_row, mb_col,
- pc->mb_rows, pc->mb_cols,
- pc->mode_info_stride);
+ pc->mb_rows, pc->mb_cols);
}
}
#endif
@@ -987,7 +983,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
VP8_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
const unsigned char *data = pbi->fragments.ptrs[0];
- const unsigned char *data_end = data + pbi->fragments.sizes[0];
+ const unsigned int data_sz = pbi->fragments.sizes[0];
+ const unsigned char *data_end = data + data_sz;
ptrdiff_t first_partition_length_in_bytes;
int i, j, k, l;
@@ -1023,7 +1020,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
const unsigned char *clear = data;
if (pbi->decrypt_cb)
{
- int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data);
+ int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
clear = clear_buffer;
}
diff --git a/libvpx/vp8/decoder/error_concealment.c b/libvpx/vp8/decoder/error_concealment.c
index 0b846a08b..a73813fc0 100644
--- a/libvpx/vp8/decoder/error_concealment.c
+++ b/libvpx/vp8/decoder/error_concealment.c
@@ -194,7 +194,7 @@ void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul,
return;
}
- if (new_row <= (-4 << 3) || new_col <= (-4 << 3))
+ if (new_row <= -32 || new_col <= -32)
{
/* outside the frame */
return;
@@ -558,8 +558,7 @@ static void interpolate_mvs(MACROBLOCKD *mb,
void vp8_interpolate_motion(MACROBLOCKD *mb,
int mb_row, int mb_col,
- int mb_rows, int mb_cols,
- int mi_stride)
+ int mb_rows, int mb_cols)
{
/* Find relevant neighboring blocks */
EC_BLOCK neighbors[NUM_NEIGHBORS];
@@ -585,13 +584,3 @@ void vp8_interpolate_motion(MACROBLOCKD *mb,
mb->mode_info_context->mbmi.partitioning = 3;
mb->mode_info_context->mbmi.segment_id = 0;
}
-
-void vp8_conceal_corrupt_mb(MACROBLOCKD *xd)
-{
- /* This macroblock has corrupt residual, use the motion compensated
- image (predictor) for concealment */
-
- /* The build predictor functions now output directly into the dst buffer,
- * so the copies are no longer necessary */
-
-}
diff --git a/libvpx/vp8/decoder/error_concealment.h b/libvpx/vp8/decoder/error_concealment.h
index 9a1e02486..b6b49725b 100644
--- a/libvpx/vp8/decoder/error_concealment.h
+++ b/libvpx/vp8/decoder/error_concealment.h
@@ -34,13 +34,7 @@ void vp8_estimate_missing_mvs(VP8D_COMP *pbi);
* (mb_row, mb_col). */
void vp8_interpolate_motion(MACROBLOCKD *mb,
int mb_row, int mb_col,
- int mb_rows, int mb_cols,
- int mi_stride);
-
-/* Conceal a macroblock with corrupt residual.
- * Copies the prediction signal to the reconstructed image.
- */
-void vp8_conceal_corrupt_mb(MACROBLOCKD *xd);
+ int mb_rows, int mb_cols);
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp8/decoder/onyxd_int.h b/libvpx/vp8/decoder/onyxd_int.h
index aa2cc57f7..313fe01c0 100644
--- a/libvpx/vp8/decoder/onyxd_int.h
+++ b/libvpx/vp8/decoder/onyxd_int.h
@@ -81,7 +81,7 @@ typedef struct VP8D_COMP
#if CONFIG_MULTITHREAD
/* variable for threading */
- volatile int b_multithreaded_rd;
+ int b_multithreaded_rd;
int max_threads;
int current_mb_col_main;
unsigned int decoding_thread_count;
@@ -90,6 +90,8 @@ typedef struct VP8D_COMP
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
int sync_range;
int *mt_current_mb_col; /* Each row remembers its already decoded column. */
+ pthread_mutex_t *pmutex;
+ pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */
unsigned char **mt_yabove_row; /* mb_rows x width */
unsigned char **mt_uabove_row;
diff --git a/libvpx/vp8/decoder/threading.c b/libvpx/vp8/decoder/threading.c
index 7c7184c78..3c1b8387e 100644
--- a/libvpx/vp8/decoder/threading.c
+++ b/libvpx/vp8/decoder/threading.c
@@ -52,9 +52,6 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
- mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
- mbd->mode_info_stride = pc->mode_info_stride;
-
mbd->frame_type = pc->frame_type;
mbd->pre = xd->pre;
mbd->dst = xd->dst;
@@ -139,8 +136,6 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
*/
pbi->frame_corrupt_residual = 1;
memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
- vp8_conceal_corrupt_mb(xd);
-
corruption_detected = 1;
@@ -298,8 +293,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
{
- volatile const int *last_row_current_mb_col;
- volatile int *current_mb_col;
+ const int *last_row_current_mb_col;
+ int *current_mb_col;
int mb_row;
VP8_COMMON *pc = &pbi->common;
const int nsync = pbi->sync_range;
@@ -337,6 +332,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
xd->up_available = (start_mb_row != 0);
+ xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
+ xd->mode_info_stride = pc->mode_info_stride;
+
for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
{
int recon_yoffset, recon_uvoffset;
@@ -405,17 +403,15 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
xd->dst.uv_stride);
}
- for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
- {
- *current_mb_col = mb_col - 1;
+ for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) {
+ if (((mb_col - 1) % nsync) == 0) {
+ pthread_mutex_t *mutex = &pbi->pmutex[mb_row];
+ protected_write(mutex, current_mb_col, mb_col - 1);
+ }
- if ((mb_col & (nsync - 1)) == 0)
- {
- while (mb_col > (*last_row_current_mb_col - nsync))
- {
- x86_pause_hint();
- thread_sleep(0);
- }
+ if (mb_row && !(mb_col & (nsync - 1))) {
+ pthread_mutex_t *mutex = &pbi->pmutex[mb_row-1];
+ sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
}
/* Distance of MB to the various image edges.
@@ -449,8 +445,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
*/
vp8_interpolate_motion(xd,
mb_row, mb_col,
- pc->mb_rows, pc->mb_cols,
- pc->mode_info_stride);
+ pc->mb_rows, pc->mb_cols);
}
}
#endif
@@ -604,7 +599,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
/* last MB of row is ready just after extension is done */
- *current_mb_col = mb_col + nsync;
+ protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);
++xd->mode_info_context; /* skip prediction column */
xd->up_available = 1;
@@ -629,12 +624,12 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data)
while (1)
{
- if (pbi->b_multithreaded_rd == 0)
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0)
break;
if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
{
- if (pbi->b_multithreaded_rd == 0)
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0)
break;
else
{
@@ -657,6 +652,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
pbi->b_multithreaded_rd = 0;
pbi->allocated_decoding_thread_count = 0;
+ pthread_mutex_init(&pbi->mt_mutex, NULL);
/* limit decoding threads to the max number of token partitions */
core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
@@ -699,8 +695,17 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
int i;
- if (pbi->b_multithreaded_rd)
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd))
{
+ /* De-allocate mutex */
+ if (pbi->pmutex != NULL) {
+ for (i = 0; i < mb_rows; i++) {
+ pthread_mutex_destroy(&pbi->pmutex[i]);
+ }
+ vpx_free(pbi->pmutex);
+ pbi->pmutex = NULL;
+ }
+
vpx_free(pbi->mt_current_mb_col);
pbi->mt_current_mb_col = NULL ;
@@ -781,7 +786,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
int i;
int uv_width;
- if (pbi->b_multithreaded_rd)
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd))
{
vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
@@ -796,6 +801,15 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
uv_width = width >>1;
+ /* Allocate mutex */
+ CHECK_MEM_ERROR(pbi->pmutex, vpx_malloc(sizeof(*pbi->pmutex) *
+ pc->mb_rows));
+ if (pbi->pmutex) {
+ for (i = 0; i < pc->mb_rows; i++) {
+ pthread_mutex_init(&pbi->pmutex[i], NULL);
+ }
+ }
+
/* Allocate an int for each mb row. */
CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
@@ -831,11 +845,11 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
/* shutdown MB Decoding thread; */
- if (pbi->b_multithreaded_rd)
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd))
{
int i;
- pbi->b_multithreaded_rd = 0;
+ protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0);
/* allow all threads to exit */
for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
@@ -863,6 +877,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
vpx_free(pbi->de_thread_data);
pbi->de_thread_data = NULL;
}
+ pthread_mutex_destroy(&pbi->mt_mutex);
}
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
diff --git a/libvpx/vp8/encoder/bitstream.c b/libvpx/vp8/encoder/bitstream.c
index f3d91b552..3196422c2 100644
--- a/libvpx/vp8/encoder/bitstream.c
+++ b/libvpx/vp8/encoder/bitstream.c
@@ -163,7 +163,7 @@ void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *stop = p + xcount;
unsigned int split;
- unsigned int shift;
+ int shift;
int count = w->count;
unsigned int range = w->range;
unsigned int lowvalue = w->lowvalue;
diff --git a/libvpx/vp8/encoder/boolhuff.h b/libvpx/vp8/encoder/boolhuff.h
index 7c012a829..e66a2dbd8 100644
--- a/libvpx/vp8/encoder/boolhuff.h
+++ b/libvpx/vp8/encoder/boolhuff.h
@@ -65,7 +65,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability)
int count = br->count;
unsigned int range = br->range;
unsigned int lowvalue = br->lowvalue;
- register unsigned int shift;
+ register int shift;
#ifdef VP8_ENTROPY_STATS
#if defined(SECTIONBITS_OUTPUT)
diff --git a/libvpx/vp8/encoder/denoising.c b/libvpx/vp8/encoder/denoising.c
index d197f8f81..26ce120b4 100644
--- a/libvpx/vp8/encoder/denoising.c
+++ b/libvpx/vp8/encoder/denoising.c
@@ -23,7 +23,7 @@ static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
*/
static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
-static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 60;
+static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 80;
/*
* The filter function was modified to reduce the computational complexity.
@@ -440,6 +440,11 @@ int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
denoiser->yv12_last_source.frame_size);
denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
+ if (!denoiser->denoise_state)
+ {
+ vp8_denoiser_free(denoiser);
+ return 1;
+ }
memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
vp8_denoiser_set_parameters(denoiser, mode);
denoiser->nmse_source_diff = 0;
@@ -492,7 +497,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
loop_filter_info_n *lfi_n,
int mb_row,
int mb_col,
- int block_index)
+ int block_index,
+ int consec_zero_last)
{
int mv_row;
@@ -523,7 +529,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
// Bias on zero motion vector sse.
const int zero_bias = denoiser->denoise_pars.denoise_mv_bias;
zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
- sse_diff = zero_mv_sse - best_sse;
+ sse_diff = (int)zero_mv_sse - (int)best_sse;
saved_mbmi = *mbmi;
@@ -566,59 +572,69 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
best_sse = zero_mv_sse;
}
- saved_pre = filter_xd->pre;
- saved_dst = filter_xd->dst;
-
- /* Compensate the running average. */
- filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset;
- filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset;
- filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset;
- /* Write the compensated running average to the destination buffer. */
- filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset;
- filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset;
- filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset;
-
- if (!x->skip)
- {
- vp8_build_inter_predictors_mb(filter_xd);
- }
- else
- {
- vp8_build_inter16x16_predictors_mb(filter_xd,
- filter_xd->dst.y_buffer,
- filter_xd->dst.u_buffer,
- filter_xd->dst.v_buffer,
- filter_xd->dst.y_stride,
- filter_xd->dst.uv_stride);
+ mv_row = x->best_sse_mv.as_mv.row;
+ mv_col = x->best_sse_mv.as_mv.col;
+ motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
+ motion_threshold = denoiser->denoise_pars.scale_motion_thresh *
+ NOISE_MOTION_THRESHOLD;
+
+ if (motion_magnitude2 <
+ denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD)
+ x->increase_denoising = 1;
+
+ sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD;
+ if (x->increase_denoising)
+ sse_thresh =
+ denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD_HIGH;
+
+ if (best_sse > sse_thresh || motion_magnitude2 > motion_threshold)
+ decision = COPY_BLOCK;
+
+ // If block is considered skin, don't denoise if the block
+ // (1) is selected as non-zero motion for current frame, or
+ // (2) has not been selected as ZERO_LAST mode at least x past frames
+ // in a row.
+ // TODO(marpan): Parameter "x" should be varied with framerate.
+  // In particular, should be reduced for layers (base layer/LAST).
+ if (x->is_skin && (consec_zero_last < 2 || motion_magnitude2 > 0))
+ decision = COPY_BLOCK;
+
+ if (decision == FILTER_BLOCK) {
+ saved_pre = filter_xd->pre;
+ saved_dst = filter_xd->dst;
+
+ /* Compensate the running average. */
+ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset;
+ filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset;
+ filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset;
+ /* Write the compensated running average to the destination buffer. */
+ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset;
+ filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset;
+ filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset;
+
+ if (!x->skip)
+ {
+ vp8_build_inter_predictors_mb(filter_xd);
+ }
+ else
+ {
+ vp8_build_inter16x16_predictors_mb(filter_xd,
+ filter_xd->dst.y_buffer,
+ filter_xd->dst.u_buffer,
+ filter_xd->dst.v_buffer,
+ filter_xd->dst.y_stride,
+ filter_xd->dst.uv_stride);
+ }
+ filter_xd->pre = saved_pre;
+ filter_xd->dst = saved_dst;
+ *mbmi = saved_mbmi;
}
- filter_xd->pre = saved_pre;
- filter_xd->dst = saved_dst;
- *mbmi = saved_mbmi;
-
- }
-
- mv_row = x->best_sse_mv.as_mv.row;
- mv_col = x->best_sse_mv.as_mv.col;
- motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
- motion_threshold = denoiser->denoise_pars.scale_motion_thresh *
- NOISE_MOTION_THRESHOLD;
-
- // If block is considered to be skin area, lower the motion threshold.
- // In current version set threshold = 1, so only denoise very low
- // (i.e., zero) mv on skin.
- if (x->is_skin)
- motion_threshold = 1;
-
- if (motion_magnitude2 <
- denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD)
- x->increase_denoising = 1;
-
- sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD;
- if (x->increase_denoising)
- sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD_HIGH;
-
- if (best_sse > sse_thresh || motion_magnitude2 > motion_threshold)
+ } else {
+ // zero_frame should always be 1 for real-time mode, as the
+ // ZEROMV mode is always checked, so we should never reach this branch.
+ // If ZEROMV is not checked, force no denoising (COPY).
decision = COPY_BLOCK;
+ }
if (decision == FILTER_BLOCK)
{
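
The FILTER_BLOCK/COPY_BLOCK gating above reduces to two scaled thresholds: block SSE against scale_sse_thresh * SSE_THRESHOLD (raised to SSE_THRESHOLD_HIGH when denoising is being increased) and squared motion magnitude against scale_motion_thresh * NOISE_MOTION_THRESHOLD. A minimal standalone sketch of that decision, with parameter names and values assumed from the hunk above rather than taken from the full headers:

    /* Sketch only; parameters are assumptions lifted from the hunk above. */
    enum { FILTER_BLOCK_ = 0, COPY_BLOCK_ = 1 };

    static int denoise_decision(unsigned int best_sse, int mv_row, int mv_col,
                                int *increase_denoising,
                                unsigned int scale_motion_thresh,
                                unsigned int scale_increase_filter,
                                unsigned int scale_sse_thresh,
                                unsigned int noise_motion_threshold,
                                unsigned int sse_threshold,
                                unsigned int sse_threshold_high) {
      const unsigned int magnitude2 =
          (unsigned int)(mv_row * mv_row + mv_col * mv_col);
      unsigned int sse_limit = scale_sse_thresh * sse_threshold;
      if (magnitude2 < scale_increase_filter * noise_motion_threshold)
        *increase_denoising = 1;
      if (*increase_denoising)
        sse_limit = scale_sse_thresh * sse_threshold_high;
      if (best_sse > sse_limit ||
          magnitude2 > scale_motion_thresh * noise_motion_threshold)
        return COPY_BLOCK_;
      return FILTER_BLOCK_;
    }
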
diff --git a/libvpx/vp8/encoder/denoising.h b/libvpx/vp8/encoder/denoising.h
index 9a379a6a1..8c126c1cb 100644
--- a/libvpx/vp8/encoder/denoising.h
+++ b/libvpx/vp8/encoder/denoising.h
@@ -18,8 +18,8 @@
extern "C" {
#endif
-#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
-#define SUM_DIFF_THRESHOLD_HIGH (600) // ~(16 * 16 * 1.5)
+#define SUM_DIFF_THRESHOLD 512
+#define SUM_DIFF_THRESHOLD_HIGH 600
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
#define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5)
@@ -108,7 +108,8 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
loop_filter_info_n *lfi_n,
int mb_row,
int mb_col,
- int block_index);
+ int block_index,
+ int consec_zero_last);
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp8/encoder/encodeframe.c b/libvpx/vp8/encoder/encodeframe.c
index b0aaa2f0b..9b05cd1fc 100644
--- a/libvpx/vp8/encoder/encodeframe.c
+++ b/libvpx/vp8/encoder/encodeframe.c
@@ -386,8 +386,8 @@ void encode_mb_row(VP8_COMP *cpi,
#if CONFIG_MULTITHREAD
const int nsync = cpi->mt_sync_range;
const int rightmost_col = cm->mb_cols + nsync;
- volatile const int *last_row_current_mb_col;
- volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+ const int *last_row_current_mb_col;
+ int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
@@ -461,17 +461,15 @@ void encode_mb_row(VP8_COMP *cpi,
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
#if CONFIG_MULTITHREAD
- if (cpi->b_multi_threaded != 0)
- {
- *current_mb_col = mb_col - 1; /* set previous MB done */
+ if (cpi->b_multi_threaded != 0) {
+ if (((mb_col - 1) % nsync) == 0) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
+ protected_write(mutex, current_mb_col, mb_col - 1);
+ }
- if ((mb_col & (nsync - 1)) == 0)
- {
- while (mb_col > (*last_row_current_mb_col - nsync))
- {
- x86_pause_hint();
- thread_sleep(0);
- }
+ if (mb_row && !(mb_col & (nsync - 1))) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row-1];
+ sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
}
}
#endif
@@ -616,7 +614,7 @@ void encode_mb_row(VP8_COMP *cpi,
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
- *current_mb_col = rightmost_col;
+ protected_write(&cpi->pmutex[mb_row], current_mb_col, rightmost_col);
#endif
/* this is to account for the border */
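
The volatile-based row synchronization is replaced here by mutex-protected accessors whose definitions are not part of this hunk. A sketch consistent with the call sites above (protected_write, protected_read, sync_read), assuming they live alongside the other vp8 threading utilities:

    /* Sketch consistent with the call sites; the real definitions are not
     * shown in this diff. */
    #include <pthread.h>

    static void protected_write(pthread_mutex_t *mutex, int *p, int v) {
      pthread_mutex_lock(mutex);
      *p = v;
      pthread_mutex_unlock(mutex);
    }

    static int protected_read(pthread_mutex_t *mutex, const int *p) {
      int v;
      pthread_mutex_lock(mutex);
      v = *p;
      pthread_mutex_unlock(mutex);
      return v;
    }

    /* Spin until the row above has advanced at least nsync columns ahead. */
    static void sync_read(pthread_mutex_t *mutex, int mb_col,
                          const int *last_row_current_mb_col, int nsync) {
      while (mb_col > protected_read(mutex, last_row_current_mb_col) - nsync)
        ; /* the real helper would also pause/yield between polls */
    }
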
diff --git a/libvpx/vp8/encoder/ethreading.c b/libvpx/vp8/encoder/ethreading.c
index 4e234ccd5..2a0c2987b 100644
--- a/libvpx/vp8/encoder/ethreading.c
+++ b/libvpx/vp8/encoder/ethreading.c
@@ -26,12 +26,13 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data)
while (1)
{
- if (cpi->b_multi_threaded == 0)
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
if (sem_wait(&cpi->h_event_start_lpf) == 0)
{
- if (cpi->b_multi_threaded == 0) /* we're shutting down */
+ /* we're shutting down */
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
vp8_loopfilter_frame(cpi, cm);
@@ -53,7 +54,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
while (1)
{
- if (cpi->b_multi_threaded == 0)
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0)
@@ -72,9 +73,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int *segment_counts = mbri->segment_counts;
int *totalrate = &mbri->totalrate;
- if (cpi->b_multi_threaded == 0) /* we're shutting down */
+ /* we're shutting down */
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
+ xd->mode_info_context = cm->mi + cm->mode_info_stride *
+ (ithread + 1);
+ xd->mode_info_stride = cm->mode_info_stride;
+
for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
{
@@ -85,8 +91,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cm->mb_cols);
- volatile const int *last_row_current_mb_col;
- volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+ const int *last_row_current_mb_col;
+ int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
@@ -113,15 +119,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
/* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- *current_mb_col = mb_col - 1;
+ if (((mb_col - 1) % nsync) == 0) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
+ protected_write(mutex, current_mb_col, mb_col - 1);
+ }
- if ((mb_col & (nsync - 1)) == 0)
- {
- while (mb_col > (*last_row_current_mb_col - nsync))
- {
- x86_pause_hint();
- thread_sleep(0);
- }
+ if (mb_row && !(mb_col & (nsync - 1))) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row-1];
+ sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -296,7 +301,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
- *current_mb_col = mb_col + nsync;
+ protected_write(&cpi->pmutex[mb_row], current_mb_col,
+ mb_col + nsync);
/* this is to account for the border */
xd->mode_info_context++;
@@ -473,9 +479,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
- mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1);
- mbd->mode_info_stride = cm->mode_info_stride;
-
mbd->frame_type = cm->frame_type;
mb->src = * cpi->Source;
@@ -515,7 +518,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
cpi->b_multi_threaded = 0;
cpi->encoding_thread_count = 0;
- cpi->b_lpf_running = 0;
+
+ pthread_mutex_init(&cpi->mt_mutex, NULL);
if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
{
@@ -580,7 +584,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
if(rc)
{
/* shutdown other threads */
- cpi->b_multi_threaded = 0;
+ protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
for(--ithread; ithread >= 0; ithread--)
{
pthread_join(cpi->h_encoding_thread[ithread], 0);
@@ -594,6 +598,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
+ pthread_mutex_destroy(&cpi->mt_mutex);
+
return -1;
}
@@ -611,7 +617,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
if(rc)
{
/* shutdown other threads */
- cpi->b_multi_threaded = 0;
+ protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
for(--ithread; ithread >= 0; ithread--)
{
sem_post(&cpi->h_event_start_encoding[ithread]);
@@ -628,6 +634,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
+ pthread_mutex_destroy(&cpi->mt_mutex);
+
return -2;
}
}
@@ -637,10 +645,10 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
{
- if (cpi->b_multi_threaded)
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded))
{
/* shutdown other threads */
- cpi->b_multi_threaded = 0;
+ protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
{
int i;
@@ -666,5 +674,6 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
}
+ pthread_mutex_destroy(&cpi->mt_mutex);
}
#endif
diff --git a/libvpx/vp8/encoder/firstpass.c b/libvpx/vp8/encoder/firstpass.c
index 4c2acc774..c526a3e89 100644
--- a/libvpx/vp8/encoder/firstpass.c
+++ b/libvpx/vp8/encoder/firstpass.c
@@ -18,6 +18,7 @@
#include "onyx_int.h"
#include "vpx_dsp/variance.h"
#include "encodeintra.h"
+#include "vp8/common/common.h"
#include "vp8/common/setupintrarecon.h"
#include "vp8/common/systemdependent.h"
#include "mcomp.h"
@@ -2417,7 +2418,7 @@ void vp8_second_pass(VP8_COMP *cpi)
int tmp_q;
int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
- FIRSTPASS_STATS this_frame = {0};
+ FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
double this_frame_intra_error;
@@ -2425,6 +2426,8 @@ void vp8_second_pass(VP8_COMP *cpi)
int overhead_bits;
+ vp8_zero(this_frame);
+
if (!cpi->twopass.stats_in)
{
return ;
@@ -2808,7 +2811,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
* static scene.
*/
if ( detect_transition_to_still( cpi, i,
- (cpi->key_frame_frequency-i),
+ ((int)(cpi->key_frame_frequency) -
+ (int)i),
loop_decay_rate,
decay_accumulator ) )
{
diff --git a/libvpx/vp8/encoder/lookahead.c b/libvpx/vp8/encoder/lookahead.c
index ce2ce08c1..662338574 100644
--- a/libvpx/vp8/encoder/lookahead.c
+++ b/libvpx/vp8/encoder/lookahead.c
@@ -181,6 +181,7 @@ vp8_lookahead_pop(struct lookahead_ctx *ctx,
{
struct lookahead_entry* buf = NULL;
+ assert(ctx != NULL);
if(ctx->sz && (drain || ctx->sz == ctx->max_sz - 1))
{
buf = pop(ctx, &ctx->read_idx);
diff --git a/libvpx/vp8/encoder/mcomp.c b/libvpx/vp8/encoder/mcomp.c
index 768c764ce..e20c1ea7b 100644
--- a/libvpx/vp8/encoder/mcomp.c
+++ b/libvpx/vp8/encoder/mcomp.c
@@ -1591,7 +1591,6 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance;
int col_max = ref_col + distance;
- // TODO(johannkoenig): check if this alignment is necessary.
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
unsigned int sad_array[3];
diff --git a/libvpx/vp8/encoder/onyx_if.c b/libvpx/vp8/encoder/onyx_if.c
index df5bcf688..d5a0fff35 100644
--- a/libvpx/vp8/encoder/onyx_if.c
+++ b/libvpx/vp8/encoder/onyx_if.c
@@ -477,6 +477,18 @@ static void dealloc_compressor_data(VP8_COMP *cpi)
cpi->mb.pip = 0;
#if CONFIG_MULTITHREAD
+ /* De-allocate the row-level mutexes */
+ if (cpi->pmutex != NULL) {
+ VP8_COMMON *const pc = &cpi->common;
+ int i;
+
+ for (i = 0; i < pc->mb_rows; i++) {
+ pthread_mutex_destroy(&cpi->pmutex[i]);
+ }
+ vpx_free(cpi->pmutex);
+ cpi->pmutex = NULL;
+ }
+
vpx_free(cpi->mt_current_mb_col);
cpi->mt_current_mb_col = NULL;
#endif
@@ -1180,6 +1192,9 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
int width = cm->Width;
int height = cm->Height;
+#if CONFIG_MULTITHREAD
+ int prev_mb_rows = cm->mb_rows;
+#endif
if (vp8_alloc_frame_buffers(cm, width, height))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
@@ -1271,6 +1286,25 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
if (cpi->oxcf.multi_threaded > 1)
{
+ int i;
+
+ /* De-allocate and re-allocate the row-level mutexes */
+ if (cpi->pmutex != NULL) {
+ for (i = 0; i < prev_mb_rows; i++) {
+ pthread_mutex_destroy(&cpi->pmutex[i]);
+ }
+ vpx_free(cpi->pmutex);
+ cpi->pmutex = NULL;
+ }
+
+ CHECK_MEM_ERROR(cpi->pmutex, vpx_malloc(sizeof(*cpi->pmutex) *
+ cm->mb_rows));
+ if (cpi->pmutex) {
+ for (i = 0; i < cm->mb_rows; i++) {
+ pthread_mutex_init(&cpi->pmutex[i], NULL);
+ }
+ }
+
vpx_free(cpi->mt_current_mb_col);
CHECK_MEM_ERROR(cpi->mt_current_mb_col,
vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
@@ -1284,9 +1318,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
vp8_denoiser_free(&cpi->denoiser);
- vp8_denoiser_allocate(&cpi->denoiser, width, height,
- cm->mb_rows, cm->mb_cols,
- cpi->oxcf.noise_sensitivity);
+ if (vp8_denoiser_allocate(&cpi->denoiser, width, height,
+ cm->mb_rows, cm->mb_cols,
+ cpi->oxcf.noise_sensitivity))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate denoiser");
}
#endif
}
@@ -1487,7 +1523,8 @@ static void update_layer_contexts (VP8_COMP *cpi)
void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
VP8_COMMON *cm = &cpi->common;
- int last_w, last_h, prev_number_of_layers;
+ int last_w, last_h;
+ unsigned int prev_number_of_layers;
if (!cpi)
return;
@@ -1495,15 +1532,6 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (!oxcf)
return;
-#if CONFIG_MULTITHREAD
- /* wait for the last picture loopfilter thread done */
- if (cpi->b_lpf_running)
- {
- sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
- }
-#endif
-
if (cm->version != oxcf->Version)
{
cm->version = oxcf->Version;
@@ -1759,10 +1787,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height)
cpi->force_next_frame_intra = 1;
- if (((cm->Width + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_width ||
- ((cm->Height + 15) & 0xfffffff0) !=
- cm->yv12_fb[cm->lst_fb_idx].y_height ||
+ if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+ ((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
{
dealloc_raw_frame_buffers(cpi);
@@ -1798,9 +1824,11 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
int width = (cpi->oxcf.Width + 15) & ~15;
int height = (cpi->oxcf.Height + 15) & ~15;
- vp8_denoiser_allocate(&cpi->denoiser, width, height,
- cm->mb_rows, cm->mb_cols,
- cpi->oxcf.noise_sensitivity);
+ if (vp8_denoiser_allocate(&cpi->denoiser, width, height,
+ cm->mb_rows, cm->mb_cols,
+ cpi->oxcf.noise_sensitivity))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate denoiser");
}
}
#endif
@@ -2228,6 +2256,8 @@ void vp8_remove_compressor(VP8_COMP **ptr)
double total_encode_time = (cpi->time_receive_data +
cpi->time_compress_data) / 1000.000;
double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded;
+ const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
+ const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr)
{
@@ -2273,12 +2303,14 @@ void vp8_remove_compressor(VP8_COMP **ptr)
cpi->summed_weights, 8.0);
fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
- "GLPsnrP\tVPXSSIM\t Time(us)\n");
+ "GLPsnrP\tVPXSSIM\t Time(us) Rc-Err "
+ "Abs Err\n");
fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%8.0f\n",
+ "%7.3f\t%8.0f %7.2f %7.2f\n",
dr, cpi->total / cpi->count, total_psnr,
cpi->totalp / cpi->count, total_psnr2,
- total_ssim, total_encode_time);
+ total_ssim, total_encode_time,
+ rate_err, fabs(rate_err));
}
}
@@ -3600,15 +3632,6 @@ static void encode_frame_to_data_rate
/* Clear down mmx registers to allow floating point in what follows */
vp8_clear_system_state();
-#if CONFIG_MULTITHREAD
- /* wait for the last picture loopfilter thread done */
- if (cpi->b_lpf_running)
- {
- sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
- }
-#endif
-
if(cpi->force_next_frame_intra)
{
cm->frame_type = KEY_FRAME; /* delayed intra frame */
@@ -4337,8 +4360,6 @@ static void encode_frame_to_data_rate
vp8_setup_key_frame(cpi);
}
-
-
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
if(cpi->oxcf.error_resilient_mode)
@@ -4804,7 +4825,6 @@ static void encode_frame_to_data_rate
{
/* start loopfilter in separate thread */
sem_post(&cpi->h_event_start_lpf);
- cpi->b_lpf_running = 1;
}
else
#endif
@@ -4836,11 +4856,10 @@ static void encode_frame_to_data_rate
vp8_pack_bitstream(cpi, dest, dest_end, size);
#if CONFIG_MULTITHREAD
- /* if PSNR packets are generated we have to wait for the lpf */
- if (cpi->b_lpf_running && cpi->b_calculate_psnr)
+ /* wait for the loopfilter thread to finish */
+ if (cpi->b_multi_threaded)
{
sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
}
#endif
@@ -5201,7 +5220,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
vp8_second_pass(cpi);
encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags);
- cpi->twopass.bits_left -= 8 * *size;
+ cpi->twopass.bits_left -= 8 * (int)(*size);
if (!cpi->common.refresh_alt_ref_frame)
{
@@ -5800,14 +5819,6 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
{
int ret;
-#if CONFIG_MULTITHREAD
- if(cpi->b_lpf_running)
- {
- sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
- }
-#endif
-
#if CONFIG_POSTPROC
cpi->common.show_frame_mi = cpi->common.mi;
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
@@ -5845,7 +5856,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
return -1;
// Check number of rows and columns match
- if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
+ if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols)
return -1;
// Range check the delta Q values and convert the external Q range values
@@ -5901,7 +5912,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols)
{
- if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols)
+ if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols)
{
if (map)
{
diff --git a/libvpx/vp8/encoder/onyx_int.h b/libvpx/vp8/encoder/onyx_int.h
index 317e4b9e4..44fbbd456 100644
--- a/libvpx/vp8/encoder/onyx_int.h
+++ b/libvpx/vp8/encoder/onyx_int.h
@@ -371,7 +371,7 @@ typedef struct VP8_COMP
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
- unsigned int frames_since_golden;
+ int frames_since_golden;
/* Count down till next GF */
int frames_till_gf_update_due;
@@ -530,11 +530,12 @@ typedef struct VP8_COMP
#if CONFIG_MULTITHREAD
/* multithread data */
+ pthread_mutex_t *pmutex;
+ pthread_mutex_t mt_mutex; /* mutex for b_multi_threaded */
int * mt_current_mb_col;
int mt_sync_range;
int b_multi_threaded;
int encoding_thread_count;
- int b_lpf_running;
pthread_t *h_encoding_thread;
pthread_t h_filter_thread;
diff --git a/libvpx/vp8/encoder/pickinter.c b/libvpx/vp8/encoder/pickinter.c
index d0fff3f04..24b332dcd 100644
--- a/libvpx/vp8/encoder/pickinter.c
+++ b/libvpx/vp8/encoder/pickinter.c
@@ -36,6 +36,8 @@
extern unsigned int cnt_pm;
#endif
+#define MODEL_MODE 1
+
extern const int vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
@@ -45,18 +47,22 @@ extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
// skin color classifier is defined.
// Fixed-point skin color model parameters.
-static const int skin_mean[2] = {7463, 9614}; // q6
+static const int skin_mean[5][2] =
+ {{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}};
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
-static const int skin_threshold = 1570636; // q18
+static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000,
+ 800000}; // q18
// Evaluates the Mahalanobis distance measure for the input CbCr values.
-static int evaluate_skin_color_difference(int cb, int cr)
-{
+static int evaluate_skin_color_difference(int cb, int cr, int idx) {
const int cb_q6 = cb << 6;
const int cr_q6 = cr << 6;
- const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]);
- const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]);
- const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]);
+ const int cb_diff_q12 =
+ (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]);
+ const int cbcr_diff_q12 =
+ (cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]);
+ const int cr_diff_q12 =
+ (cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]);
const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
@@ -67,6 +73,52 @@ static int evaluate_skin_color_difference(int cb, int cr)
return skin_diff;
}
+// Checks if the input YCbCr values correspond to skin color.
+static int is_skin_color(int y, int cb, int cr, int consec_zeromv)
+{
+ if (y < 40 || y > 220)
+ {
+ return 0;
+ }
+ else
+ {
+ if (MODEL_MODE == 0)
+ {
+ return (evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]);
+ }
+ else
+ {
+ int i = 0;
+ // Not skin if the block has had zero motion for a long run of frames.
+ if (consec_zeromv > 60)
+ return 0;
+ // Exit on grey.
+ if (cb == 128 && cr == 128)
+ return 0;
+ // Exit on very strong cb.
+ if (cb > 150 && cr < 110)
+ return 0;
+ for (; i < 5; i++) {
+ int skin_color_diff = evaluate_skin_color_difference(cb, cr, i);
+ if (skin_color_diff < skin_threshold[i + 1]) {
+ if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
+ return 0;
+ else if (consec_zeromv > 25 &&
+ skin_color_diff > (skin_threshold[i + 1] >> 1))
+ return 0;
+ else
+ return 1;
+ }
+ // Exit if the difference is much larger than the threshold.
+ if (skin_color_diff > (skin_threshold[i + 1] << 3)) {
+ return 0;
+ }
+ }
+ return 0;
+ }
+ }
+}
+
static int macroblock_corner_grad(unsigned char* signal, int stride,
int offsetx, int offsety, int sgnx, int sgny)
{
@@ -157,16 +209,6 @@ static int check_dot_artifact_candidate(VP8_COMP *cpi,
return 0;
}
-// Checks if the input yCbCr values corresponds to skin color.
-static int is_skin_color(int y, int cb, int cr)
-{
- if (y < 40 || y > 220)
- {
- return 0;
- }
- return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
-}
-
int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -828,8 +870,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->src.v_buffer[4 * x->src.uv_stride + 3] +
x->src.v_buffer[4 * x->src.uv_stride + 4]) >> 2;
x->is_skin = 0;
- if (!cpi->oxcf.screen_content_mode)
- x->is_skin = is_skin_color(y, cb, cr);
+ if (!cpi->oxcf.screen_content_mode) {
+ int block_index = mb_row * cpi->common.mb_cols + mb_col;
+ x->is_skin = is_skin_color(y, cb, cr, cpi->consec_zero_last[block_index]);
+ }
}
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity) {
@@ -1433,7 +1477,8 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
recon_yoffset, recon_uvoffset,
&cpi->common.lf_info, mb_row, mb_col,
- block_index);
+ block_index,
+ cpi->consec_zero_last_mvbias[block_index]);
// Reevaluate ZEROMV after denoising: for large noise content
// (i.e., cpi->mse_source_denoised is above threshold), do this for all
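
The extended skin classifier evaluates a fixed-point Mahalanobis-style distance d = a(Cb-mCb)^2 + 2b(Cb-mCb)(Cr-mCr) + c(Cr-mCr)^2 against per-model Q18 thresholds. A standalone restatement of the Q-format bookkeeping in evaluate_skin_color_difference() (means in Q6, inverse covariance in Q16, squared differences rounded from Q12 down to Q2 so the final sum lands in Q18):

    /* Standalone sketch of the fixed-point arithmetic above. */
    static int skin_diff_q18(int cb, int cr, const int mean_q6[2],
                             const int inv_cov_q16[4]) {
      const int dcb = (cb << 6) - mean_q6[0];              /* Q6 */
      const int dcr = (cr << 6) - mean_q6[1];              /* Q6 */
      const int cb2_q2  = (dcb * dcb + (1 << 9)) >> 10;    /* Q12 -> Q2 */
      const int cbcr_q2 = (dcb * dcr + (1 << 9)) >> 10;
      const int cr2_q2  = (dcr * dcr + (1 << 9)) >> 10;
      /* Q16 * Q2 = Q18; compared against skin_threshold[] (also Q18). */
      return inv_cov_q16[0] * cb2_q2 + inv_cov_q16[1] * cbcr_q2 +
             inv_cov_q16[2] * cbcr_q2 + inv_cov_q16[3] * cr2_q2;
    }
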
diff --git a/libvpx/vp8/encoder/rdopt.c b/libvpx/vp8/encoder/rdopt.c
index ab0ad1599..6507ae9f1 100644
--- a/libvpx/vp8/encoder/rdopt.c
+++ b/libvpx/vp8/encoder/rdopt.c
@@ -1899,7 +1899,8 @@ static int calculate_final_rd_costs(int this_rd,
int prob_skip_cost;
prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
- prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
+ prob_skip_cost -=
+ (int)vp8_cost_bit(cpi->prob_skip_false, 0);
rd->rate2 += prob_skip_cost;
*other_cost += prob_skip_cost;
}
@@ -2530,7 +2531,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
recon_yoffset, recon_uvoffset,
&cpi->common.lf_info, mb_row, mb_col,
- block_index);
+ block_index, 0);
/* Reevaluate ZEROMV after denoising. */
if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
diff --git a/libvpx/vp8/encoder/vp8_quantize.c b/libvpx/vp8/encoder/vp8_quantize.c
index ee922c9d6..0d101ba5a 100644
--- a/libvpx/vp8/encoder/vp8_quantize.c
+++ b/libvpx/vp8/encoder/vp8_quantize.c
@@ -227,12 +227,12 @@ static void invert_quant(int improved_quant, short *quant,
if(improved_quant)
{
unsigned t;
- int l;
+ int l, m;
t = d;
for(l = 0; t > 1; l++)
t>>=1;
- t = 1 + (1<<(16+l))/d;
- *quant = (short)(t - (1<<16));
+ m = 1 + (1<<(16+l))/d;
+ *quant = (short)(m - (1<<16));
*shift = l;
/* use multiplication and constant shift by 16 */
*shift = 1 << (16 - *shift);
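
For context, invert_quant() turns division by the quantizer d into a multiply-and-shift: with l = floor(log2 d) and m = 1 + 2^(16+l)/d, x/d is approximated by (x*m) >> (16+l); only m - 2^16 is stored, the top bit being implicit. A hypothetical standalone check of the idea (not library code):

    /* Hypothetical demonstration of the reciprocal multiply. */
    #include <stdio.h>

    int main(void) {
      const int d = 37;
      unsigned t = d;
      int l, m, x;
      for (l = 0; t > 1; l++)
        t >>= 1;                              /* l = floor(log2(37)) = 5 */
      m = 1 + (1 << (16 + l)) / d;            /* Q(16+l) reciprocal of d */
      for (x = 0; x < 1000; x += 111)
        printf("%4d / %d: exact %2d  approx %2d\n",
               x, d, x / d, (x * m) >> (16 + l));
      return 0;
    }
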
diff --git a/libvpx/vp8/vp8_cx_iface.c b/libvpx/vp8/vp8_cx_iface.c
index c125ae84d..22a82b734 100644
--- a/libvpx/vp8/vp8_cx_iface.c
+++ b/libvpx/vp8/vp8_cx_iface.c
@@ -22,6 +22,7 @@
#include "vpx/vp8cx.h"
#include "vp8/encoder/firstpass.h"
#include "vp8/common/onyx.h"
+#include "vp8/common/common.h"
#include <stdlib.h>
#include <string.h>
@@ -760,7 +761,7 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
unsigned long duration,
unsigned long deadline)
{
- unsigned int new_qc;
+ int new_qc;
#if !(CONFIG_REALTIME_ONLY)
/* Use best quality mode if no deadline is given. */
@@ -782,10 +783,13 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
}
#else
+ (void)duration;
new_qc = MODE_REALTIME;
#endif
- if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
+ if (deadline == VPX_DL_REALTIME)
+ new_qc = MODE_REALTIME;
+ else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
new_qc = MODE_FIRSTPASS;
else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
new_qc = (new_qc == MODE_BESTQUALITY)
@@ -1116,7 +1120,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
{
YV12_BUFFER_CONFIG sd;
- vp8_ppflags_t flags = {0};
+ vp8_ppflags_t flags;
+ vp8_zero(flags);
if (ctx->preview_ppcfg.post_proc_flag)
{
@@ -1162,31 +1167,6 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
return NULL;
}
-static vpx_codec_err_t vp8e_update_entropy(vpx_codec_alg_priv_t *ctx,
- va_list args)
-{
- int update = va_arg(args, int);
- vp8_update_entropy(ctx->cpi, update);
- return VPX_CODEC_OK;
-
-}
-
-static vpx_codec_err_t vp8e_update_reference(vpx_codec_alg_priv_t *ctx,
- va_list args)
-{
- int update = va_arg(args, int);
- vp8_update_reference(ctx->cpi, update);
- return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t vp8e_use_reference(vpx_codec_alg_priv_t *ctx,
- va_list args)
-{
- int reference_flag = va_arg(args, int);
- vp8_use_as_reference(ctx->cpi, reference_flag);
- return VPX_CODEC_OK;
-}
-
static vpx_codec_err_t vp8e_set_frame_flags(vpx_codec_alg_priv_t *ctx,
va_list args)
{
@@ -1330,8 +1310,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
30, /* rc_resize_up_threshold */
VPX_VBR, /* rc_end_usage */
- {0}, /* rc_twopass_stats_in */
- {0}, /* rc_firstpass_mb_stats_in */
+ {NULL, 0}, /* rc_twopass_stats_in */
+ {NULL, 0}, /* rc_firstpass_mb_stats_in */
256, /* rc_target_bandwidth */
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
@@ -1359,6 +1339,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
{0}, /* ts_rate_decimator */
0, /* ts_periodicity */
{0}, /* ts_layer_id */
+ {0}, /* layer_target_bitrate */
+ 0 /* temporal_layering_mode */
}},
};
diff --git a/libvpx/vp8/vp8_dx_iface.c b/libvpx/vp8/vp8_dx_iface.c
index a12a2ad0e..fc9288d62 100644
--- a/libvpx/vp8/vp8_dx_iface.c
+++ b/libvpx/vp8/vp8_dx_iface.c
@@ -9,6 +9,7 @@
*/
+#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "./vp8_rtcd.h"
@@ -67,10 +68,11 @@ struct vpx_codec_alg_priv
FRAGMENT_DATA fragments;
};
-static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
+static int vp8_init_ctx(vpx_codec_ctx_t *ctx)
{
vpx_codec_alg_priv_t *priv =
(vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
+ if (!priv) return 1;
ctx->priv = (vpx_codec_priv_t *)priv;
ctx->priv->init_flags = ctx->init_flags;
@@ -85,6 +87,8 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
priv->cfg = *ctx->config.dec;
ctx->config.dec = &priv->cfg;
}
+
+ return 0;
}
static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
@@ -103,7 +107,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
* information becomes known.
*/
if (!ctx->priv) {
- vp8_init_ctx(ctx);
+ if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR;
priv = (vpx_codec_alg_priv_t *)ctx->priv;
/* initialize number of fragments to zero */
@@ -151,6 +155,8 @@ static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data,
{
vpx_codec_err_t res = VPX_CODEC_OK;
+ assert(data != NULL);
+
if(data + data_sz <= data)
{
res = VPX_CODEC_INVALID_PARAM;
@@ -516,7 +522,8 @@ static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx,
{
YV12_BUFFER_CONFIG sd;
int64_t time_stamp = 0, time_end_stamp = 0;
- vp8_ppflags_t flags = {0};
+ vp8_ppflags_t flags;
+ vp8_zero(flags);
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
{
@@ -810,11 +817,12 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
},
{ /* encoder functions */
0,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL
+ NULL, /* vpx_codec_enc_cfg_map_t */
+ NULL, /* vpx_codec_encode_fn_t */
+ NULL, /* vpx_codec_get_cx_data_fn_t */
+ NULL, /* vpx_codec_enc_config_set_fn_t */
+ NULL, /* vpx_codec_get_global_headers_fn_t */
+ NULL, /* vpx_codec_get_preview_frame_fn_t */
+ NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */
}
};
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c
index 24c6c54ed..7dd1005d3 100644
--- a/libvpx/vp9/common/vp9_alloccommon.c
+++ b/libvpx/vp9/common/vp9_alloccommon.c
@@ -119,6 +119,20 @@ void vp9_free_context_buffers(VP9_COMMON *cm) {
cm->lf.lfm = NULL;
}
+
+int vp9_alloc_loop_filter(VP9_COMMON *cm) {
+ vpx_free(cm->lf.lfm);
+ // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
+ // stride and rows are rounded up / truncated to a multiple of 8.
+ cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
+ cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
+ ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
+ sizeof(*cm->lf.lfm));
+ if (!cm->lf.lfm)
+ return 1;
+ return 0;
+}
+
int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
int new_mi_size;
@@ -151,15 +165,8 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
cm->above_context_alloc_cols = cm->mi_cols;
}
- vpx_free(cm->lf.lfm);
-
- // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
- // stride and rows are rounded up / truncated to a multiple of 8.
- cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
- cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
- ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
- sizeof(*cm->lf.lfm));
- if (!cm->lf.lfm) goto fail;
+ if (vp9_alloc_loop_filter(cm))
+ goto fail;
return 0;
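
A worked sizing example for the new helper, assuming MI_BLOCK_SIZE == 8 (mi units are 8x8 pixels) and one LOOP_FILTER_MASK per 64x64 region:

    /* For a 1920x1080 frame: mi_cols = 240, mi_rows = 135, so
     *   lfm_stride = (240 + 7) >> 3 = 30
     *   masks      = ((135 + 7) >> 3) * 30 = 17 * 30 = 510
     * i.e. one mask per 64x64 region; vpx_calloc() leaves all bits clear. */
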
diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h
index c0e51a6ce..e53955b99 100644
--- a/libvpx/vp9/common/vp9_alloccommon.h
+++ b/libvpx/vp9/common/vp9_alloccommon.h
@@ -23,6 +23,7 @@ struct BufferPool;
void vp9_remove_common(struct VP9Common *cm);
+int vp9_alloc_loop_filter(struct VP9Common *cm);
int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_init_context_buffers(struct VP9Common *cm);
void vp9_free_context_buffers(struct VP9Common *cm);
diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c
index 0e104ee59..7bab27d4f 100644
--- a/libvpx/vp9/common/vp9_blockd.c
+++ b/libvpx/vp9/common/vp9_blockd.c
@@ -13,7 +13,7 @@
PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
const MODE_INFO *left_mi, int b) {
if (b == 0 || b == 2) {
- if (!left_mi || is_inter_block(&left_mi->mbmi))
+ if (!left_mi || is_inter_block(left_mi))
return DC_PRED;
return get_y_mode(left_mi, b + 1);
@@ -26,7 +26,7 @@ PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi,
const MODE_INFO *above_mi, int b) {
if (b == 0 || b == 1) {
- if (!above_mi || is_inter_block(&above_mi->mbmi))
+ if (!above_mi || is_inter_block(above_mi))
return DC_PRED;
return get_y_mode(above_mi, b + 2);
@@ -40,12 +40,12 @@ void vp9_foreach_transformed_block_in_plane(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
foreach_transformed_block_visitor visit, void *arg) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
- const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi;
+ const MODE_INFO* mi = xd->mi[0];
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
// transform size varies per plane, look it up in a common way.
- const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
- : mbmi->tx_size;
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd)
+ : mi->tx_size;
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index 61eb59162..3d26fb2b5 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -64,7 +64,7 @@ typedef struct {
typedef int8_t MV_REFERENCE_FRAME;
// This structure now relates to 8x8 block regions.
-typedef struct {
+typedef struct MODE_INFO {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
@@ -82,24 +82,21 @@ typedef struct {
// TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
-} MB_MODE_INFO;
-typedef struct MODE_INFO {
- MB_MODE_INFO mbmi;
b_mode_info bmi[4];
} MODE_INFO;
static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
- return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode
- : mi->mbmi.mode;
+ return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode
+ : mi->mode;
}
-static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
- return mbmi->ref_frame[0] > INTRA_FRAME;
+static INLINE int is_inter_block(const MODE_INFO *mi) {
+ return mi->ref_frame[0] > INTRA_FRAME;
}
-static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
- return mbmi->ref_frame[1] > INTRA_FRAME;
+static INLINE int has_second_ref(const MODE_INFO *mi) {
+ return mi->ref_frame[1] > INTRA_FRAME;
}
PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi,
@@ -160,11 +157,9 @@ typedef struct macroblockd {
MODE_INFO **mi;
MODE_INFO *left_mi;
MODE_INFO *above_mi;
- MB_MODE_INFO *left_mbmi;
- MB_MODE_INFO *above_mbmi;
- int up_available;
- int left_available;
+ unsigned int max_blocks_wide;
+ unsigned int max_blocks_high;
const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
@@ -212,19 +207,19 @@ extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES];
static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const MODE_INFO *const mi = xd->mi[0];
- if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi))
+ if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi))
return DCT_DCT;
- return intra_mode_to_tx_type_lookup[mbmi->mode];
+ return intra_mode_to_tx_type_lookup[mi->mode];
}
static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int ib) {
const MODE_INFO *const mi = xd->mi[0];
- if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi))
+ if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi))
return DCT_DCT;
return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)];
@@ -242,9 +237,9 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
}
}
-static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
+static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi,
const struct macroblockd_plane *pd) {
- return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
+ return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x,
pd->subsampling_y);
}
diff --git a/libvpx/vp9/common/vp9_common.h b/libvpx/vp9/common/vp9_common.h
index 76e7cd440..908fa80a3 100644
--- a/libvpx/vp9/common/vp9_common.h
+++ b/libvpx/vp9/common/vp9_common.h
@@ -67,7 +67,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
#define VP9_FRAME_MARKER 0x2
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c
index a6dae6a1c..3409d0484 100644
--- a/libvpx/vp9/common/vp9_common_data.c
+++ b/libvpx/vp9/common/vp9_common_data.c
@@ -159,3 +159,18 @@ const struct {
{0, 8 }, // 64X32 - {0b0000, 0b1000}
{0, 0 }, // 64X64 - {0b0000, 0b0000}
};
+
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+const uint8_t need_top_left[INTRA_MODES] = {
+ 0, // DC_PRED
+ 0, // V_PRED
+ 0, // H_PRED
+ 0, // D45_PRED
+ 1, // D135_PRED
+ 1, // D117_PRED
+ 1, // D153_PRED
+ 0, // D207_PRED
+ 0, // D63_PRED
+ 1, // TM_PRED
+};
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h
index 95a117961..0ae24dad5 100644
--- a/libvpx/vp9/common/vp9_common_data.h
+++ b/libvpx/vp9/common/vp9_common_data.h
@@ -33,6 +33,9 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES];
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+extern const uint8_t need_top_left[INTRA_MODES];
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp9/common/vp9_debugmodes.c b/libvpx/vp9/common/vp9_debugmodes.c
index 3d80103d2..d9c1fd968 100644
--- a/libvpx/vp9/common/vp9_debugmodes.c
+++ b/libvpx/vp9/common/vp9_debugmodes.c
@@ -35,7 +35,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
fprintf(file, "%c ", prefix);
for (mi_col = 0; mi_col < cols; mi_col++) {
fprintf(file, "%2d ",
- *((int*) ((char *) (&mi[0]->mbmi) +
+ *((int*) ((char *) (mi[0]) +
member_offset)));
mi++;
}
@@ -53,18 +53,18 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
int rows = cm->mi_rows;
int cols = cm->mi_cols;
- print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
- print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
- print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
- print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
- print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
+ print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type));
+ print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode));
+ print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0]));
+ print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size));
+ print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode));
// Output skip information.
log_frame_info(cm, "Skips:", mvs);
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(mvs, "S ");
for (mi_col = 0; mi_col < cols; mi_col++) {
- fprintf(mvs, "%2d ", mi[0]->mbmi.skip);
+ fprintf(mvs, "%2d ", mi[0]->skip);
mi++;
}
fprintf(mvs, "\n");
@@ -78,8 +78,8 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(mvs, "V ");
for (mi_col = 0; mi_col < cols; mi_col++) {
- fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row,
- mi[0]->mbmi.mv[0].as_mv.col);
+ fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row,
+ mi[0]->mv[0].as_mv.col);
mi++;
}
fprintf(mvs, "\n");
diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c
index 579857bc9..7b490af34 100644
--- a/libvpx/vp9/common/vp9_entropy.c
+++ b/libvpx/vp9/common/vp9_entropy.c
@@ -36,20 +36,6 @@ const vpx_prob vp9_cat6_prob[] = {
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
};
#if CONFIG_VP9_HIGHBITDEPTH
-const vpx_prob vp9_cat1_prob_high10[] = { 159 };
-const vpx_prob vp9_cat2_prob_high10[] = { 165, 145 };
-const vpx_prob vp9_cat3_prob_high10[] = { 173, 148, 140 };
-const vpx_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 };
-const vpx_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
-const vpx_prob vp9_cat6_prob_high10[] = {
- 255, 255, 254, 254, 254, 252, 249, 243,
- 230, 196, 177, 153, 140, 133, 130, 129
-};
-const vpx_prob vp9_cat1_prob_high12[] = { 159 };
-const vpx_prob vp9_cat2_prob_high12[] = { 165, 145 };
-const vpx_prob vp9_cat3_prob_high12[] = { 173, 148, 140 };
-const vpx_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 };
-const vpx_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
const vpx_prob vp9_cat6_prob_high12[] = {
255, 255, 255, 255, 254, 254, 254, 252, 249,
243, 230, 196, 177, 153, 140, 133, 130, 129
@@ -403,7 +389,6 @@ const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
{255, 241, 243, 255, 236, 255, 252, 254},
{255, 243, 245, 255, 237, 255, 252, 254},
{255, 246, 247, 255, 239, 255, 253, 255},
- {255, 246, 247, 255, 239, 255, 253, 255},
};
static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {
@@ -743,8 +728,8 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {
};
static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
- memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1],
- MODEL_NODES * sizeof(vpx_prob));
+ assert(p != 0);
+ memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob));
}
void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
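
The deleted line relied on `p = 0 ? 0 : p - 1`, which by operator precedence parses as `p = (0 ? 0 : p - 1)`: it always evaluates to p - 1 (and clobbers p), so the apparent p == 0 guard never existed. The fix asserts p != 0 instead, and the duplicated last row of vp9_pareto8_full removed above is what allows COEFF_PROB_MODELS to shrink to 255 in the header change below. A tiny standalone demonstration of the parse:

    #include <assert.h>

    int main(void) {
      int p = 5;
      const int idx = (p = 0 ? 0 : p - 1); /* parses as p = (0 ? 0 : p - 1) */
      assert(idx == 4 && p == 4);          /* always p - 1; the 0 test is dead */
      return 0;
    }
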
diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h
index 21611ed6d..63b3bff5d 100644
--- a/libvpx/vp9/common/vp9_entropy.h
+++ b/libvpx/vp9/common/vp9_entropy.h
@@ -138,7 +138,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
// 1, 3, 5, 7, ..., 253, 255
// In-between probabilities are interpolated linearly
-#define COEFF_PROB_MODELS 256
+#define COEFF_PROB_MODELS 255
#define UNCONSTRAINED_NODES 3
diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c
index 3acfe1448..566ae91cf 100644
--- a/libvpx/vp9/common/vp9_entropymv.c
+++ b/libvpx/vp9/common/vp9_entropymv.c
@@ -11,9 +11,6 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_entropymv.h"
-// Integer pel reference mv threshold for use of high-precision 1/8 mv
-#define COMPANDED_MVREF_THRESH 8
-
const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
-MV_JOINT_ZERO, 2,
-MV_JOINT_HNZVZ, 4,
@@ -127,11 +124,6 @@ MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) {
return c;
}
-int vp9_use_mv_hp(const MV *ref) {
- return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
- (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
-}
-
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
int incr, int usehp) {
int s, z, c, o, d, e, f;
diff --git a/libvpx/vp9/common/vp9_entropymv.h b/libvpx/vp9/common/vp9_entropymv.h
index 8c817bf7b..2f05ad44b 100644
--- a/libvpx/vp9/common/vp9_entropymv.h
+++ b/libvpx/vp9/common/vp9_entropymv.h
@@ -27,7 +27,14 @@ struct VP9Common;
void vp9_init_mv_probs(struct VP9Common *cm);
void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
-int vp9_use_mv_hp(const MV *ref);
+
+// Integer pel reference mv threshold for use of high-precision 1/8 mv
+#define COMPANDED_MVREF_THRESH 8
+
+static INLINE int use_mv_hp(const MV *ref) {
+ return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH &&
+ (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH;
+}
#define MV_UPDATE_PROB 252
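
Since MV components are stored in 1/8-pel units, `abs(v) >> 3` is the magnitude in whole pels, so the inlined use_mv_hp() enables 1/8-pel precision only when the reference MV is within COMPANDED_MVREF_THRESH (8) full pels on both axes. For example:

    /* ref = {60, -70} in 1/8-pel units:
     *   |60|  >> 3 = 7   (< 8, ok)
     *   |-70| >> 3 = 8   (not < 8)
     * so use_mv_hp(&ref) == 0 and high-precision MV is disabled. */
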
diff --git a/libvpx/vp9/common/vp9_idct.c b/libvpx/vp9/common/vp9_idct.c
index d12cd76db..1b420143b 100644
--- a/libvpx/vp9/common/vp9_idct.c
+++ b/libvpx/vp9/common/vp9_idct.c
@@ -174,6 +174,9 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
else if (eob <= 34)
// non-zero coeff only in upper-left 8x8
vpx_idct32x32_34_add(input, dest, stride);
+ else if (eob <= 135)
+ // non-zero coeff only in upper-left 16x16
+ vpx_idct32x32_135_add(input, dest, stride);
else
vpx_idct32x32_1024_add(input, dest, stride);
}
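
Here eob is the position of the last nonzero coefficient in scan order, so a small eob bounds where energy can appear and lets progressively cheaper partial inverse transforms be used. A note on why the cutoffs work, inferred from the in-code comments above:

    /* The inverse scan visits low frequencies first; per the comments above,
     * the first 34 scan positions of the 32x32 transform fall inside the
     * upper-left 8x8 block and the first 135 inside the upper-left 16x16.
     * An eob at or below those cutoffs therefore guarantees every nonzero
     * coefficient lies in the region the partial IDCT reconstructs. */
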
diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c
index b8a113223..183dec4e7 100644
--- a/libvpx/vp9/common/vp9_loopfilter.c
+++ b/libvpx/vp9/common/vp9_loopfilter.c
@@ -232,9 +232,9 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
}
static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
- const MB_MODE_INFO *mbmi) {
- return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]]
- [mode_lf_lut[mbmi->mode]];
+ const MODE_INFO *mi) {
+ return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]]
+ [mode_lf_lut[mi->mode]];
}
void vp9_loop_filter_init(VP9_COMMON *cm) {
@@ -298,200 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
static void filter_selectively_vert_row2(int subsampling_factor,
uint8_t *s, int pitch,
- unsigned int mask_16x16_l,
- unsigned int mask_8x8_l,
- unsigned int mask_4x4_l,
- unsigned int mask_4x4_int_l,
- const loop_filter_info_n *lfi_n,
+ unsigned int mask_16x16,
+ unsigned int mask_8x8,
+ unsigned int mask_4x4,
+ unsigned int mask_4x4_int,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
- const int mask_shift = subsampling_factor ? 4 : 8;
- const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+ const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
-
- unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
- unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
- unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
- unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
- unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
- unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+ const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
-
- for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
- mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
- mask; mask >>= 1) {
- const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
- const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
-
- // TODO(yunqingwang): count in loopfilter functions should be removed.
- if (mask & 1) {
- if ((mask_16x16_0 | mask_16x16_1) & 1) {
- if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
- } else if (mask_16x16_0 & 1) {
- vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ uint8_t *ss[2];
+ ss[0] = s;
+
+ for (mask =
+ (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+ mask; mask = (mask & ~dual_one) >> 1) {
+ if (mask & dual_one) {
+ const loop_filter_thresh *lfis[2];
+ lfis[0] = lfthr + *lfl;
+ lfis[1] = lfthr + *(lfl + lfl_forward);
+ ss[1] = ss[0] + 8 * pitch;
+
+ if (mask_16x16 & dual_one) {
+ if ((mask_16x16 & dual_one) == dual_one) {
+ vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+ lfis[0]->hev_thr);
} else {
- vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+ vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr);
}
}
- if ((mask_8x8_0 | mask_8x8_1) & 1) {
- if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_8x8_0 & 1) {
- vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
- 1);
+ if (mask_8x8 & dual_one) {
+ if ((mask_8x8 & dual_one) == dual_one) {
+ vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+ lfis[0]->hev_thr, lfis[1]->mblim,
+ lfis[1]->lim, lfis[1]->hev_thr);
} else {
- vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+ vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
}
- if ((mask_4x4_0 | mask_4x4_1) & 1) {
- if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_4x4_0 & 1) {
- vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
- 1);
+ if (mask_4x4 & dual_one) {
+ if ((mask_4x4 & dual_one) == dual_one) {
+ vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
+ lfis[0]->hev_thr, lfis[1]->mblim,
+ lfis[1]->lim, lfis[1]->hev_thr);
} else {
- vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+ vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
}
- if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
- if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr);
- } else if (mask_4x4_int_0 & 1) {
- vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ if (mask_4x4_int & dual_one) {
+ if ((mask_4x4_int & dual_one) == dual_one) {
+ vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr);
} else {
- vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+ vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr);
}
}
}
- s += 8;
+ ss[0] += 8;
lfl += 1;
- mask_16x16_0 >>= 1;
- mask_8x8_0 >>= 1;
- mask_4x4_0 >>= 1;
- mask_4x4_int_0 >>= 1;
- mask_16x16_1 >>= 1;
- mask_8x8_1 >>= 1;
- mask_4x4_1 >>= 1;
- mask_4x4_int_1 >>= 1;
+ mask_16x16 >>= 1;
+ mask_8x8 >>= 1;
+ mask_4x4 >>= 1;
+ mask_4x4_int >>= 1;
}
}
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(int subsampling_factor,
uint16_t *s, int pitch,
- unsigned int mask_16x16_l,
- unsigned int mask_8x8_l,
- unsigned int mask_4x4_l,
- unsigned int mask_4x4_int_l,
- const loop_filter_info_n *lfi_n,
+ unsigned int mask_16x16,
+ unsigned int mask_8x8,
+ unsigned int mask_4x4,
+ unsigned int mask_4x4_int,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
- const int mask_shift = subsampling_factor ? 4 : 8;
- const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+ const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
-
- unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
- unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
- unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
- unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
- unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
- unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
- unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+ const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
-
- for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
- mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
- mask; mask >>= 1) {
- const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
- const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
-
- // TODO(yunqingwang): count in loopfilter functions should be removed.
- if (mask & 1) {
- if ((mask_16x16_0 | mask_16x16_1) & 1) {
- if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
- } else if (mask_16x16_0 & 1) {
- vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, bd);
+ uint16_t *ss[2];
+ ss[0] = s;
+
+ for (mask =
+ (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
+ mask; mask = (mask & ~dual_one) >> 1) {
+ if (mask & dual_one) {
+ const loop_filter_thresh *lfis[2];
+ lfis[0] = lfthr + *lfl;
+ lfis[1] = lfthr + *(lfl + lfl_forward);
+ ss[1] = ss[0] + 8 * pitch;
+
+ if (mask_16x16 & dual_one) {
+ if ((mask_16x16 & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
+ vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
}
}
- if ((mask_8x8_0 | mask_8x8_1) & 1) {
- if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_8x8_0 & 1) {
- vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ if (mask_8x8 & dual_one) {
+ if ((mask_8x8 & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
+ vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
}
}
- if ((mask_4x4_0 | mask_4x4_1) & 1) {
- if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_4x4_0 & 1) {
- vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ if (mask_4x4 & dual_one) {
+ if ((mask_4x4 & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
+ vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
}
}
- if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
- if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, bd);
- } else if (mask_4x4_int_0 & 1) {
- vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ if (mask_4x4_int & dual_one) {
+ if ((mask_4x4_int & dual_one) == dual_one) {
+ vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
+ lfis[0]->lim, lfis[0]->hev_thr,
+ lfis[1]->mblim, lfis[1]->lim,
+ lfis[1]->hev_thr, bd);
} else {
- vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
+ vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
+ lfi->mblim, lfi->lim, lfi->hev_thr, bd);
}
}
}
- s += 8;
+ ss[0] += 8;
lfl += 1;
- mask_16x16_0 >>= 1;
- mask_8x8_0 >>= 1;
- mask_4x4_0 >>= 1;
- mask_4x4_int_0 >>= 1;
- mask_16x16_1 >>= 1;
- mask_8x8_1 >>= 1;
- mask_4x4_1 >>= 1;
- mask_4x4_int_1 >>= 1;
+ mask_16x16 >>= 1;
+ mask_8x8 >>= 1;
+ mask_4x4 >>= 1;
+ mask_4x4_int >>= 1;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
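
The rewrite above replaces eight separately shifted per-row masks with one combined word: bit i carries column i of the first row and bit i + lfl_forward column i of the second, so `dual_one = 1 | (1 << lfl_forward)` tests both rows of a column at once, and `mask = (mask & ~dual_one) >> 1` consumes both bits per iteration. A small hypothetical trace of the walk:

    /* Hypothetical trace of the combined dual-row mask walk. */
    #include <stdio.h>

    int main(void) {
      const int lfl_forward = 8;              /* no subsampling */
      const unsigned dual_one = 1u | (1u << lfl_forward);
      /* col 0 set in both rows, col 2 set in row 0 only. */
      unsigned mask = 0x0101u | 0x0004u;
      int col = 0;
      for (; mask; mask = (mask & ~dual_one) >> 1, ++col) {
        if (mask & dual_one)
          printf("col %d: row0=%d row1=%d\n", col, !!(mask & 1),
                 !!(mask & (1u << lfl_forward)));
      }
      return 0;  /* prints: col 0 (both rows), col 2 (row 0 only) */
    }
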
@@ -501,30 +469,30 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
count = 1;
if (mask & 1) {
+ const loop_filter_thresh *lfi = lfthr + *lfl;
+
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2);
+ vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
count = 2;
} else {
- vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -537,23 +505,23 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
} else {
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
else if (mask_4x4_int & 2)
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ lfin->lim, lfin->hev_thr);
}
count = 2;
} else {
- vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -565,22 +533,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
} else {
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
else if (mask_4x4_int & 2)
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ lfin->lim, lfin->hev_thr);
}
count = 2;
} else {
- vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
- } else if (mask_4x4_int & 1) {
+ } else {
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
}
s += 8 * count;
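
A minimal sketch of the mask-walking pattern in filter_selectively_horiz (mask values hypothetical): the per-size masks are OR'd so the loop visits every 8-pixel column that needs any filtering, and count becomes 2 when a dual filter consumes two adjacent columns, keeping all masks shifting in lockstep.

#include <stdio.h>

int main(void) {
  unsigned int mask_16x16 = 0x3; /* columns 0-1 take a dual 16x16 filter */
  unsigned int mask_4x4 = 0x4;   /* column 2 takes a single 4x4 filter */
  unsigned int mask;
  int count, col = 0;
  for (mask = mask_16x16 | mask_4x4; mask; mask >>= count) {
    count = 1;
    if (mask & 1) {
      if ((mask_16x16 & 3) == 3) {
        printf("cols %d-%d: dual 16x16 filter\n", col, col + 1);
        count = 2; /* two columns consumed at once */
      } else if (mask_4x4 & 1) {
        printf("col %d: single 4x4 filter\n", col);
      }
    }
    col += count;
    mask_16x16 >>= count;
    mask_4x4 >>= count;
  }
  return 0;
}
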
@@ -598,30 +566,30 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
-
count = 1;
if (mask & 1) {
+ const loop_filter_thresh *lfi = lfthr + *lfl;
+
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2, bd);
+ vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
count = 2;
} else {
- vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
}
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -635,26 +603,26 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
} else {
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
} else if (mask_4x4_int & 2) {
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
} else {
vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
- const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@@ -667,25 +635,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
} else {
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
} else if (mask_4x4_int & 2) {
vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
} else {
vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
if (mask_4x4_int & 1) {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
- } else if (mask_4x4_int & 1) {
+ } else {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
s += 8 * count;
@@ -704,16 +672,14 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
// whether there were any coefficients encoded, and the loop filter strength
// block we are currently looking at. Shift is used to position the
// 1's we produce.
-// TODO(JBB) Need another function for different resolution color..
static void build_masks(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
const int shift_uv,
LOOP_FILTER_MASK *lfm) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- const BLOCK_SIZE block_size = mbmi->sb_type;
- const TX_SIZE tx_size_y = mbmi->tx_size;
+ const BLOCK_SIZE block_size = mi->sb_type;
+ const TX_SIZE tx_size_y = mi->tx_size;
const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
- const int filter_level = get_filter_level(lfi_n, mbmi);
+ const int filter_level = get_filter_level(lfi_n, mi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
uint64_t *const int_4x4_y = &lfm->int_4x4_y;
@@ -754,7 +720,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
  // If the block has no coefficients and is not intra, we skip applying
  // the loop filter on block edges.
- if (mbmi->skip && is_inter_block(mbmi))
+ if (mi->skip && is_inter_block(mi))
return;
// Here we are adding a mask for the transform size. The transform
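
A toy sketch of the mask writes in build_masks (the pattern constant below is hypothetical, not libvpx's lookup table): a per-block-size bit pattern, one bit per 8x8 unit of the 64x64 region, is shifted to the block's position and OR'd into the 64-bit accumulator.

#include <inttypes.h>
#include <stdio.h>

int main(void) {
  /* Hypothetical "left edge" pattern for a 16x16 block: one bit in each of
     its two 8x8 rows, in a row-major 8x8-unit grid (8 bits per row). */
  const uint64_t left_16x16 = (1ULL << 0) | (1ULL << 8);
  uint64_t left_y = 0;
  /* A block whose top-left 8x8 unit sits at row 2, col 4 of the grid gets
     shift_y = row * 8 + col, roughly as the mask setup computes it. */
  const int shift_y = 2 * 8 + 4;
  left_y |= left_16x16 << shift_y;
  printf("left_y = 0x%016" PRIx64 "\n", left_y);
  return 0;
}
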
@@ -788,10 +754,9 @@ static void build_masks(const loop_filter_info_n *const lfi_n,
static void build_y_mask(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
LOOP_FILTER_MASK *lfm) {
- const MB_MODE_INFO *mbmi = &mi->mbmi;
- const BLOCK_SIZE block_size = mbmi->sb_type;
- const TX_SIZE tx_size_y = mbmi->tx_size;
- const int filter_level = get_filter_level(lfi_n, mbmi);
+ const BLOCK_SIZE block_size = mi->sb_type;
+ const TX_SIZE tx_size_y = mi->tx_size;
+ const int filter_level = get_filter_level(lfi_n, mi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
uint64_t *const int_4x4_y = &lfm->int_4x4_y;
@@ -812,7 +777,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
- if (mbmi->skip && is_inter_block(mbmi))
+ if (mi->skip && is_inter_block(mi))
return;
*above_y |= (size_mask[block_size] &
@@ -941,7 +906,6 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row,
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
-// TODO(JBB): This function only works for yv12.
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
MODE_INFO **mi, const int mode_info_stride,
LOOP_FILTER_MASK *lfm) {
@@ -977,10 +941,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
vp9_zero(*lfm);
assert(mip[0] != NULL);
- // TODO(jimbankoski): Try moving most of the following code into decode
- // loop and storing lfm in the mbmi structure so that we don't have to go
- // through the recursive loop structure multiple times.
- switch (mip[0]->mbmi.sb_type) {
+ switch (mip[0]->sb_type) {
case BLOCK_64X64:
build_masks(lfi_n, mip[0] , 0, 0, lfm);
break;
@@ -1006,7 +967,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
const int mi_32_row_offset = ((idx_32 >> 1) << 2);
if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
continue;
- switch (mip[0]->mbmi.sb_type) {
+ switch (mip[0]->sb_type) {
case BLOCK_32X32:
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
break;
@@ -1036,7 +997,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
continue;
- switch (mip[0]->mbmi.sb_type) {
+ switch (mip[0]->sb_type) {
case BLOCK_16X16:
build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
break;
@@ -1083,8 +1044,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
}
break;
}
-
- vp9_adjust_mask(cm, mi_row, mi_col, lfm);
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -1092,25 +1051,25 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
- vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_4x4 & 1) {
- vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
}
}
if (mask_4x4_int & 1)
- vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1126,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
- const loop_filter_info_n *lfi_n,
+ const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
- const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@@ -1140,15 +1099,15 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
lfi->hev_thr, bd);
} else if (mask_8x8 & 1) {
vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
} else if (mask_4x4 & 1) {
vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
if (mask_4x4_int & 1)
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1186,8 +1145,8 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
// Determine the vertical edges that need filtering
for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
- const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
- const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
+ const BLOCK_SIZE sb_type = mi[0].sb_type;
+ const int skip_this = mi[0].skip && is_inter_block(mi);
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
!(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
@@ -1196,13 +1155,13 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
!(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
const int skip_this_r = skip_this && !block_edge_above;
- const TX_SIZE tx_size = get_uv_tx_size(&mi[0].mbmi, plane);
+ const TX_SIZE tx_size = get_uv_tx_size(mi, plane);
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
// Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] =
- get_filter_level(&cm->lf_info, &mi[0].mbmi)))
+ get_filter_level(&cm->lf_info, mi)))
continue;
// Build masks based on the transform size of each block
@@ -1263,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
- &cm->lf_info, &lfl[r << 3],
+ cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert(dst->buf, dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
- &cm->lf_info, &lfl[r << 3]);
+ cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_vert(dst->buf, dst->stride,
- mask_16x16_c & border_mask,
- mask_8x8_c & border_mask,
- mask_4x4_c & border_mask,
- mask_4x4_int[r],
- &cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
@@ -1312,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
- &cm->lf_info, &lfl[r << 3],
+ cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
- &cm->lf_info, &lfl[r << 3]);
+ cm->lf_info.lfthr, &lfl[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_horiz(dst->buf, dst->stride,
- mask_16x16_r,
- mask_8x8_r,
- mask_4x4_r,
- mask_4x4_int_r,
- &cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
}
@@ -1350,27 +1299,29 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
// Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
- unsigned int mask_16x16_l = mask_16x16 & 0xffff;
- unsigned int mask_8x8_l = mask_8x8 & 0xffff;
- unsigned int mask_4x4_l = mask_4x4 & 0xffff;
- unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
-
-// Disable filtering on the leftmost column.
+ // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- highbd_filter_selectively_vert_row2(
- plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_y[r << 3], (int)cm->bit_depth);
+ highbd_filter_selectively_vert_row2(plane->subsampling_x,
+ CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride,
+ (unsigned int)mask_16x16,
+ (unsigned int)mask_8x8,
+ (unsigned int)mask_4x4,
+ (unsigned int)mask_4x4_int,
+ cm->lf_info.lfthr,
+ &lfm->lfl_y[r << 3],
+ (int)cm->bit_depth);
} else {
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
- mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
+ (unsigned int)mask_16x16,
+ (unsigned int)mask_8x8,
+ (unsigned int)mask_4x4,
+ (unsigned int)mask_4x4_int,
+ cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
- mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 16;
@@ -1403,19 +1354,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- highbd_filter_selectively_horiz(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
- (int)cm->bit_depth);
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int & 0xff,
+ cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
+ (int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
- &lfm->lfl_y[r << 3]);
+ mask_4x4_r, mask_4x4_int & 0xff,
+ cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
- &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@@ -1449,38 +1399,35 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
}
- {
- unsigned int mask_16x16_l = mask_16x16 & 0xff;
- unsigned int mask_8x8_l = mask_8x8 & 0xff;
- unsigned int mask_4x4_l = mask_4x4 & 0xff;
- unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
-
-// Disable filtering on the leftmost column.
+ // Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- highbd_filter_selectively_vert_row2(
- plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfl_uv[r << 1], (int)cm->bit_depth);
- } else {
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfl_uv[r << 1]);
- }
-#else
- filter_selectively_vert_row2(
- plane->subsampling_x, dst->buf, dst->stride,
- mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfl_uv[r << 1]);
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_vert_row2(plane->subsampling_x,
+ CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride,
+ (unsigned int)mask_16x16,
+ (unsigned int)mask_8x8,
+ (unsigned int)mask_4x4,
+ (unsigned int)mask_4x4_int,
+ cm->lf_info.lfthr, &lfl_uv[r << 1],
+ (int)cm->bit_depth);
+ } else {
#endif // CONFIG_VP9_HIGHBITDEPTH
-
- dst->buf += 16 * dst->stride;
- mask_16x16 >>= 8;
- mask_8x8 >>= 8;
- mask_4x4 >>= 8;
- mask_4x4_int >>= 8;
+ filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
+ (unsigned int)mask_16x16,
+ (unsigned int)mask_8x8,
+ (unsigned int)mask_4x4,
+ (unsigned int)mask_4x4_int,
+ cm->lf_info.lfthr, &lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ dst->buf += 16 * dst->stride;
+ mask_16x16 >>= 8;
+ mask_8x8 >>= 8;
+ mask_4x4 >>= 8;
+ mask_4x4_int >>= 8;
}
// Horizontal pass
@@ -1512,17 +1459,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfl_uv[r << 1], (int)cm->bit_depth);
+ mask_4x4_r, mask_4x4_int_r,
+ cm->lf_info.lfthr, &lfl_uv[r << 1],
+ (int)cm->bit_depth);
} else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
&lfl_uv[r << 1]);
+#if CONFIG_VP9_HIGHBITDEPTH
}
-#else
- filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
- mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@@ -1558,7 +1504,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
- // TODO(JBB): Make setup_mask work for non 420.
+  // TODO(jimbankoski): For 444, only the y mask needs to be built.
vp9_adjust_mask(cm, mi_row, mi_col, lfm);
vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
@@ -1598,6 +1544,8 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
}
// Used by the encoder to build the loopfilter masks.
+// TODO(slavarnway): Make the encoder build the masks inline as part of the
+// encode process, the same way the decoder does.
void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
int partial_frame) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
@@ -1640,12 +1588,12 @@ static const uint8_t first_block_in_16x16[8][8] = {
// This function sets up the bit masks for a block represented
// by mi_row, mi_col in a 64x64 region.
// TODO(SJL): This function only works for yv12.
-void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row,
+void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row,
int mi_col, int bw, int bh) {
- const BLOCK_SIZE block_size = mbmi->sb_type;
- const TX_SIZE tx_size_y = mbmi->tx_size;
+ const BLOCK_SIZE block_size = mi->sb_type;
+ const TX_SIZE tx_size_y = mi->tx_size;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
- const int filter_level = get_filter_level(lfi_n, mbmi);
+ const int filter_level = get_filter_level(lfi_n, mi);
const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
@@ -1693,7 +1641,7 @@ void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row,
  // If the block has no coefficients and is not intra, we skip applying
  // the loop filter on block edges.
- if (mbmi->skip && is_inter_block(mbmi))
+ if (mi->skip && is_inter_block(mi))
return;
// Add a mask for the transform size. The transform size mask is set to
diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h
index 7f943ea09..fca8830fa 100644
--- a/libvpx/vp9/common/vp9_loopfilter.h
+++ b/libvpx/vp9/common/vp9_loopfilter.h
@@ -69,6 +69,7 @@ typedef struct {
struct loopfilter {
int filter_level;
+ int last_filt_level;
int sharpness_level;
int last_sharpness_level;
@@ -134,7 +135,7 @@ static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf,
return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)];
}
-void vp9_build_mask(struct VP9Common *cm, const MB_MODE_INFO *mbmi, int mi_row,
+void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row,
int mi_col, int bw, int bh);
void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row,
const int mi_col, LOOP_FILTER_MASK *lfm);
diff --git a/libvpx/vp9/common/vp9_mfqe.c b/libvpx/vp9/common/vp9_mfqe.c
index 6d560f438..f5264665b 100644
--- a/libvpx/vp9/common/vp9_mfqe.c
+++ b/libvpx/vp9/common/vp9_mfqe.c
@@ -203,12 +203,12 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
  // Check the motion in the current block (for an inter frame),
  // or the motion in the correlated block in the last frame (for a keyframe).
- const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
- mi->mbmi.mv[0].as_mv.row +
- mi->mbmi.mv[0].as_mv.col *
- mi->mbmi.mv[0].as_mv.col;
+ const int mv_len_square = mi->mv[0].as_mv.row *
+ mi->mv[0].as_mv.row +
+ mi->mv[0].as_mv.col *
+ mi->mv[0].as_mv.col;
const int mv_threshold = 100;
- return mi->mbmi.mode >= NEARESTMV && // Not an intra block
+ return mi->mode >= NEARESTMV && // Not an intra block
cur_bs >= BLOCK_16X16 &&
mv_len_square <= mv_threshold;
}
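
A worked instance of the mfqe_decision threshold (values hypothetical): a motion vector of (8, 6) gives 8*8 + 6*6 = 100, which is exactly mv_threshold, so a >= 16x16 inter block with that motion still qualifies.

#include <stdio.h>

int main(void) {
  const int row = 8, col = 6; /* hypothetical motion vector components */
  const int mv_len_square = row * row + col * col;
  const int mv_threshold = 100;
  printf("mv_len_square = %d, qualifies: %s\n", mv_len_square,
         mv_len_square <= mv_threshold ? "yes" : "no");
  return 0;
}
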
@@ -220,7 +220,7 @@ static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
uint8_t *yd, uint8_t *ud, uint8_t *vd,
int yd_stride, int uvd_stride) {
int mi_offset, y_offset, uv_offset;
- const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
+ const BLOCK_SIZE cur_bs = mi->sb_type;
const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
const int bsl = b_width_log2_lookup[bs];
PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c
index 77d1ff459..0eb01a51b 100644
--- a/libvpx/vp9/common/vp9_mvref_common.c
+++ b/libvpx/vp9/common/vp9_mvref_common.c
@@ -11,20 +11,19 @@
#include "vp9/common/vp9_mvref_common.h"
-// This function searches the neighbourhood of a given MB/SB
+// This function searches the neighborhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int block, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data,
uint8_t *mode_context) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
- const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+ const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];
int different_ref_found = 0;
int context_counter = 0;
- const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
const TileInfo *const tile = &xd->tile;
@@ -39,15 +38,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
xd->mi_stride];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
// Keep counts for entropy encoding.
- context_counter += mode_2_counter[candidate->mode];
+ context_counter += mode_2_counter[candidate_mi->mode];
different_ref_found = 1;
- if (candidate->ref_frame[0] == ref_frame)
+ if (candidate_mi->ref_frame[0] == ref_frame)
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
refmv_count, mv_ref_list, Done);
- else if (candidate->ref_frame[1] == ref_frame)
+ else if (candidate_mi->ref_frame[1] == ref_frame)
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
refmv_count, mv_ref_list, Done);
}
@@ -59,34 +57,19 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
for (; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
- xd->mi_stride]->mbmi;
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
different_ref_found = 1;
- if (candidate->ref_frame[0] == ref_frame)
- ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
- else if (candidate->ref_frame[1] == ref_frame)
- ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, Done);
+ if (candidate_mi->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
+ else if (candidate_mi->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done);
}
}
- // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
- // on windows platform. The sync here is unncessary if use_perv_frame_mvs
- // is 0. But after removing it, there will be hang in the unit test on windows
- // due to several threads waiting for a thread's signal.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
- if (cm->frame_parallel_decode && sync != NULL) {
- sync(data, mi_row);
- }
-#endif
-
// Check the last frame's mode and mv info.
if (cm->use_prev_frame_mvs) {
- // Synchronize here for frame parallel decode if sync function is provided.
- if (cm->frame_parallel_decode && sync != NULL) {
- sync(data, mi_row);
- }
-
if (prev_frame_mvs->ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
} else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
@@ -101,11 +84,11 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
- * xd->mi_stride]->mbmi;
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
      // If the candidate is INTRA, we don't want to consider its mv.
- IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
+ IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias,
refmv_count, mv_ref_list, Done);
}
}
@@ -150,20 +133,9 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data,
uint8_t *mode_context) {
find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1,
- mi_row, mi_col, sync, data, mode_context);
-}
-
-static void lower_mv_precision(MV *mv, int allow_hp) {
- const int use_hp = allow_hp && vp9_use_mv_hp(mv);
- if (!use_hp) {
- if (mv->row & 1)
- mv->row += (mv->row > 0 ? -1 : 1);
- if (mv->col & 1)
- mv->col += (mv->col > 0 ? -1 : 1);
- }
+ mi_row, mi_col, mode_context);
}
void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
@@ -190,8 +162,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
assert(MAX_MV_REF_CANDIDATES == 2);
- find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block,
- mi_row, mi_col, NULL, NULL, mode_context);
+ find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block,
+ mi_row, mi_col, mode_context);
near_mv->as_int = 0;
switch (block) {
diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h
index bd216d433..4380843e2 100644
--- a/libvpx/vp9/common/vp9_mvref_common.h
+++ b/libvpx/vp9/common/vp9_mvref_common.h
@@ -136,19 +136,19 @@ static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
// on whether the block_size < 8x8 and we have check_sub_blocks set.
static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
int search_col, int block_idx) {
- return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+ return block_idx >= 0 && candidate->sb_type < BLOCK_8X8
? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
.as_mv[which_mv]
- : candidate->mbmi.mv[which_mv];
+ : candidate->mv[which_mv];
}
// Performs mv sign inversion if indicated by the reference frame combination.
-static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref,
const MV_REFERENCE_FRAME this_ref_frame,
const int *ref_sign_bias) {
- int_mv mv = mbmi->mv[ref];
- if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
+ int_mv mv = mi->mv[ref];
+ if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
mv.as_mv.row *= -1;
mv.as_mv.col *= -1;
}
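
A small sketch of the sign inversion in scale_mv: when the candidate's reference frame and the current reference frame sit on opposite temporal sides (their sign-bias entries differ), the borrowed vector is negated so it points the right way. The bias table below is made up for illustration.

#include <stdio.h>

int main(void) {
  const int ref_sign_bias[3] = { 0, 0, 1 }; /* hypothetical: frame 2 is a future reference */
  int mv_row = 4, mv_col = -6;              /* candidate mv borrowed from frame 2 */
  const int cand_frame = 2, this_frame = 1;
  if (ref_sign_bias[cand_frame] != ref_sign_bias[this_frame]) {
    mv_row *= -1;
    mv_col *= -1;
  }
  printf("scaled mv = (%d, %d)\n", mv_row, mv_col); /* prints (-4, 6) */
  return 0;
}
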
@@ -157,7 +157,7 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
 // This macro is used to add a motion vector to the mv_ref list if it isn't
 // already in the list. If it's the second motion vector, it will also
-// skip all additional processing and jump to done!
+// skip all additional processing and jump to Done!
#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \
do { \
if (refmv_count) { \
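
The macro body is cut off by the hunk above; as a rough functional sketch (not the macro itself), ADD_MV_REF_LIST behaves like the following: a candidate is appended only if it differs from the first entry, and finding the second distinct vector ends the search via the Done label.

#include <stdio.h>

#define MAX_MV_REF_CANDIDATES 2

int main(void) {
  int list[MAX_MV_REF_CANDIDATES];
  int refmv_count = 0;
  const int candidates[3] = { 5, 5, 9 }; /* hypothetical packed mvs */
  int i;
  for (i = 0; i < 3; ++i) {
    const int mv = candidates[i];
    if (refmv_count) {
      if (mv != list[0]) {
        list[refmv_count++] = mv;
        goto Done; /* second distinct mv found; stop searching */
      }
    } else {
      list[refmv_count++] = mv;
    }
  }
Done:
  printf("kept %d candidate(s), first = %d\n", refmv_count, list[0]);
  return 0;
}
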
@@ -207,11 +207,20 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+ const int use_hp = allow_hp && use_mv_hp(mv);
+ if (!use_hp) {
+ if (mv->row & 1)
+ mv->row += (mv->row > 0 ? -1 : 1);
+ if (mv->col & 1)
+ mv->col += (mv->col > 0 ? -1 : 1);
+ }
+}
+
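
A quick check of lower_mv_precision: with high precision off, odd components are pulled one step toward zero, so (5, -7) becomes (4, -6) while even components are untouched. This sketch omits the use_mv_hp() consultation.

#include <stdio.h>

typedef struct { int row, col; } DemoMV;

static void lower_precision_demo(DemoMV *mv, int allow_hp) {
  if (!allow_hp) { /* simplified: the real code also checks use_mv_hp() */
    if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1);
    if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1);
  }
}

int main(void) {
  DemoMV mv = { 5, -7 };
  lower_precision_demo(&mv, 0);
  printf("(%d, %d)\n", mv.row, mv.col); /* prints (4, -6) */
  return 0;
}
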
typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data,
uint8_t *mode_context);
 // check a list of motion vectors by SAD score using a number of rows of pixels
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index ceffdedf9..3fd935e62 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -404,25 +404,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
// Are edges available for intra prediction?
- xd->up_available = (mi_row != 0);
- xd->left_available = (mi_col > tile->mi_col_start);
- if (xd->up_available) {
- xd->above_mi = xd->mi[-xd->mi_stride];
- // above_mi may be NULL in VP9 encoder's first pass.
- xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
- } else {
- xd->above_mi = NULL;
- xd->above_mbmi = NULL;
- }
-
- if (xd->left_available) {
- xd->left_mi = xd->mi[-1];
- // left_mi may be NULL in VP9 encoder's first pass.
- xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
- } else {
- xd->left_mi = NULL;
- xd->left_mbmi = NULL;
- }
+ xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL;
+ xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL;
}
static INLINE void update_partition_context(MACROBLOCKD *xd,
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c
index b685d813b..c04cc8f05 100644
--- a/libvpx/vp9/common/vp9_postproc.c
+++ b/libvpx/vp9/common/vp9_postproc.c
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include <stdio.h>
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
@@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
state->last_noise = a;
}
-void vp9_plane_add_noise_c(uint8_t *start, char *noise,
- char blackclamp[16],
- char whiteclamp[16],
- char bothclamp[16],
- unsigned int width, unsigned int height, int pitch) {
- unsigned int i, j;
-
- // TODO(jbb): why does simd code use both but c doesn't, normalize and
- // fix..
- (void) bothclamp;
- for (i = 0; i < height; i++) {
- uint8_t *pos = start + i * pitch;
- char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
-
- for (j = 0; j < width; j++) {
- if (pos[j] < blackclamp[0])
- pos[j] = blackclamp[0];
-
- if (pos[j] > 255 + whiteclamp[0])
- pos[j] = 255 + whiteclamp[0];
-
- pos[j] += ref[j];
- }
- }
-}
-
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
@@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
ppstate->last_noise != noise_level) {
fillrd(ppstate, 63 - q, noise_level);
}
-
- vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
+ vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}
diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c
index 1f1632573..8f90e70e7 100644
--- a/libvpx/vp9/common/vp9_pred_common.c
+++ b/libvpx/vp9/common/vp9_pred_common.c
@@ -17,82 +17,57 @@
int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
// Note:
// The mode info data structure has a one element border above and to the
- // left of the entries correpsonding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int left_type = xd->left_available && is_inter_block(left_mbmi) ?
- left_mbmi->interp_filter : SWITCHABLE_FILTERS;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const int above_type = xd->up_available && is_inter_block(above_mbmi) ?
- above_mbmi->interp_filter : SWITCHABLE_FILTERS;
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int left_type = left_mi && is_inter_block(left_mi) ?
+ left_mi->interp_filter : SWITCHABLE_FILTERS;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const int above_type = above_mi && is_inter_block(above_mi) ?
+ above_mi->interp_filter : SWITCHABLE_FILTERS;
if (left_type == above_type)
return left_type;
- else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
+ else if (left_type == SWITCHABLE_FILTERS)
return above_type;
- else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
+ else if (above_type == SWITCHABLE_FILTERS)
return left_type;
else
return SWITCHABLE_FILTERS;
}
-// The mode info data structure has a one element border above and to the
-// left of the entries corresponding to real macroblocks.
-// The prediction flags in these dummy entries are initialized to 0.
-// 0 - inter/inter, inter/--, --/inter, --/--
-// 1 - intra/inter, inter/intra
-// 2 - intra/--, --/intra
-// 3 - intra/intra
-int vp9_get_intra_inter_context(const MACROBLOCKD *xd) {
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
-
- if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
- return left_intra && above_intra ? 3
- : left_intra || above_intra;
- } else if (has_above || has_left) { // one edge available
- return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi);
- } else {
- return 0;
- }
-}
-
int vp9_get_reference_mode_context(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
int ctx;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int has_above = !!above_mi;
+ const int has_left = !!left_mi;
// Note:
// The mode info data structure has a one element border above and to the
- // left of the entries correpsonding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
if (has_above && has_left) { // both edges available
- if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+ if (!has_second_ref(above_mi) && !has_second_ref(left_mi))
// neither edge uses comp pred (0/1)
- ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
- (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
- else if (!has_second_ref(above_mbmi))
+ ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^
+ (left_mi->ref_frame[0] == cm->comp_fixed_ref);
+ else if (!has_second_ref(above_mi))
// one of two edges uses comp pred (2/3)
- ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
- !is_inter_block(above_mbmi));
- else if (!has_second_ref(left_mbmi))
+ ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref ||
+ !is_inter_block(above_mi));
+ else if (!has_second_ref(left_mi))
// one of two edges uses comp pred (2/3)
- ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
- !is_inter_block(left_mbmi));
+ ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref ||
+ !is_inter_block(left_mi));
else // both edges use comp pred (4)
ctx = 4;
} else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+ const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;
- if (!has_second_ref(edge_mbmi))
+ if (!has_second_ref(edge_mi))
// edge does not use comp pred (0/1)
- ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
+ ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref;
else
// edge uses comp pred (3)
ctx = 3;
@@ -107,39 +82,39 @@ int vp9_get_reference_mode_context(const VP9_COMMON *cm,
int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int above_in_image = xd->up_available;
- const int left_in_image = xd->left_available;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int above_in_image = !!above_mi;
+ const int left_in_image = !!left_mi;
// Note:
// The mode info data structure has a one element border above and to the
- // left of the entries correpsonding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
const int var_ref_idx = !fix_ref_idx;
if (above_in_image && left_in_image) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
+ const int above_intra = !is_inter_block(above_mi);
+ const int left_intra = !is_inter_block(left_mi);
if (above_intra && left_intra) { // intra/intra (2)
pred_context = 2;
} else if (above_intra || left_intra) { // intra/inter
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
- if (!has_second_ref(edge_mbmi)) // single pred (1/3)
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+ if (!has_second_ref(edge_mi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]);
else // comp pred (1/3)
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx]
+ pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx]
!= cm->comp_var_ref[1]);
} else { // inter/inter
- const int l_sg = !has_second_ref(left_mbmi);
- const int a_sg = !has_second_ref(above_mbmi);
- const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0]
- : above_mbmi->ref_frame[var_ref_idx];
- const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0]
- : left_mbmi->ref_frame[var_ref_idx];
+ const int l_sg = !has_second_ref(left_mi);
+ const int a_sg = !has_second_ref(above_mi);
+ const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0]
+ : above_mi->ref_frame[var_ref_idx];
+ const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0]
+ : left_mi->ref_frame[var_ref_idx];
if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) {
pred_context = 0;
@@ -167,16 +142,16 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
}
}
} else if (above_in_image || left_in_image) { // one edge available
- const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+ const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi;
- if (!is_inter_block(edge_mbmi)) {
+ if (!is_inter_block(edge_mi)) {
pred_context = 2;
} else {
- if (has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx]
+ if (has_second_ref(edge_mi))
+ pred_context = 4 * (edge_mi->ref_frame[var_ref_idx]
!= cm->comp_var_ref[1]);
else
- pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+ pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]);
}
} else { // no edges available (2)
pred_context = 2;
@@ -188,34 +163,34 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int has_above = !!above_mi;
+ const int has_left = !!left_mi;
// Note:
// The mode info data structure has a one element border above and to the
- // left of the entries correpsonding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
+ const int above_intra = !is_inter_block(above_mi);
+ const int left_intra = !is_inter_block(left_mi);
if (above_intra && left_intra) { // intra/intra
pred_context = 2;
} else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
+ if (!has_second_ref(edge_mi))
+ pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME);
else
- pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
- edge_mbmi->ref_frame[1] == LAST_FRAME);
+ pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME ||
+ edge_mi->ref_frame[1] == LAST_FRAME);
} else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+ const int above_has_second = has_second_ref(above_mi);
+ const int left_has_second = has_second_ref(left_mi);
+ const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1];
if (above_has_second && left_has_second) {
pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
@@ -234,15 +209,15 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
}
}
} else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
- if (!is_inter_block(edge_mbmi)) { // intra
+ const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;
+ if (!is_inter_block(edge_mi)) { // intra
pred_context = 2;
} else { // inter
- if (!has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ if (!has_second_ref(edge_mi))
+ pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME);
else
- pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
- edge_mbmi->ref_frame[1] == LAST_FRAME);
+ pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME ||
+ edge_mi->ref_frame[1] == LAST_FRAME);
}
} else { // no edges available
pred_context = 2;
@@ -254,39 +229,39 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
int pred_context;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int has_above = !!above_mi;
+ const int has_left = !!left_mi;
// Note:
// The mode info data structure has a one element border above and to the
- // left of the entries correpsonding to real macroblocks.
- // The prediction flags in these dummy entries are initialised to 0.
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
if (has_above && has_left) { // both edges available
- const int above_intra = !is_inter_block(above_mbmi);
- const int left_intra = !is_inter_block(left_mbmi);
+ const int above_intra = !is_inter_block(above_mi);
+ const int left_intra = !is_inter_block(left_mi);
if (above_intra && left_intra) { // intra/intra
pred_context = 2;
} else if (above_intra || left_intra) { // intra/inter or inter/intra
- const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
- if (!has_second_ref(edge_mbmi)) {
- if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+ const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi;
+ if (!has_second_ref(edge_mi)) {
+ if (edge_mi->ref_frame[0] == LAST_FRAME)
pred_context = 3;
else
- pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
} else {
- pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mi->ref_frame[1] == GOLDEN_FRAME);
}
} else { // inter/inter
- const int above_has_second = has_second_ref(above_mbmi);
- const int left_has_second = has_second_ref(left_mbmi);
- const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
- const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+ const int above_has_second = has_second_ref(above_mi);
+ const int left_has_second = has_second_ref(left_mi);
+ const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1];
if (above_has_second && left_has_second) {
if (above0 == left0 && above1 == left1)
@@ -321,16 +296,16 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
}
}
} else if (has_above || has_left) { // one edge available
- const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+ const MODE_INFO *edge_mi = has_above ? above_mi : left_mi;
- if (!is_inter_block(edge_mbmi) ||
- (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi)))
+ if (!is_inter_block(edge_mi) ||
+ (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi)))
pred_context = 2;
- else if (!has_second_ref(edge_mbmi))
- pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ else if (!has_second_ref(edge_mi))
+ pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
else
- pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
- edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mi->ref_frame[1] == GOLDEN_FRAME);
} else { // no edges available (2)
pred_context = 2;
}
diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h
index 6f7af4a50..f3c676e95 100644
--- a/libvpx/vp9/common/vp9_pred_common.h
+++ b/libvpx/vp9/common/vp9_pred_common.h
@@ -42,8 +42,8 @@ static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) {
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
const int above_sip = (above_mi != NULL) ?
- above_mi->mbmi.seg_id_predicted : 0;
- const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0;
+ above_mi->seg_id_predicted : 0;
+ const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0;
return above_sip + left_sip;
}
@@ -56,8 +56,8 @@ static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg,
static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
- const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
- const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
+ const int above_skip = (above_mi != NULL) ? above_mi->skip : 0;
+ const int left_skip = (left_mi != NULL) ? left_mi->skip : 0;
return above_skip + left_skip;
}
@@ -68,11 +68,32 @@ static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm,
int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
-int vp9_get_intra_inter_context(const MACROBLOCKD *xd);
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real macroblocks.
+// The prediction flags in these dummy entries are initialized to 0.
+// 0 - inter/inter, inter/--, --/inter, --/--
+// 1 - intra/inter, inter/intra
+// 2 - intra/--, --/intra
+// 3 - intra/intra
+static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) {
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int has_above = !!above_mi;
+ const int has_left = !!left_mi;
+
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mi);
+ const int left_intra = !is_inter_block(left_mi);
+ return left_intra && above_intra ? 3 : left_intra || above_intra;
+ } else if (has_above || has_left) { // one edge available
+ return 2 * !is_inter_block(has_above ? above_mi : left_mi);
+ }
+ return 0;
+}
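
As a sanity check of the 0-3 mapping documented above, the same logic restated on plain booleans (a standalone sketch, not the libvpx function):

#include <stdio.h>

static int intra_inter_ctx(int has_above, int above_intra,
                           int has_left, int left_intra) {
  if (has_above && has_left)
    return (left_intra && above_intra) ? 3 : (left_intra || above_intra);
  if (has_above || has_left)
    return 2 * (has_above ? above_intra : left_intra);
  return 0;
}

int main(void) {
  printf("%d %d %d %d\n",
         intra_inter_ctx(1, 0, 1, 0),  /* inter/inter -> 0 */
         intra_inter_ctx(1, 1, 1, 0),  /* intra/inter -> 1 */
         intra_inter_ctx(1, 1, 0, 0),  /* intra/--    -> 2 */
         intra_inter_ctx(1, 1, 1, 1)); /* intra/intra -> 3 */
  return 0;
}
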
static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc->intra_inter_prob[vp9_get_intra_inter_context(xd)];
+ return cm->fc->intra_inter_prob[get_intra_inter_context(xd)];
}
int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd);
@@ -110,15 +131,15 @@ static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
// left of the entries corresponding to real blocks.
// The prediction flags in these dummy entries are initialized to 0.
static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
- const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type];
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const int has_above = xd->up_available;
- const int has_left = xd->left_available;
- int above_ctx = (has_above && !above_mbmi->skip) ? (int)above_mbmi->tx_size
- : max_tx_size;
- int left_ctx = (has_left && !left_mbmi->skip) ? (int)left_mbmi->tx_size
- : max_tx_size;
+ const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type];
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int has_above = !!above_mi;
+ const int has_left = !!left_mi;
+ int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size
+ : max_tx_size;
+ int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size
+ : max_tx_size;
if (!has_left)
left_ctx = above_ctx;
diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c
index d8c14ecc8..84718e970 100644
--- a/libvpx/vp9/common/vp9_reconinter.c
+++ b/libvpx/vp9/common/vp9_reconinter.c
@@ -20,19 +20,6 @@
#include "vp9/common/vp9_reconintra.h"
#if CONFIG_VP9_HIGHBITDEPTH
-void high_inter_predictor(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- const int subpel_x,
- const int subpel_y,
- const struct scale_factors *sf,
- int w, int h, int ref,
- const InterpKernel *kernel,
- int xs, int ys, int bd) {
- sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
-}
-
void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *src_mv,
@@ -50,8 +37,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
- high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
- sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd);
+ highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+ sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4,
+ bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -159,8 +147,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const MODE_INFO *mi = xd->mi[0];
- const int is_compound = has_second_ref(&mi->mbmi);
- const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
+ const int is_compound = has_second_ref(mi);
+ const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
int ref;
for (ref = 0; ref < 1 + is_compound; ++ref) {
@@ -168,9 +156,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
struct buf_2d *const pre_buf = &pd->pre[ref];
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+ const MV mv = mi->sb_type < BLOCK_8X8
? average_split_mvs(pd, mi, ref, block)
- : mi->mbmi.mv[ref].as_mv;
+ : mi->mv[ref].as_mv;
// TODO(jkoleszar): This clamping is done in the incorrect place for the
// scaling case. It needs to be done on the scaled MV, not the pre-scaling
@@ -190,6 +178,12 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
// Co-ordinate of containing block to pixel precision.
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ assert(xd->mi[0]->sb_type != BLOCK_4X8 &&
+ xd->mi[0]->sb_type != BLOCK_8X4);
+ assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) &&
+ mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x)));
+#endif
if (plane == 0)
pre_buf->buf = xd->block_refs[ref]->buf->y_buffer;
else if (plane == 1)
@@ -216,9 +210,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
- xd->bd);
+ highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+ subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
+ xd->bd);
} else {
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
@@ -244,7 +238,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
- if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+ if (xd->mi[0]->sb_type < BLOCK_8X8) {
int i = 0, x, y;
assert(bsize == BLOCK_8X8);
for (y = 0; y < num_4x4_h; ++y)
diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h
index 7d907748e..07745e3aa 100644
--- a/libvpx/vp9/common/vp9_reconinter.h
+++ b/libvpx/vp9/common/vp9_reconinter.h
@@ -34,14 +34,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
}
#if CONFIG_VP9_HIGHBITDEPTH
-void high_inter_predictor(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- const int subpel_x,
- const int subpel_y,
- const struct scale_factors *sf,
- int w, int h, int ref,
- const InterpKernel *kernel,
- int xs, int ys, int bd);
+static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int subpel_x,
+ const int subpel_y,
+ const struct scale_factors *sf,
+ int w, int h, int ref,
+ const InterpKernel *kernel,
+ int xs, int ys, int bd) {
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+}
#endif // CONFIG_VP9_HIGHBITDEPTH
MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi,
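high_inter_predictor() is renamed to highbd_inter_predictor() and moved into the header as static INLINE, so the table dispatch can be inlined at every call site. The predictor comes from a 2x2x2 table keyed on whether each sub-pel component is nonzero and on the reference index. A hedged sketch of that dispatch shape (stand-in names, not the libvpx convolve API):

    /* Sketch of the [subpel_x != 0][subpel_y != 0][ref] dispatch used by
     * sf->highbd_predict above: integer-MV positions take a copy path,
     * fractional positions take a horizontal, vertical, or 2-D filter path. */
    typedef int (*predict_fn_sketch)(void);

    static int copy_path(void)  { return 0; }
    static int horiz_path(void) { return 1; }
    static int vert_path(void)  { return 2; }
    static int hv_path(void)    { return 3; }

    int main(void) {
      const predict_fn_sketch table[2][2][2] = {
        { { copy_path, copy_path },   { vert_path, vert_path } },
        { { horiz_path, horiz_path }, { hv_path, hv_path } },
      };
      const int subpel_x = 3, subpel_y = 0, ref = 0;
      const int path = table[subpel_x != 0][subpel_y != 0][ref]();
      return path == 1 ? 0 : 1; /* fractional x only -> horizontal path */
    }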
diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c
index 3d84a2883..445785835 100644
--- a/libvpx/vp9/common/vp9_reconintra.c
+++ b/libvpx/vp9/common/vp9_reconintra.c
@@ -133,12 +133,16 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
int frame_width, frame_height;
int x0, y0;
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int need_left = extend_modes[mode] & NEED_LEFT;
+ const int need_above = extend_modes[mode] & NEED_ABOVE;
+ const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
int base = 128 << (bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
+  // For 10 bit and 12 bit, 127 and 129 are replaced by base - 1 and base + 1.
// Get current frame pointer, width and height.
if (plane == 0) {
@@ -153,79 +157,106 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
- // left
- if (left_available) {
- if (xd->mb_to_bottom_edge < 0) {
- /* slower path if the block needs border extension */
- if (y0 + bs <= frame_height) {
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
+ // NEED_LEFT
+ if (need_left) {
+ if (left_available) {
+ if (xd->mb_to_bottom_edge < 0) {
+ /* slower path if the block needs border extension */
+ if (y0 + bs <= frame_height) {
+ for (i = 0; i < bs; ++i)
+ left_col[i] = ref[i * ref_stride - 1];
+ } else {
+ const int extend_bottom = frame_height - y0;
+ for (i = 0; i < extend_bottom; ++i)
+ left_col[i] = ref[i * ref_stride - 1];
+ for (; i < bs; ++i)
+ left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
+ }
} else {
- const int extend_bottom = frame_height - y0;
- for (i = 0; i < extend_bottom; ++i)
+ /* faster path if the block does not need extension */
+ for (i = 0; i < bs; ++i)
left_col[i] = ref[i * ref_stride - 1];
- for (; i < bs; ++i)
- left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1];
}
} else {
- /* faster path if the block does not need extension */
- for (i = 0; i < bs; ++i)
- left_col[i] = ref[i * ref_stride - 1];
+ vpx_memset16(left_col, base + 1, bs);
}
- } else {
- // TODO(Peter): this value should probably change for high bitdepth
- vpx_memset16(left_col, base + 1, bs);
}
- // TODO(hkuang) do not extend 2*bs pixels for all modes.
- // above
- if (up_available) {
- const uint16_t *above_ref = ref - ref_stride;
- if (xd->mb_to_right_edge < 0) {
- /* slower path if the block needs border extension */
- if (x0 + 2 * bs <= frame_width) {
- if (right_available && bs == 4) {
- memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+ // NEED_ABOVE
+ if (need_above) {
+ if (up_available) {
+ const uint16_t *above_ref = ref - ref_stride;
+ if (xd->mb_to_right_edge < 0) {
+ /* slower path if the block needs border extension */
+ if (x0 + bs <= frame_width) {
+ memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+ } else if (x0 <= frame_width) {
+ const int r = frame_width - x0;
+ memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+ vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width);
+ }
+ } else {
+ /* faster path if the block does not need extension */
+ if (bs == 4 && right_available && left_available) {
+ const_above_row = above_ref;
} else {
memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
}
- } else if (x0 + bs <= frame_width) {
- const int r = frame_width - x0;
- if (right_available && bs == 4) {
+ }
+ above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+ } else {
+ vpx_memset16(above_row, base - 1, bs);
+ above_row[-1] = base - 1;
+ }
+ }
+
+ // NEED_ABOVERIGHT
+ if (need_aboveright) {
+ if (up_available) {
+ const uint16_t *above_ref = ref - ref_stride;
+ if (xd->mb_to_right_edge < 0) {
+ /* slower path if the block needs border extension */
+ if (x0 + 2 * bs <= frame_width) {
+ if (right_available && bs == 4) {
+ memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0]));
+ } else {
+ memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+ vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+ }
+ } else if (x0 + bs <= frame_width) {
+ const int r = frame_width - x0;
+ if (right_available && bs == 4) {
+ memcpy(above_row, above_ref, r * sizeof(above_row[0]));
+ vpx_memset16(above_row + r, above_row[r - 1],
+ x0 + 2 * bs - frame_width);
+ } else {
+ memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
+ vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+ }
+ } else if (x0 <= frame_width) {
+ const int r = frame_width - x0;
memcpy(above_row, above_ref, r * sizeof(above_row[0]));
vpx_memset16(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
+ }
+ above_row[-1] = left_available ? above_ref[-1] : (base + 1);
+ } else {
+ /* faster path if the block does not need extension */
+ if (bs == 4 && right_available && left_available) {
+ const_above_row = above_ref;
} else {
memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+ if (bs == 4 && right_available)
+ memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
+ else
+ vpx_memset16(above_row + bs, above_row[bs - 1], bs);
+ above_row[-1] = left_available ? above_ref[-1] : (base + 1);
}
- } else if (x0 <= frame_width) {
- const int r = frame_width - x0;
- memcpy(above_row, above_ref, r * sizeof(above_row[0]));
- vpx_memset16(above_row + r, above_row[r - 1],
- x0 + 2 * bs - frame_width);
}
- // TODO(Peter) this value should probably change for high bitdepth
- above_row[-1] = left_available ? above_ref[-1] : (base+1);
} else {
- /* faster path if the block does not need extension */
- if (bs == 4 && right_available && left_available) {
- const_above_row = above_ref;
- } else {
- memcpy(above_row, above_ref, bs * sizeof(above_row[0]));
- if (bs == 4 && right_available)
- memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
- else
- vpx_memset16(above_row + bs, above_row[bs - 1], bs);
- // TODO(Peter): this value should probably change for high bitdepth
- above_row[-1] = left_available ? above_ref[-1] : (base+1);
- }
+ vpx_memset16(above_row, base - 1, bs * 2);
+ above_row[-1] = base - 1;
}
- } else {
- vpx_memset16(above_row, base - 1, bs * 2);
- // TODO(Peter): this value should probably change for high bitdepth
- above_row[-1] = base - 1;
}
// predict
@@ -391,8 +422,8 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
int aoff, int loff, int plane) {
const int bw = (1 << bwl_in);
const int txw = (1 << tx_size);
- const int have_top = loff || xd->up_available;
- const int have_left = aoff || xd->left_available;
+ const int have_top = loff || (xd->above_mi != NULL);
+ const int have_left = aoff || (xd->left_mi != NULL);
const int have_right = (aoff + txw) < bw;
const int x = aoff * 4;
const int y = loff * 4;
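build_intra_predictors_high() is reorganized around per-mode requirement flags: the left column, above row, and above-right extension are fetched only when extend_modes[mode] says the predictor actually reads them, and for 10/12-bit input the key-frame constants 127/129 become base - 1 and base + 1 with base = 128 << (bd - 8). A sketch of the gating (flag values and the mode table are illustrative):

    /* Sketch of gating edge setup on per-mode flags, as in the NEED_LEFT /
     * NEED_ABOVE / NEED_ABOVERIGHT refactor above. Not the libvpx tables. */
    #include <stdio.h>

    enum { NEED_LEFT = 1 << 0, NEED_ABOVE = 1 << 1, NEED_ABOVERIGHT = 1 << 2 };

    static const unsigned char extend_modes_sketch[] = {
      NEED_ABOVE | NEED_LEFT, /* e.g. DC: reads both edges */
      NEED_ABOVE,             /* e.g. V: above row only */
      NEED_LEFT,              /* e.g. H: left column only */
      NEED_ABOVERIGHT,        /* e.g. a D45-style mode: above + above-right */
    };

    int main(void) {
      const int mode = 1;
      if (extend_modes_sketch[mode] & NEED_LEFT)
        printf("fill left_col\n");
      if (extend_modes_sketch[mode] & NEED_ABOVE)
        printf("fill above_row\n");
      if (extend_modes_sketch[mode] & NEED_ABOVERIGHT)
        printf("fill above_row plus bs above-right pixels\n");
      return 0;
    }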
diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl
index 5bf71ef9f..846133674 100644
--- a/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8
specialize qw/vp9_post_proc_down_and_across sse2/;
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
-add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp9_plane_add_noise sse2/;
-$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
-
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
@@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp9_highbd_post_proc_down_and_across/;
-
- add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
- specialize qw/vp9_highbd_plane_add_noise/;
}
#
@@ -194,42 +187,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
-add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2 neon msa/;
-
-add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
-specialize qw/vp9_avg_4x4 sse2 msa/;
-
-add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
-specialize qw/vp9_minmax_8x8 sse2/;
-
-add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
-
-add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
-specialize qw/vp9_hadamard_16x16 sse2/;
-
-add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
-specialize qw/vp9_satd sse2/;
-
-add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
-specialize qw/vp9_int_pro_row sse2 neon/;
-
-add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
-specialize qw/vp9_int_pro_col sse2 neon/;
-
-add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
-specialize qw/vp9_vector_var neon sse2/;
-
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
- specialize qw/vp9_highbd_avg_8x8/;
- add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
- specialize qw/vp9_highbd_avg_4x4/;
- add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/vp9_highbd_minmax_8x8/;
-}
-
# ENCODEMB INVOKE
#
@@ -288,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
+ specialize qw/vp9_fwht4x4/, "$sse2_x86inc";
} else {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4 sse2 msa/;
@@ -300,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fht16x16 sse2 msa/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc";
+ specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc";
}
#
@@ -312,10 +269,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
-specialize qw/vp9_diamond_search_sad/;
-
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
-specialize qw/vp9_full_range_search/;
+specialize qw/vp9_diamond_search_sad avx/;
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/vp9_temporal_filter_apply sse2 msa/;
@@ -349,6 +303,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
}
# End vp9_high encoder functions
+#
+# frame based scale
+#
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+} else {
+ add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst";
+ specialize qw/vp9_scale_and_extend_frame ssse3/;
+}
+
}
# end encoder functions
1;
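These perl hunks mostly track code moving out of vp9: plane_add_noise and the avg/minmax/hadamard/satd/int_pro helpers now live in vpx_dsp (see add_noise_test.cc, avg_test.cc, hadamard_test.cc, and minmax_test.cc in the diffstat), vp9_fwht4x4 drops its MMX variant in favor of SSE2, vp9_diamond_search_sad gains an AVX path, and a non-highbitdepth frame scaler is specialized for SSSE3. The file drives libvpx's runtime CPU dispatch: add_proto declares a prototype, specialize lists ISA variants, and setup code selects one from detected CPU flags. A hedged C sketch of that pattern (names and flag values are illustrative):

    /* Sketch of the RTCD pattern generated from vp9_rtcd_defs.pl: a function
     * pointer defaults to the C version and is upgraded once at setup. */
    #include <stdio.h>

    static int satd_c(const short *coeff, int n) {
      int i, sum = 0;
      for (i = 0; i < n; ++i) sum += coeff[i] < 0 ? -coeff[i] : coeff[i];
      return sum;
    }
    /* stands in for the intrinsic implementation */
    static int satd_sse2(const short *coeff, int n) { return satd_c(coeff, n); }

    static int (*satd)(const short *, int) = satd_c;

    static void rtcd_setup(int cpu_flags) {
      if (cpu_flags & 1 /* HAS_SSE2, illustrative */) satd = satd_sse2;
    }

    int main(void) {
      static const short c[4] = { 1, -2, 3, -4 };
      rtcd_setup(1);
      printf("%d\n", satd(c, 4)); /* prints 10 */
      return 0;
    }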
diff --git a/libvpx/vp9/common/vp9_scan.c b/libvpx/vp9/common/vp9_scan.c
index d6fb8b2d7..8b8b09f4a 100644
--- a/libvpx/vp9/common/vp9_scan.c
+++ b/libvpx/vp9/common/vp9_scan.c
@@ -229,10 +229,8 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {
990, 959, 1022, 991, 1023,
};
-// Neighborhood 5-tuples for various scans and blocksizes,
-// in {top, left, topleft, topright, bottomleft} order
-// for each position in raster scan order.
-// -1 indicates the neighbor does not exist.
+// Neighborhood 2-tuples for various scans and blocksizes,
+// in {top, left} order for each position in the corresponding scan order.
DECLARE_ALIGNED(16, static const int16_t,
default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {
0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9,
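The corrected comment is worth noting: the neighbor tables store only {top, left} pairs per scan position, and the entropy context for a coefficient is derived from those two previously coded neighbors. A sketch with the shape of that computation (simplified; table contents and values are illustrative):

    /* Sketch: deriving a coefficient context from the {top, left} neighbors
     * recorded per scan position. token_cache holds already-coded energy. */
    #include <stdio.h>

    #define MAX_NEIGHBORS 2

    static int coef_context_sketch(const short *neighbors,
                                   const unsigned char *token_cache, int c) {
      return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] +
              token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
    }

    int main(void) {
      /* first four {top, left} pairs of a 4x4 neighbor table */
      static const short nb[] = { 0, 0, 0, 0, 0, 0, 1, 4 };
      unsigned char token_cache[16] = { 2, 1, 0 };
      printf("%d\n", coef_context_sketch(nb, token_cache, 3)); /* (1+1+0)>>1 */
      return 0;
    }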
diff --git a/libvpx/vp9/common/vp9_scan.h b/libvpx/vp9/common/vp9_scan.h
index 1d86b5cfe..4c1ee8107 100644
--- a/libvpx/vp9/common/vp9_scan.h
+++ b/libvpx/vp9/common/vp9_scan.h
@@ -42,7 +42,7 @@ static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
PLANE_TYPE type, int block_idx) {
const MODE_INFO *const mi = xd->mi[0];
- if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
+ if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) {
return &vp9_default_scan_orders[tx_size];
} else {
const PREDICTION_MODE mode = get_y_mode(mi, block_idx);
diff --git a/libvpx/vp9/common/vp9_seg_common.c b/libvpx/vp9/common/vp9_seg_common.c
index c8ef618b7..7af61629a 100644
--- a/libvpx/vp9/common/vp9_seg_common.c
+++ b/libvpx/vp9/common/vp9_seg_common.c
@@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
void vp9_clearall_segfeatures(struct segmentation *seg) {
vp9_zero(seg->feature_data);
vp9_zero(seg->feature_mask);
+ seg->aq_av_offset = 0;
}
void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
diff --git a/libvpx/vp9/common/vp9_seg_common.h b/libvpx/vp9/common/vp9_seg_common.h
index 5b75d8d4e..99a9440c1 100644
--- a/libvpx/vp9/common/vp9_seg_common.h
+++ b/libvpx/vp9/common/vp9_seg_common.h
@@ -46,7 +46,8 @@ struct segmentation {
vpx_prob pred_probs[PREDICTION_PROBS];
int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
- unsigned int feature_mask[MAX_SEGMENTS];
+ uint32_t feature_mask[MAX_SEGMENTS];
+ int aq_av_offset;
};
static INLINE int segfeature_active(const struct segmentation *seg,
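feature_mask becomes uint32_t so the per-segment bitmask has a fixed width on every platform, and the new aq_av_offset field (cleared in vp9_clearall_segfeatures above) records an average AQ offset for the segment. A sketch of the bitmask test, mirroring the shape of segfeature_active() (constants illustrative):

    /* Sketch of a fixed-width per-segment feature bitmask. */
    #include <stdint.h>

    #define MAX_SEGMENTS_SKETCH 8

    struct seg_sketch {
      uint8_t enabled;
      uint32_t feature_mask[MAX_SEGMENTS_SKETCH];
    };

    static int feature_active(const struct seg_sketch *seg, int segment_id,
                              int feature_id) {
      return seg->enabled &&
             (seg->feature_mask[segment_id] & (1u << feature_id)) != 0;
    }

    int main(void) {
      struct seg_sketch seg = { 1, { 0 } };
      seg.feature_mask[2] |= 1u << 1; /* enable feature 1 on segment 2 */
      return feature_active(&seg, 2, 1) ? 0 : 1;
    }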
diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 8d312d03f..1c77b57ff 100644
--- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp9_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
diff --git a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
index ec8bfdb18..430762815 100644
--- a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
+++ b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm
@@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm):
%undef flimit4
-;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int width, unsigned int height, int pitch)
-global sym(vp9_plane_add_noise_wmt) PRIVATE
-sym(vp9_plane_add_noise_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movdqu xmm1,[rsi+rax] ; get the source
-
- psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb xmm1, [rdx+32] ;bothclamp
- psubusb xmm1, [rdx+16] ;whiteclamp
-
- movdqu xmm2,[rdi+rax] ; get the noise for this line
- paddb xmm1,xmm2 ; add it in
- movdqu [rsi+rax],xmm1 ; store the result
-
- add rax,16 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
SECTION_RODATA
align 16
rd42:
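The deleted assembly was vp9's private copy of the add-noise postproc kernel; v1.6.0 keeps a single implementation in vpx_dsp instead. Its core is a saturating pre-clamp so that adding signed noise cannot wrap. A rough scalar C equivalent of the deleted psubusb/paddusb sequence (a sketch, not the vpx_dsp code; the clamp vectors are reduced to scalars):

    #include <stdint.h>

    static uint8_t sat_sub(uint8_t a, uint8_t b) { return a > b ? a - b : 0; }
    static uint8_t sat_add(uint8_t a, uint8_t b) {
      return (uint8_t)(a + b) < a ? 255 : (uint8_t)(a + b);
    }

    /* Assumes blackclamp + whiteclamp <= 255, as the real clamp tables do. */
    static void add_noise_row_sketch(uint8_t *pos, const int8_t *noise,
                                     int width, uint8_t blackclamp,
                                     uint8_t whiteclamp) {
      const uint8_t bothclamp = (uint8_t)(blackclamp + whiteclamp);
      int i;
      for (i = 0; i < width; ++i) {
        uint8_t p = pos[i];
        p = sat_sub(p, blackclamp); /* psubusb: lift away from 0 */
        p = sat_add(p, bothclamp);  /* paddusb: push away from 255 */
        p = sat_sub(p, whiteclamp); /* psubusb: recenter */
        pos[i] = (uint8_t)(p + noise[i]); /* paddb: wrap-add the noise */
      }
    }

    int main(void) {
      uint8_t row[4] = { 0, 128, 250, 255 };
      static const int8_t noise[4] = { 5, -5, 5, -5 };
      add_noise_row_sketch(row, noise, 4, 16, 16);
      return row[0] == 21 ? 0 : 1; /* 0 -> clamped to 16, +5 noise */
    }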
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c
index f1916639b..d63912932 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -189,54 +189,31 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
uint8_t *dst, int stride,
int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- if (eob > 0) {
- tran_low_t *const dqcoeff = pd->dqcoeff;
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ assert(eob > 0);
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_8X8:
- vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_16X16:
- vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- default:
- assert(0 && "Invalid transform size");
- }
- }
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (xd->lossless) {
+ vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
} else {
- if (xd->lossless) {
- vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_idct4x4_add(dqcoeff, dst, stride, eob);
- break;
- case TX_8X8:
- vp9_idct8x8_add(dqcoeff, dst, stride, eob);
- break;
- case TX_16X16:
- vp9_idct16x16_add(dqcoeff, dst, stride, eob);
- break;
- case TX_32X32:
- vp9_idct32x32_add(dqcoeff, dst, stride, eob);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_8X8:
+ vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_16X16:
+ vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_32X32:
+ vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
}
}
-#else
+ } else {
if (xd->lossless) {
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
@@ -258,18 +235,40 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
return;
}
}
+ }
+#else
+ if (xd->lossless) {
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_idct4x4_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ vp9_idct8x8_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ vp9_idct16x16_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ return;
+ }
+ }
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (eob == 1) {
- dqcoeff[0] = 0;
- } else {
- if (tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
- else if (tx_size == TX_32X32 && eob <= 34)
- memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
- }
+ if (eob == 1) {
+ dqcoeff[0] = 0;
+ } else {
+ if (tx_size <= TX_16X16 && eob <= 10)
+ memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
+ else if (tx_size == TX_32X32 && eob <= 34)
+ memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
+ else
+ memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
}
}
@@ -279,54 +278,31 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
uint8_t *dst, int stride,
int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- if (eob > 0) {
- tran_low_t *const dqcoeff = pd->dqcoeff;
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ assert(eob > 0);
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_8X8:
- vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_16X16:
- vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- default:
- assert(0 && "Invalid transform size");
- }
- }
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (xd->lossless) {
+ vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
} else {
- if (xd->lossless) {
- vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_8X8:
- vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_16X16:
- vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_32X32:
- vp9_idct32x32_add(dqcoeff, dst, stride, eob);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_8X8:
+ vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_16X16:
+ vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_32X32:
+ vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
}
}
-#else
+ } else {
if (xd->lossless) {
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
@@ -348,33 +324,55 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
return;
}
}
+ }
+#else
+ if (xd->lossless) {
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ return;
+ }
+ }
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (eob == 1) {
- dqcoeff[0] = 0;
- } else {
- if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
- else if (tx_size == TX_32X32 && eob <= 34)
- memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
- }
+ if (eob == 1) {
+ dqcoeff[0] = 0;
+ } else {
+ if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+ memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
+ else if (tx_size == TX_32X32 && eob <= 34)
+ memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
+ else
+ memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
}
}
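Both inverse-transform helpers now assert eob > 0 and leave the early-out to their callers (see predict_and_reconstruct_intra_block and reconstruct_inter_block below), avoiding a call for all-zero blocks. The tail still clears only as many dqcoeff entries as the scan could have touched: one value when eob == 1, the first four rows (4 * (4 << tx_size) values) for small eob at sizes up to 16x16, 256 values for a near-empty 32x32, and the full 16 << (2 * tx_size) otherwise; the intra variant additionally restricts the small-eob shortcut to DCT_DCT. A sketch of that policy:

    /* Sketch of the eob-dependent dqcoeff clearing above. TX sizes follow
     * vp9's log2 convention (TX_4X4 = 0 .. TX_32X32 = 3). */
    #include <string.h>

    typedef int tran_low_sketch;

    static void clear_dqcoeff_sketch(tran_low_sketch *dq, int tx_size, int eob,
                                     int is_dct) {
      if (eob == 1) {
        dq[0] = 0;
      } else if (is_dct && tx_size <= 2 /* TX_16X16 */ && eob <= 10) {
        memset(dq, 0, 4 * (4 << tx_size) * sizeof(*dq)); /* first 4 rows */
      } else if (tx_size == 3 /* TX_32X32 */ && eob <= 34) {
        memset(dq, 0, 256 * sizeof(*dq));
      } else {
        memset(dq, 0, (16 << (tx_size << 1)) * sizeof(*dq)); /* whole block */
      }
    }

    int main(void) {
      tran_low_sketch dq[1024] = { 7 };
      clear_dqcoeff_sketch(dq, 3, 2, 1);
      return dq[0];
    }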
static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
vpx_reader *r,
- MB_MODE_INFO *const mbmi,
+ MODE_INFO *const mi,
int plane,
int row, int col,
TX_SIZE tx_size) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- PREDICTION_MODE mode = (plane == 0) ? mbmi->mode : mbmi->uv_mode;
+ PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode;
uint8_t *dst;
dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
- if (mbmi->sb_type < BLOCK_8X8)
+ if (mi->sb_type < BLOCK_8X8)
if (plane == 0)
mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
@@ -382,29 +380,33 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
dst, pd->dst.stride, dst, pd->dst.stride,
col, row, plane);
- if (!mbmi->skip) {
+ if (!mi->skip) {
const TX_TYPE tx_type = (plane || xd->lossless) ?
DCT_DCT : intra_mode_to_tx_type_lookup[mode];
const scan_order *sc = (plane || xd->lossless) ?
&vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type];
const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size,
- r, mbmi->segment_id);
- inverse_transform_block_intra(xd, plane, tx_type, tx_size,
- dst, pd->dst.stride, eob);
+ r, mi->segment_id);
+ if (eob > 0) {
+ inverse_transform_block_intra(xd, plane, tx_type, tx_size,
+ dst, pd->dst.stride, eob);
+ }
}
}
static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
- MB_MODE_INFO *const mbmi, int plane,
+ MODE_INFO *const mi, int plane,
int row, int col, TX_SIZE tx_size) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const scan_order *sc = &vp9_default_scan_orders[tx_size];
const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r,
- mbmi->segment_id);
+ mi->segment_id);
- inverse_transform_block_inter(xd, plane, tx_size,
- &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
- pd->dst.stride, eob);
+ if (eob > 0) {
+ inverse_transform_block_inter(
+ xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+ pd->dst.stride, eob);
+ }
return eob;
}
@@ -523,8 +525,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
+ highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
@@ -552,7 +554,7 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
+static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd,
int plane, int bw, int bh, int x,
int y, int w, int h, int mi_x, int mi_y,
const InterpKernel *kernel,
@@ -587,7 +589,12 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
// Co-ordinate of containing block to pixel precision.
int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ assert(xd->mi[0]->sb_type != BLOCK_4X8 &&
+ xd->mi[0]->sb_type != BLOCK_8X4);
+ assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) &&
+ mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x)));
+#endif
// Co-ordinate of the block to 1/16th pixel precision.
x0_16 = (x_start + x) << SUBPEL_BITS;
y0_16 = (y_start + y) << SUBPEL_BITS;
@@ -657,8 +664,8 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
- if (pbi->frame_parallel_decode)
- vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
+ if (worker != NULL)
+ vp9_frameworker_wait(worker, ref_frame_buf,
VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
// Skip border extension if block is inside the frame.
@@ -684,16 +691,16 @@ static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
} else {
// Wait until reference block is ready. Pad 7 more pixels as last 7
// pixels of each superblock row can be changed by next superblock row.
- if (pbi->frame_parallel_decode) {
- const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
- vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
- }
+ if (worker != NULL) {
+ const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
+ vp9_frameworker_wait(worker, ref_frame_buf,
+ VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
+ }
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
+ highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
@@ -711,55 +718,75 @@ static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
const int mi_x = mi_col * MI_SIZE;
const int mi_y = mi_row * MI_SIZE;
const MODE_INFO *mi = xd->mi[0];
- const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
- const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
- const int is_compound = has_second_ref(&mi->mbmi);
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- struct buf_2d *const dst_buf = &pd->dst;
- const int num_4x4_w = pd->n4_w;
- const int num_4x4_h = pd->n4_h;
-
- const int n4w_x4 = 4 * num_4x4_w;
- const int n4h_x4 = 4 * num_4x4_h;
- int ref;
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- const int idx = xd->block_refs[ref]->idx;
- BufferPool *const pool = pbi->common.buffer_pool;
- RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
- const int is_scaled = vp9_is_scaled(sf);
-
- if (sb_type < BLOCK_8X8) {
+ const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
+ const BLOCK_SIZE sb_type = mi->sb_type;
+ const int is_compound = has_second_ref(mi);
+ int ref;
+ int is_scaled;
+ VPxWorker *const fwo = pbi->frame_parallel_decode ?
+ pbi->frame_worker_owner : NULL;
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+ RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME];
+ const struct scale_factors *const sf = &ref_buf->sf;
+ const int idx = ref_buf->idx;
+ BufferPool *const pool = pbi->common.buffer_pool;
+ RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
+
+ if (!vp9_is_valid_scale(sf))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+
+ is_scaled = vp9_is_scaled(sf);
+ vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
+ is_scaled ? sf : NULL);
+ xd->block_refs[ref] = ref_buf;
+
+ if (sb_type < BLOCK_8X8) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ struct buf_2d *const dst_buf = &pd->dst;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ const int n4w_x4 = 4 * num_4x4_w;
+ const int n4h_x4 = 4 * num_4x4_h;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
int i = 0, x, y;
for (y = 0; y < num_4x4_h; ++y) {
for (x = 0; x < num_4x4_w; ++x) {
const MV mv = average_split_mvs(pd, mi, ref, i++);
- dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
+ dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4,
4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel,
sf, pre_buf, dst_buf, &mv,
ref_frame_buf, is_scaled, ref);
}
}
- } else {
- const MV mv = mi->mbmi.mv[ref].as_mv;
- dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4,
+ }
+ } else {
+ const MV mv = mi->mv[ref].as_mv;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ struct buf_2d *const dst_buf = &pd->dst;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ const int n4w_x4 = 4 * num_4x4_w;
+ const int n4h_x4 = 4 * num_4x4_h;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4,
0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel,
- sf, pre_buf, dst_buf, &mv, ref_frame_buf,
- is_scaled, ref);
+ sf, pre_buf, dst_buf, &mv,
+ ref_frame_buf, is_scaled, ref);
}
}
}
}
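dec_build_inter_predictors_sb() now iterates references in the outer loop, validating each reference's scale factors up front (invalid dimensions abort with VPX_CODEC_UNSUP_BITSTREAM), calling vp9_setup_pre_planes() once per reference, and only then walking the planes; the frame-parallel owner is passed explicitly as a VPxWorker* (NULL when not frame-parallel) instead of handing the whole decoder to dec_build_inter_predictors(). A sketch of the fail-fast validation (sentinel and values are illustrative):

    #include <stdio.h>

    struct scale_factors_sketch { int x_scale_fp, y_scale_fp; };
    #define INVALID_SCALE_SKETCH 0 /* illustrative sentinel */

    static int is_valid_scale_sketch(const struct scale_factors_sketch *sf) {
      return sf->x_scale_fp != INVALID_SCALE_SKETCH &&
             sf->y_scale_fp != INVALID_SCALE_SKETCH;
    }

    int main(void) {
      const struct scale_factors_sketch sf = { INVALID_SCALE_SKETCH, 1 << 14 };
      if (!is_valid_scale_sketch(&sf)) {
        /* stands in for vpx_internal_error(..., VPX_CODEC_UNSUP_BITSTREAM,
         * "Reference frame has invalid dimensions") */
        fprintf(stderr, "reference frame has invalid dimensions\n");
        return 1;
      }
      return 0;
    }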
-static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
+static INLINE TX_SIZE dec_get_uv_tx_size(const MODE_INFO *mi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
const int x = VPXMIN(n4_wl, n4_hl);
- return VPXMIN(mbmi->tx_size, x);
+ return VPXMIN(mi->tx_size, x);
}
static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) {
@@ -782,10 +809,10 @@ static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl,
}
}
-static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- BLOCK_SIZE bsize, int mi_row, int mi_col,
- int bw, int bh, int x_mis, int y_mis,
- int bwl, int bhl) {
+static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int bw, int bh, int x_mis, int y_mis,
+ int bwl, int bhl) {
const int offset = mi_row * cm->mi_stride + mi_col;
int x, y;
const TileInfo *const tile = &xd->tile;
@@ -794,7 +821,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
xd->mi[0] = &cm->mi[offset];
// TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of
// passing bsize from decode_partition().
- xd->mi[0]->mbmi.sb_type = bsize;
+ xd->mi[0]->sb_type = bsize;
for (y = 0; y < y_mis; ++y)
for (x = !y; x < x_mis; ++x) {
xd->mi[y * cm->mi_stride + x] = xd->mi[0];
@@ -809,7 +836,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
- return &xd->mi[0]->mbmi;
+ return xd->mi[0];
}
static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
@@ -823,8 +850,8 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
- MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
- bw, bh, x_mis, y_mis, bwl, bhl);
+ MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col,
+ bw, bh, x_mis, y_mis, bwl, bhl);
if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
const BLOCK_SIZE uv_subsize =
@@ -834,19 +861,19 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
}
- vpx_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
+ vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
- if (mbmi->skip) {
+ if (mi->skip) {
dec_reset_skip_context(xd);
}
- if (!is_inter_block(mbmi)) {
+ if (!is_inter_block(mi)) {
int plane;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size =
- plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
- : mbmi->tx_size;
+ plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl)
+ : mi->tx_size;
const int num_4x4_w = pd->n4_w;
const int num_4x4_h = pd->n4_h;
const int step = (1 << tx_size);
@@ -856,9 +883,12 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide;
+ xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high;
+
for (row = 0; row < max_blocks_high; row += step)
for (col = 0; col < max_blocks_wide; col += step)
- predict_and_reconstruct_intra_block(xd, r, mbmi, plane,
+ predict_and_reconstruct_intra_block(xd, r, mi, plane,
row, col, tx_size);
}
} else {
@@ -866,15 +896,15 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
// Reconstruction
- if (!mbmi->skip) {
+ if (!mi->skip) {
int eobtotal = 0;
int plane;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size =
- plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
- : mbmi->tx_size;
+ plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl)
+ : mi->tx_size;
const int num_4x4_w = pd->n4_w;
const int num_4x4_h = pd->n4_h;
const int step = (1 << tx_size);
@@ -884,21 +914,24 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide;
+ xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high;
+
for (row = 0; row < max_blocks_high; row += step)
for (col = 0; col < max_blocks_wide; col += step)
- eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col,
+ eobtotal += reconstruct_inter_block(xd, r, mi, plane, row, col,
tx_size);
}
if (!less8x8 && eobtotal == 0)
- mbmi->skip = 1; // skip loopfilter
+ mi->skip = 1; // skip loopfilter
}
}
xd->corrupted |= vpx_reader_has_error(r);
if (cm->lf.filter_level) {
- vp9_build_mask(cm, mbmi, mi_row, mi_col, bw, bh);
+ vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh);
}
}
@@ -1196,8 +1229,9 @@ static void resize_mv_buffer(VP9_COMMON *cm) {
vpx_free(cm->cur_frame->mvs);
cm->cur_frame->mi_rows = cm->mi_rows;
cm->cur_frame->mi_cols = cm->mi_cols;
- cm->cur_frame->mvs = (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*cm->cur_frame->mvs));
+ CHECK_MEM_ERROR(cm, cm->cur_frame->mvs,
+ (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*cm->cur_frame->mvs)));
}
static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
@@ -1281,11 +1315,16 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
BufferPool *const pool = cm->buffer_pool;
for (i = 0; i < REFS_PER_FRAME; ++i) {
if (vpx_rb_read_bit(rb)) {
- YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
- width = buf->y_crop_width;
- height = buf->y_crop_height;
- found = 1;
- break;
+ if (cm->frame_refs[i].idx != INVALID_IDX) {
+ YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
+ width = buf->y_crop_width;
+ height = buf->y_crop_height;
+ found = 1;
+ break;
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Failed to decode frame size");
+ }
}
}
@@ -1300,22 +1339,23 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
// has valid dimensions.
for (i = 0; i < REFS_PER_FRAME; ++i) {
RefBuffer *const ref_frame = &cm->frame_refs[i];
- has_valid_ref_frame |= valid_ref_frame_size(ref_frame->buf->y_crop_width,
- ref_frame->buf->y_crop_height,
- width, height);
+ has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX &&
+ valid_ref_frame_size(ref_frame->buf->y_crop_width,
+ ref_frame->buf->y_crop_height,
+ width, height));
}
if (!has_valid_ref_frame)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame has invalid size");
for (i = 0; i < REFS_PER_FRAME; ++i) {
RefBuffer *const ref_frame = &cm->frame_refs[i];
- if (!valid_ref_frame_img_fmt(
- ref_frame->buf->bit_depth,
- ref_frame->buf->subsampling_x,
- ref_frame->buf->subsampling_y,
- cm->bit_depth,
- cm->subsampling_x,
- cm->subsampling_y))
+ if (ref_frame->idx == INVALID_IDX ||
+ !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth,
+ ref_frame->buf->subsampling_x,
+ ref_frame->buf->subsampling_y,
+ cm->bit_depth,
+ cm->subsampling_x,
+ cm->subsampling_y))
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame has incompatible color format");
}
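Both loops now tolerate missing references: a slot whose idx is INVALID_IDX (never decoded, e.g. in a corrupt or truncated stream) can no longer be dereferenced for its size or color format, and such streams are rejected. The size rule itself allows a reference at most 2x larger and at most 16x smaller than the current frame per dimension; a sketch (treat the exact bounds as an assumption mirroring valid_ref_frame_size):

    /* Sketch of the size-compatibility rule used by has_valid_ref_frame. */
    static int valid_ref_size_sketch(int ref_w, int ref_h, int w, int h) {
      return 2 * w >= ref_w && 2 * h >= ref_h &&
             w <= 16 * ref_w && h <= 16 * ref_h;
    }

    int main(void) {
      /* downscaling to half the reference size is allowed */
      return valid_ref_size_sketch(1920, 1080, 960, 540) ? 0 : 1;
    }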
@@ -1434,7 +1474,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
TileBuffer tile_buffers[4][1 << 6];
int tile_row, tile_col;
int mi_row, mi_col;
- TileData *tile_data = NULL;
+ TileWorkerData *tile_data = NULL;
if (cm->lf.filter_level && !cm->skip_loop_filter &&
pbi->lf_worker.data1 == NULL) {
@@ -1470,28 +1510,17 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
- if (pbi->tile_data == NULL ||
- (tile_cols * tile_rows) != pbi->total_tiles) {
- vpx_free(pbi->tile_data);
- CHECK_MEM_ERROR(
- cm,
- pbi->tile_data,
- vpx_memalign(32, tile_cols * tile_rows * (sizeof(*pbi->tile_data))));
- pbi->total_tiles = tile_rows * tile_cols;
- }
-
// Load all tile information into tile_data.
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
const TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
- tile_data = pbi->tile_data + tile_cols * tile_row + tile_col;
- tile_data->cm = cm;
+ tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
- tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
- NULL : &cm->counts;
+ tile_data->xd.counts =
+ cm->frame_parallel_decoding_mode ? NULL : &cm->counts;
vp9_zero(tile_data->dqcoeff);
- vp9_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col);
+ vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
pbi->decrypt_state);
@@ -1507,8 +1536,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
const int col = pbi->inv_tile_order ?
tile_cols - tile_col - 1 : tile_col;
- tile_data = pbi->tile_data + tile_cols * tile_row + col;
- vp9_tile_set_col(&tile, tile_data->cm, col);
+ tile_data = pbi->tile_worker_data + tile_cols * tile_row + col;
+ vp9_tile_set_col(&tile, cm, col);
vp9_zero(tile_data->xd.left_context);
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
@@ -1560,7 +1589,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
}
// Get last tile data.
- tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
+ tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1;
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
@@ -1645,12 +1674,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
const int num_threads = pbi->max_threads;
CHECK_MEM_ERROR(cm, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
- // Ensure tile data offsets will be properly aligned. This may fail on
- // platforms without DECLARE_ALIGNED().
- assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
- CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
- vpx_memalign(32, num_threads *
- sizeof(*pbi->tile_worker_data)));
for (n = 0; n < num_threads; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
++pbi->num_tile_workers;
@@ -1666,7 +1689,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
- TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
+ TileWorkerData *const tile_data =
+ &pbi->tile_worker_data[n + pbi->total_tiles];
winterface->sync(worker);
tile_data->xd = pbi->mb;
tile_data->xd.counts =
@@ -1979,6 +2003,8 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
if (!cm->error_resilient_mode) {
cm->refresh_frame_context = vpx_rb_read_bit(rb);
cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb);
+ if (!cm->frame_parallel_decoding_mode)
+ vp9_zero(cm->counts);
} else {
cm->refresh_frame_context = 0;
cm->frame_parallel_decoding_mode = 1;
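Zeroing cm->counts moves here from vp9_decode_frame() and happens only when frame_parallel_decoding_mode is off, i.e. only when the counts will actually be accumulated for backward adaptation; that in turn lets the debug_check_frame_counts() helper be deleted further down. A sketch of the idea (types illustrative):

    /* Sketch: zero the (large) counts struct only when this frame updates it,
     * mirroring the conditional vp9_zero(cm->counts) above. */
    #include <string.h>

    struct counts_sketch { unsigned y_mode[4]; unsigned mv[16]; };

    static void maybe_reset_counts(struct counts_sketch *counts,
                                   int frame_parallel_decoding_mode) {
      if (!frame_parallel_decoding_mode)
        memset(counts, 0, sizeof(*counts)); /* accumulated during decode */
      /* else: the counts are never read, so skip the memset entirely */
    }

    int main(void) {
      struct counts_sketch c;
      maybe_reset_counts(&c, 0);
      return (int)c.y_mode[0];
    }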
@@ -2082,43 +2108,6 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
return vpx_reader_has_error(&r);
}
-#ifdef NDEBUG
-#define debug_check_frame_counts(cm) (void)0
-#else // !NDEBUG
-// Counts should only be incremented when frame_parallel_decoding_mode and
-// error_resilient_mode are disabled.
-static void debug_check_frame_counts(const VP9_COMMON *const cm) {
- FRAME_COUNTS zero_counts;
- vp9_zero(zero_counts);
- assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode);
- assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
- sizeof(cm->counts.y_mode)));
- assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
- sizeof(cm->counts.uv_mode)));
- assert(!memcmp(cm->counts.partition, zero_counts.partition,
- sizeof(cm->counts.partition)));
- assert(!memcmp(cm->counts.coef, zero_counts.coef,
- sizeof(cm->counts.coef)));
- assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch,
- sizeof(cm->counts.eob_branch)));
- assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp,
- sizeof(cm->counts.switchable_interp)));
- assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode,
- sizeof(cm->counts.inter_mode)));
- assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
- sizeof(cm->counts.intra_inter)));
- assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
- sizeof(cm->counts.comp_inter)));
- assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref,
- sizeof(cm->counts.single_ref)));
- assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
- sizeof(cm->counts.comp_ref)));
- assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
- assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
- assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
-}
-#endif // NDEBUG
-
static struct vpx_read_bit_buffer *init_read_bit_buffer(
VP9Decoder *pbi,
struct vpx_read_bit_buffer *rb,
@@ -2202,8 +2191,6 @@ void vp9_decode_frame(VP9Decoder *pbi,
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Uninitialized entropy context.");
- vp9_zero(cm->counts);
-
xd->corrupted = 0;
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
if (new_fb->corrupted)
@@ -2232,6 +2219,19 @@ void vp9_decode_frame(VP9Decoder *pbi,
vp9_frameworker_unlock_stats(worker);
}
+ if (pbi->tile_worker_data == NULL ||
+ (tile_cols * tile_rows) != pbi->total_tiles) {
+ const int num_tile_workers = tile_cols * tile_rows +
+ ((pbi->max_threads > 1) ? pbi->max_threads : 0);
+ const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data);
+ // Ensure tile data offsets will be properly aligned. This may fail on
+ // platforms without DECLARE_ALIGNED().
+ assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
+ vpx_free(pbi->tile_worker_data);
+ CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size));
+ pbi->total_tiles = tile_rows * tile_cols;
+ }
+
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
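Tile storage is unified: the per-tile TileData array is gone (decode_tiles() above now indexes pbi->tile_worker_data directly, and the multi-threaded path uses the tail of the same array at offset total_tiles), with allocation moved here and sized for tiles plus worker slots in one 32-byte-aligned block. A sketch of the sizing (the allocation helper is a stand-in for vpx_memalign):

    #define _POSIX_C_SOURCE 200112L /* for posix_memalign, the stand-in here */
    #include <stdlib.h>

    static size_t twd_size_sketch(int tile_cols, int tile_rows,
                                  int max_threads, size_t entry_size) {
      const int num = tile_cols * tile_rows +
                      (max_threads > 1 ? max_threads : 0);
      return (size_t)num * entry_size;
    }

    int main(void) {
      void *twd = NULL;
      /* e.g. 4x1 tiles, 8 threads: 4 single-thread slots + 8 worker slots */
      const size_t sz = twd_size_sketch(4, 1, 8, 64);
      if (posix_memalign(&twd, 32, sz) != 0) return 1;
      free(twd);
      return 0;
    }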
@@ -2259,8 +2259,6 @@ void vp9_decode_frame(VP9Decoder *pbi,
vp9_adapt_mode_probs(cm);
vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
- } else {
- debug_check_frame_counts(cm);
}
} else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c
index d3ca7b3fe..ffc6839ad 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/libvpx/vp9/decoder/vp9_decodemv.c
@@ -81,10 +81,10 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
return (TX_SIZE)tx_size;
}
-static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
- int allow_select, vpx_reader *r) {
+static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
+ int allow_select, vpx_reader *r) {
TX_MODE tx_mode = cm->tx_mode;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
return read_selected_tx_size(cm, xd, max_tx_size, r);
@@ -149,17 +149,12 @@ static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset,
}
static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- int mi_row, int mi_col, vpx_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r,
+ int x_mis, int y_mis) {
struct segmentation *const seg = &cm->seg;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
int predicted_segment_id, segment_id;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = xd->plane[0].n4_w >> 1;
- const int bh = xd->plane[0].n4_h >> 1;
-
- // TODO(slavarnway): move x_mis, y_mis into xd ?????
- const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
- const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
if (!seg->enabled)
return 0; // Default for disabled segmentation
@@ -176,9 +171,9 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
if (seg->temporal_update) {
const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
- mbmi->seg_id_predicted = vpx_read(r, pred_prob);
- segment_id = mbmi->seg_id_predicted ? predicted_segment_id
- : read_segment_id(r, seg);
+ mi->seg_id_predicted = vpx_read(r, pred_prob);
+ segment_id = mi->seg_id_predicted ? predicted_segment_id
+ : read_segment_id(r, seg);
} else {
segment_id = read_segment_id(r, seg);
}
@@ -202,52 +197,46 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
static void read_intra_frame_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
- int mi_row, int mi_col, vpx_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r,
+ int x_mis, int y_mis) {
MODE_INFO *const mi = xd->mi[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
- const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
int i;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = xd->plane[0].n4_w >> 1;
- const int bh = xd->plane[0].n4_h >> 1;
- // TODO(slavarnway): move x_mis, y_mis into xd ?????
- const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
- const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
-
- mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
- mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
- mbmi->tx_size = read_tx_size(cm, xd, 1, r);
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE;
+ mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
+ mi->skip = read_skip(cm, xd, mi->segment_id, r);
+ mi->tx_size = read_tx_size(cm, xd, 1, r);
+ mi->ref_frame[0] = INTRA_FRAME;
+ mi->ref_frame[1] = NONE;
switch (bsize) {
case BLOCK_4X4:
for (i = 0; i < 4; ++i)
mi->bmi[i].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i));
- mbmi->mode = mi->bmi[3].as_mode;
+ mi->mode = mi->bmi[3].as_mode;
break;
case BLOCK_4X8:
mi->bmi[0].as_mode = mi->bmi[2].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
- mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1));
break;
case BLOCK_8X4:
mi->bmi[0].as_mode = mi->bmi[1].as_mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
- mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode =
read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2));
break;
default:
- mbmi->mode = read_intra_mode(r,
- get_y_mode_probs(mi, above_mi, left_mi, 0));
+ mi->mode = read_intra_mode(r,
+ get_y_mode_probs(mi, above_mi, left_mi, 0));
}
- mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]);
+ mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]);
}
static int read_mv_component(vpx_reader *r,
@@ -289,7 +278,7 @@ static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref,
nmv_context_counts *counts, int allow_hp) {
const MV_JOINT_TYPE joint_type =
(MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints);
- const int use_hp = allow_hp && vp9_use_mv_hp(ref);
+ const int use_hp = allow_hp && use_mv_hp(ref);
MV diff = {0, 0};
if (mv_joint_vertical(joint_type))
@@ -364,11 +353,36 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
}
+// TODO(slavarnway): Move this decoder version of
+// vp9_get_pred_context_switchable_interp() to vp9_pred_common.h and update the
+// encoder.
+//
+// Returns a context number for the given MB prediction signal
+static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const int above_type = above_mi ? above_mi->interp_filter
+ : SWITCHABLE_FILTERS;
+
+ if (left_type == above_type)
+ return left_type;
+ else if (left_type == SWITCHABLE_FILTERS)
+ return above_type;
+ else if (above_type == SWITCHABLE_FILTERS)
+ return left_type;
+ else
+ return SWITCHABLE_FILTERS;
+}
static INLINE INTERP_FILTER read_switchable_interp_filter(
VP9_COMMON *const cm, MACROBLOCKD *const xd,
vpx_reader *r) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ const int ctx = dec_get_pred_context_switchable_interp(xd);
const INTERP_FILTER type =
(INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx]);
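The decoder gets its own copy of the switchable-filter context derivation so it can skip the is_inter_block() tests the shared encoder/decoder version performs; read_intra_block_mode_info() below cooperates by seeding mi->interp_filter with SWITCHABLE_FILTERS on intra blocks, so an intra neighbor naturally yields the "no preference" context. A sketch of the agreement rule (enum values are stand-ins):

    enum { EIGHTTAP_SK = 0, EIGHTTAP_SMOOTH_SK = 1, EIGHTTAP_SHARP_SK = 2,
           SWITCHABLE_SK = 3 /* "no preference", as on intra blocks */ };

    static int filter_ctx_sketch(int left_type, int above_type) {
      if (left_type == above_type) return left_type;
      if (left_type == SWITCHABLE_SK) return above_type;
      if (above_type == SWITCHABLE_SK) return left_type;
      return SWITCHABLE_SK;
    }

    int main(void) {
      /* an intra left neighbor (seeded SWITCHABLE) defers to the above one */
      return filter_ctx_sketch(SWITCHABLE_SK, EIGHTTAP_SMOOTH_SK) ==
                     EIGHTTAP_SMOOTH_SK
                 ? 0
                 : 1;
    }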
@@ -381,36 +395,39 @@ static INLINE INTERP_FILTER read_switchable_interp_filter(
static void read_intra_block_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd, MODE_INFO *mi,
vpx_reader *r) {
- MB_MODE_INFO *const mbmi = &mi->mbmi;
- const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
int i;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE;
-
switch (bsize) {
case BLOCK_4X4:
for (i = 0; i < 4; ++i)
mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0);
- mbmi->mode = mi->bmi[3].as_mode;
+ mi->mode = mi->bmi[3].as_mode;
break;
case BLOCK_4X8:
mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd,
r, 0);
- mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode =
read_intra_mode_y(cm, xd, r, 0);
break;
case BLOCK_8X4:
mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd,
r, 0);
- mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode =
read_intra_mode_y(cm, xd, r, 0);
break;
default:
- mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
+ mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
}
- mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+ mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode);
+
+ // Initialize interp_filter here so we do not have to check for inter block
+ // modes in dec_get_pred_context_switchable_interp()
+ mi->interp_filter = SWITCHABLE_FILTERS;
+
+ mi->ref_frame[0] = INTRA_FRAME;
+ mi->ref_frame[1] = NONE;
}
static INLINE int is_mv_valid(const MV *mv) {
@@ -418,10 +435,18 @@ static INLINE int is_mv_valid(const MV *mv) {
mv->col > MV_LOW && mv->col < MV_UPP;
}
+static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) {
+ memcpy(dst, src, sizeof(*dst) * 2);
+}
+
+static INLINE void zero_mv_pair(int_mv *dst) {
+ memset(dst, 0, sizeof(*dst) * 2);
+}
+
static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd,
PREDICTION_MODE mode,
int_mv mv[2], int_mv ref_mv[2],
- int_mv nearest_mv[2], int_mv near_mv[2],
+ int_mv near_nearest_mv[2],
int is_compound, int allow_hp, vpx_reader *r) {
int i;
int ret = 1;
@@ -437,22 +462,13 @@ static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd,
}
break;
}
+ case NEARMV:
case NEARESTMV: {
- mv[0].as_int = nearest_mv[0].as_int;
- if (is_compound)
- mv[1].as_int = nearest_mv[1].as_int;
- break;
- }
- case NEARMV: {
- mv[0].as_int = near_mv[0].as_int;
- if (is_compound)
- mv[1].as_int = near_mv[1].as_int;
+ copy_mv_pair(mv, near_nearest_mv);
break;
}
case ZEROMV: {
- mv[0].as_int = 0;
- if (is_compound)
- mv[1].as_int = 0;
+ zero_mv_pair(mv);
break;
}
default: {
@@ -467,7 +483,7 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
} else {
- const int ctx = vp9_get_intra_inter_context(xd);
+ const int ctx = get_intra_inter_context(xd);
const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]);
FRAME_COUNTS *counts = xd->counts;
if (counts)
@@ -476,44 +492,295 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
}
+static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
+ int refmv_count) {
+ int i;
+
+  // Lower the precision of each candidate as needed; the last candidate
+  // visited is kept as the best mv.
+ for (i = 0; i < refmv_count; ++i) {
+ lower_mv_precision(&mvlist[i].as_mv, allow_hp);
+ *best_mv = mvlist[i];
+ }
+}
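
The caller contract is worth spelling out: refmv_count is 1 for NEARESTMV/NEWMV and MAX_MV_REF_CANDIDATES (2) for NEARMV, so the final assignment above yields the nearest or the near mv respectively. A reduced sketch of that selection, with mvs narrowed to plain ints for brevity:

/* With list = { nearest, near }:
 *   count == 1  ->  returns nearest   (NEARESTMV / NEWMV)
 *   count == 2  ->  returns near      (NEARMV)            */
static int pick_best_ref_mv(const int *list, int count) {
  int best = 0, i;
  for (i = 0; i < count; ++i)
    best = list[i];  /* keep the last entry visited */
  return best;
}
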
+
static void fpm_sync(void *const data, int mi_row) {
VP9Decoder *const pbi = (VP9Decoder *)data;
vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
mi_row << MI_BLOCK_SIZE_LOG2);
}
+// This macro is used to add a motion vector to the mv_ref list if it isn't
+// already in the list. If it is the second motion vector, or if early_break
+// is set, it also skips all additional processing and jumps to Done.
+#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \
+ do { \
+ if (refmv_count) { \
+ if ((mv).as_int != (mv_ref_list)[0].as_int) { \
+ (mv_ref_list)[(refmv_count)] = (mv); \
+ refmv_count++; \
+ goto Done; \
+ } \
+ } else { \
+ (mv_ref_list)[(refmv_count)++] = (mv); \
+ if (early_break) \
+ goto Done; \
+ } \
+ } while (0)
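
A function-style sketch of the same control flow may make the early-break rule clearer (hypothetical helper, assuming the int_mv union from vp9_mv.h; the real macro must use goto because it breaks out of the caller's loops):

/* Returns 1 when the caller should stop scanning. */
static int add_mv_ref_eb(int_mv mv, int_mv *list, int *count, int early_break) {
  if (*count) {                        /* one candidate already present */
    if (mv.as_int != list[0].as_int) {
      list[(*count)++] = mv;           /* second distinct mv: list is full */
      return 1;
    }
  } else {
    list[(*count)++] = mv;             /* first candidate */
    if (early_break) return 1;         /* NEARESTMV/NEWMV need only one */
  }
  return 0;
}
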
+
+// For an inter candidate, scale and add the mv for each of its reference
+// frames that is not INTRA and differs from ref_frame; the second mv is
+// added only if it also differs from the first.
+#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \
+ refmv_count, mv_ref_list, Done) \
+ do { \
+ if (is_inter_block(mbmi)) { \
+ if ((mbmi)->ref_frame[0] != ref_frame) \
+ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
+ refmv_count, mv_ref_list, Done); \
+ if (has_second_ref(mbmi) && \
+ (mbmi)->ref_frame[1] != ref_frame && \
+ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
+ refmv_count, mv_ref_list, Done); \
+ } \
+ } while (0)
+
+// This function searches the neighborhood of a given MB/SB
+// to try to find candidate reference motion vectors.
+static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
+ const POSITION *const mv_ref_search,
+ int_mv *mv_ref_list,
+ int mi_row, int mi_col, int block, int is_sub8x8,
+ find_mv_refs_sync sync, void *const data) {
+ const int *ref_sign_bias = cm->ref_frame_sign_bias;
+ int i, refmv_count = 0;
+ int different_ref_found = 0;
+ const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+ cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
+ const TileInfo *const tile = &xd->tile;
+  // If the mode is NEARESTMV or NEWMV (which uses the nearest mv as a
+  // reference), stop searching after the first mv is found.
+ const int early_break = (mode != NEARMV);
+
+ // Blank the reference vector list
+ memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+ i = 0;
+ if (is_sub8x8) {
+    // If the size is < 8x8, we get the mv from the bmi substructure of the
+    // nearest two blocks.
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+ different_ref_found = 1;
+
+ if (candidate_mi->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST_EB(
+ get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
+ refmv_count, mv_ref_list, Done);
+ else if (candidate_mi->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST_EB(
+ get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
+ refmv_count, mv_ref_list, Done);
+ }
+ }
+ }
+
+  // Check the rest of the neighbors in much the same way as before,
+  // except we don't need to keep track of sub-blocks or mode counts.
+ for (; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+ different_ref_found = 1;
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done);
+ else if (candidate->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done);
+ }
+ }
+
+  // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
+  // on the Windows platform. The sync here is unnecessary if
+  // use_prev_frame_mvs is 0. But after removing it, there will be a hang in
+  // the unit tests on Windows due to several threads waiting for a thread's
+  // signal.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+#endif
+
+ // Check the last frame's mode and mv info.
+ if (prev_frame_mvs) {
+ // Synchronize here for frame parallel decode if sync function is provided.
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+
+ if (prev_frame_mvs->ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
+ } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done);
+ }
+ }
+
+  // Since we couldn't find 2 mvs from the same reference frame,
+  // go back through the neighbors and find motion vectors from
+  // different reference frames.
+ if (different_ref_found) {
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+
+ // If the candidate is INTRA we don't want to consider its mv.
+ IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias,
+ refmv_count, mv_ref_list, Done);
+ }
+ }
+ }
+
+  // Since we still don't have a candidate, try the last frame.
+ if (prev_frame_mvs) {
+ if (prev_frame_mvs->ref_frame[0] != ref_frame &&
+ prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
+ int_mv mv = prev_frame_mvs->mv[0];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done);
+ }
+
+ if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
+ prev_frame_mvs->ref_frame[1] != ref_frame &&
+ prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) {
+ int_mv mv = prev_frame_mvs->mv[1];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done);
+ }
+ }
+
+ if (mode == NEARMV)
+ refmv_count = MAX_MV_REF_CANDIDATES;
+ else
+    // We only care about the nearest mv for the remaining modes.
+ refmv_count = 1;
+
+ Done:
+ // Clamp vectors
+ for (i = 0; i < refmv_count; ++i)
+ clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
+
+ return refmv_count;
+}
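
A hedged sketch of how read_inter_block_mode_info() below consumes this function (block == -1 and is_sub8x8 == 0 select whole-block mvs):

/*   int_mv tmp[MAX_MV_REF_CANDIDATES];
 *   const int n = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
 *                                  tmp, mi_row, mi_col, -1, 0,
 *                                  fpm_sync, (void *)pbi);
 *   // n == 2 for NEARMV (nearest + near), n == 1 for all other modes,
 *   // and every returned mv is already clamped to the frame border.    */
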
+
+static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
+ const POSITION *const mv_ref_search,
+ PREDICTION_MODE b_mode, int block,
+ int ref, int mi_row, int mi_col,
+ int_mv *best_sub8x8) {
+ int_mv mv_list[MAX_MV_REF_CANDIDATES];
+ MODE_INFO *const mi = xd->mi[0];
+ b_mode_info *bmi = mi->bmi;
+ int n;
+ int refmv_count;
+
+ assert(MAX_MV_REF_CANDIDATES == 2);
+
+ refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref],
+ mv_ref_search, mv_list, mi_row, mi_col, block,
+ 1, NULL, NULL);
+
+ switch (block) {
+ case 0:
+ best_sub8x8->as_int = mv_list[refmv_count - 1].as_int;
+ break;
+ case 1:
+ case 2:
+ if (b_mode == NEARESTMV) {
+ best_sub8x8->as_int = bmi[0].as_mv[ref].as_int;
+ } else {
+ best_sub8x8->as_int = 0;
+ for (n = 0; n < refmv_count; ++n)
+ if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) {
+ best_sub8x8->as_int = mv_list[n].as_int;
+ break;
+ }
+ }
+ break;
+ case 3:
+ if (b_mode == NEARESTMV) {
+ best_sub8x8->as_int = bmi[2].as_mv[ref].as_int;
+ } else {
+ int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
+ candidates[0] = bmi[1].as_mv[ref];
+ candidates[1] = bmi[0].as_mv[ref];
+ candidates[2] = mv_list[0];
+ candidates[3] = mv_list[1];
+ best_sub8x8->as_int = 0;
+ for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
+ if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) {
+ best_sub8x8->as_int = candidates[n].as_int;
+ break;
+ }
+ }
+ break;
+ default:
+ assert(0 && "Invalid block index.");
+ }
+}
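
The block numbering this relies on is the raster order of the four sub-blocks within the 8x8 unit; a short reference sketch of the selection above:

/* Sub-block indices inside one 8x8 MI unit:
 *     +---+---+
 *     | 0 | 1 |
 *     +---+---+
 *     | 2 | 3 |
 *     +---+---+
 * block 1/2: NEARESTMV reuses bmi[0] (block 0's mv); NEARMV takes the first
 *            list mv that differs from it.
 * block 3:   NEARESTMV reuses bmi[2] (its left neighbor); NEARMV scans
 *            { bmi[1], bmi[0], mv_list[0], mv_list[1] } for the first mv
 *            that differs from bmi[2]'s.                                  */
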
+
+static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const POSITION *const mv_ref_search,
+ int mi_row, int mi_col) {
+ int i;
+ int context_counter = 0;
+ const TileInfo *const tile = &xd->tile;
+
+ // Get mode count from nearest 2 blocks
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mode];
+ }
+ }
+
+ return counter_to_context[context_counter];
+}
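
The two tables referenced above live in vp9_mvref_common.c: mode_2_counter[] gives each neighbor mode a small weight and counter_to_context[] buckets the weight sum into one of the INTER_MODE_CONTEXTS bins. A sketch of the data flow, with the table contents left abstract:

/* ctx = counter_to_context[ mode_2_counter[left_mode]
 *                         + mode_2_counter[above_mode] ];
 * read_inter_mode() then selects cm->fc->inter_mode_probs[ctx], so a block
 * whose neighbors were, e.g., both NEWMV decodes its mode with different
 * probabilities than a block surrounded by ZEROMV neighbors.             */
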
+
static void read_inter_block_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
MODE_INFO *const mi,
int mi_row, int mi_col, vpx_reader *r) {
VP9_COMMON *const cm = &pbi->common;
- MB_MODE_INFO *const mbmi = &mi->mbmi;
- const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
- int_mv nearestmv[2], nearmv[2];
- int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int_mv best_ref_mvs[2];
int ref, is_compound;
- uint8_t inter_mode_ctx[MAX_REF_FRAMES];
-
- read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
- is_compound = has_second_ref(mbmi);
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
- RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
-
- xd->block_refs[ref] = ref_buf;
- if ((!vp9_is_valid_scale(&ref_buf->sf)))
- vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
- "Reference frame has invalid dimensions");
- vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
- &ref_buf->sf);
- vp9_find_mv_refs(cm, xd, mi, frame, ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
- }
+ uint8_t inter_mode_ctx;
+ const POSITION *const mv_ref_search = mv_ref_blocks[bsize];
+
+ read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame);
+ is_compound = has_second_ref(mi);
+ inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col);
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- mbmi->mode = ZEROMV;
+ if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
+ mi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid usage of segement feature on small blocks");
@@ -521,18 +788,31 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
}
} else {
if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, xd, r,
- inter_mode_ctx[mbmi->ref_frame[0]]);
- }
-
- if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]],
- &nearestmv[ref], &nearmv[ref]);
+ mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ else
+      // Sub-8x8 blocks use the nearest mv as a ref_mv if the b_mode is NEWMV.
+      // Setting mode to NEARESTMV forces the search to stop after the nearest
+      // mv has been found. After the b_modes have been read, mode will be
+      // overwritten by the last b_mode.
+ mi->mode = NEARESTMV;
+
+ if (mi->mode != ZEROMV) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
+ const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+ int refmv_count;
+
+ refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
+ tmp_mvs, mi_row, mi_col, -1, 0,
+ fpm_sync, (void *)pbi);
+
+ dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
+ refmv_count);
+ }
}
}
- mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
+ mi->interp_filter = (cm->interp_filter == SWITCHABLE)
? read_switchable_interp_filter(cm, xd, r)
: cm->interp_filter;
@@ -541,33 +821,24 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
const int num_4x4_h = 1 << xd->bmode_blocks_hl;
int idx, idy;
PREDICTION_MODE b_mode;
- int_mv nearest_sub8x8[2], near_sub8x8[2];
+ int_mv best_sub8x8[2];
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
- int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
+ b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
if (b_mode == NEARESTMV || b_mode == NEARMV) {
- uint8_t dummy_mode_ctx[MAX_REF_FRAMES];
for (ref = 0; ref < 1 + is_compound; ++ref)
- vp9_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col,
- &nearest_sub8x8[ref],
- &near_sub8x8[ref],
- dummy_mode_ctx);
+ append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref,
+ mi_row, mi_col, &best_sub8x8[ref]);
}
- if (!assign_mv(cm, xd, b_mode, block, nearestmv,
- nearest_sub8x8, near_sub8x8,
- is_compound, allow_hp, r)) {
+ if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs,
+ best_sub8x8, is_compound, allow_hp, r)) {
xd->corrupted |= 1;
break;
}
- mi->bmi[j].as_mv[0].as_int = block[0].as_int;
- if (is_compound)
- mi->bmi[j].as_mv[1].as_int = block[1].as_int;
-
if (num_4x4_h == 2)
mi->bmi[j + 2] = mi->bmi[j];
if (num_4x4_w == 2)
@@ -575,30 +846,28 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
}
}
- mi->mbmi.mode = b_mode;
+ mi->mode = b_mode;
- mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
- mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ copy_mv_pair(mi->mv, mi->bmi[3].as_mv);
} else {
- xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv,
- nearestmv, nearmv, is_compound, allow_hp, r);
+ xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs,
+ best_ref_mvs, is_compound, allow_hp, r);
}
}
static void read_inter_frame_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
- int mi_row, int mi_col, vpx_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r,
+ int x_mis, int y_mis) {
VP9_COMMON *const cm = &pbi->common;
MODE_INFO *const mi = xd->mi[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
int inter_block;
- mbmi->mv[0].as_int = 0;
- mbmi->mv[1].as_int = 0;
- mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
- mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
- inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
- mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+ mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis,
+ y_mis);
+ mi->skip = read_skip(cm, xd, mi->segment_id, r);
+ inter_block = read_is_inter_block(cm, xd, mi->segment_id, r);
+ mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r);
if (inter_block)
read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r);
@@ -606,7 +875,12 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi,
read_intra_block_mode_info(cm, xd, mi, r);
}
-void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
+static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst,
+ const MV_REFERENCE_FRAME *src) {
+ memcpy(dst, src, sizeof(*dst) * 2);
+}
+
+void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
int mi_row, int mi_col, vpx_reader *r,
int x_mis, int y_mis) {
VP9_COMMON *const cm = &pbi->common;
@@ -615,19 +889,23 @@ void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
int w, h;
if (frame_is_intra_only(cm)) {
- read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
+ read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis);
} else {
- read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r);
+ read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
for (h = 0; h < y_mis; ++h) {
- MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
for (w = 0; w < x_mis; ++w) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ MV_REF *const mv = frame_mvs + w;
+ copy_ref_frame_pair(mv->ref_frame, mi->ref_frame);
+ copy_mv_pair(mv->mv, mi->mv);
}
+ frame_mvs += cm->mi_cols;
}
}
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) &&
+ !is_inter_block(mi) && need_top_left[mi->uv_mode])
+ assert(0);
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
}
diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h
index 75f568cf1..45569ec81 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.h
+++ b/libvpx/vp9/decoder/vp9_decodemv.h
@@ -19,7 +19,7 @@
extern "C" {
#endif
-void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
+void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
int mi_row, int mi_col, vpx_reader *r,
int x_mis, int y_mis);
diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c
index 4e88819b1..935c04f3a 100644
--- a/libvpx/vp9/decoder/vp9_decoder.c
+++ b/libvpx/vp9/decoder/vp9_decoder.c
@@ -131,11 +131,12 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
vpx_get_worker_interface()->end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
- vpx_free(pbi->tile_data);
+
for (i = 0; i < pbi->num_tile_workers; ++i) {
VPxWorker *const worker = &pbi->tile_workers[i];
vpx_get_worker_interface()->end(worker);
}
+
vpx_free(pbi->tile_worker_data);
vpx_free(pbi->tile_workers);
@@ -213,8 +214,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
// Find an empty frame buffer.
const int free_fb = get_free_fb(cm);
- if (cm->new_fb_idx == INVALID_IDX)
- return VPX_CODEC_MEM_ERROR;
+ if (cm->new_fb_idx == INVALID_IDX) {
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Unable to find free frame buffer");
+ return cm->error.error_code;
+ }
// Decrease ref_count since it will be increased again in
// ref_cnt_fb() below.
@@ -243,7 +247,7 @@ static void swap_frame_buffers(VP9Decoder *pbi) {
decrease_ref_count(old_idx, frame_bufs, pool);
// Release the reference frame in reference map.
- if ((mask & 1) && old_idx >= 0) {
+ if (mask & 1) {
decrease_ref_count(old_idx, frame_bufs, pool);
}
cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
@@ -305,8 +309,11 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
&frame_bufs[cm->new_fb_idx].raw_frame_buffer);
// Find a free frame buffer. Return error if can not find any.
cm->new_fb_idx = get_free_fb(cm);
- if (cm->new_fb_idx == INVALID_IDX)
- return VPX_CODEC_MEM_ERROR;
+ if (cm->new_fb_idx == INVALID_IDX) {
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Unable to find free frame buffer");
+ return cm->error.error_code;
+ }
// Assign a MV array to the frame buffer.
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
@@ -350,7 +357,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
decrease_ref_count(old_idx, frame_bufs, pool);
// Release the reference frame in reference map.
- if ((mask & 1) && old_idx >= 0) {
+ if (mask & 1) {
decrease_ref_count(old_idx, frame_bufs, pool);
}
++ref_index;
@@ -501,7 +508,7 @@ vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j)
- this_sz |= (*x++) << (j * 8);
+ this_sz |= ((uint32_t)(*x++)) << (j * 8);
sizes[i] = this_sz;
}
*count = frames;
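
The widening cast above is not cosmetic: *x promotes to int, and for mag == 4 the j == 3 iteration shifts a byte into the sign bit of a 32-bit int, which is undefined behavior. A minimal standalone version of the fixed read (hypothetical helper name):

#include <stdint.h>

/* Read an unsigned little-endian value of mag bytes (1..4) safely. */
static uint32_t read_le(const uint8_t *x, int mag) {
  uint32_t v = 0;
  int j;
  for (j = 0; j < mag; ++j)
    v |= ((uint32_t)x[j]) << (j * 8);  /* widen before shifting */
  return v;
}
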
diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h
index 4a5188f8f..7111a36d3 100644
--- a/libvpx/vp9/decoder/vp9_decoder.h
+++ b/libvpx/vp9/decoder/vp9_decoder.h
@@ -27,15 +27,6 @@
extern "C" {
#endif
-// TODO(hkuang): combine this with TileWorkerData.
-typedef struct TileData {
- VP9_COMMON *cm;
- vpx_reader bit_reader;
- DECLARE_ALIGNED(16, MACROBLOCKD, xd);
- /* dqcoeff are shared by all the planes. So planes must be decoded serially */
- DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
-} TileData;
-
typedef struct TileBuffer {
const uint8_t *data;
size_t size;
@@ -74,8 +65,6 @@ typedef struct VP9Decoder {
TileWorkerData *tile_worker_data;
TileBuffer tile_buffers[64];
int num_tile_workers;
-
- TileData *tile_data;
int total_tiles;
VP9LfSync lf_row_sync;
@@ -128,7 +117,7 @@ void vp9_decoder_remove(struct VP9Decoder *pbi);
static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
BufferPool *const pool) {
- if (idx >= 0) {
+ if (idx >= 0 && frame_bufs[idx].ref_count > 0) {
--frame_bufs[idx].ref_count;
// A worker may only get a free framebuffer index when calling get_free_fb.
   // But the private buffer is not set up until the header has been decoded.
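
The extra ref_count > 0 test makes a double release on a corrupt stream a no-op instead of an underflow; it also lets the (mask & 1) hunks in vp9_decoder.c above drop their old_idx >= 0 checks, since decrease_ref_count() already guards the index. A reduced sketch of the invariant:

/* ref_count is plain shared ownership: it may reach zero, never go below. */
static void release_fb(int *ref_count) {
  if (*ref_count > 0)
    --*ref_count;
  /* A zero count means the buffer is already free; ignore the release. */
}
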
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c
index 591236530..47dc107fe 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/libvpx/vp9/decoder/vp9_detokenize.c
@@ -23,14 +23,6 @@
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
-#define LOW_VAL_CONTEXT_NODE 0
-#define TWO_CONTEXT_NODE 1
-#define THREE_CONTEXT_NODE 2
-#define HIGH_LOW_CONTEXT_NODE 3
-#define CAT_ONE_CONTEXT_NODE 4
-#define CAT_THREEFOUR_CONTEXT_NODE 5
-#define CAT_THREE_CONTEXT_NODE 6
-#define CAT_FIVE_CONTEXT_NODE 7
#define INCREMENT_COUNT(token) \
do { \
@@ -53,7 +45,7 @@ static int decode_coefs(const MACROBLOCKD *xd,
FRAME_COUNTS *counts = xd->counts;
const int max_eob = 16 << (tx_size << 1);
const FRAME_CONTEXT *const fc = xd->fc;
- const int ref = is_inter_block(&xd->mi[0]->mbmi);
+ const int ref = is_inter_block(xd->mi[0]);
int band, c = 0;
const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
@@ -65,52 +57,24 @@ static int decode_coefs(const MACROBLOCKD *xd,
const int dq_shift = (tx_size == TX_32X32);
int v, token;
int16_t dqv = dq[0];
- const uint8_t *cat1_prob;
- const uint8_t *cat2_prob;
- const uint8_t *cat3_prob;
- const uint8_t *cat4_prob;
- const uint8_t *cat5_prob;
- const uint8_t *cat6_prob;
+ const uint8_t *const cat6_prob =
+#if CONFIG_VP9_HIGHBITDEPTH
+ (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 :
+ (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 :
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vp9_cat6_prob;
+ const int cat6_bits =
+#if CONFIG_VP9_HIGHBITDEPTH
+ (xd->bd == VPX_BITS_12) ? 18 :
+ (xd->bd == VPX_BITS_10) ? 16 :
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ 14;
if (counts) {
coef_counts = counts->coef[tx_size][type][ref];
eob_branch_count = counts->eob_branch[tx_size][type][ref];
}
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->bd > VPX_BITS_8) {
- if (xd->bd == VPX_BITS_10) {
- cat1_prob = vp9_cat1_prob_high10;
- cat2_prob = vp9_cat2_prob_high10;
- cat3_prob = vp9_cat3_prob_high10;
- cat4_prob = vp9_cat4_prob_high10;
- cat5_prob = vp9_cat5_prob_high10;
- cat6_prob = vp9_cat6_prob_high10;
- } else {
- cat1_prob = vp9_cat1_prob_high12;
- cat2_prob = vp9_cat2_prob_high12;
- cat3_prob = vp9_cat3_prob_high12;
- cat4_prob = vp9_cat4_prob_high12;
- cat5_prob = vp9_cat5_prob_high12;
- cat6_prob = vp9_cat6_prob_high12;
- }
- } else {
- cat1_prob = vp9_cat1_prob;
- cat2_prob = vp9_cat2_prob;
- cat3_prob = vp9_cat3_prob;
- cat4_prob = vp9_cat4_prob;
- cat5_prob = vp9_cat5_prob;
- cat6_prob = vp9_cat6_prob;
- }
-#else
- cat1_prob = vp9_cat1_prob;
- cat2_prob = vp9_cat2_prob;
- cat3_prob = vp9_cat3_prob;
- cat4_prob = vp9_cat4_prob;
- cat5_prob = vp9_cat5_prob;
- cat6_prob = vp9_cat6_prob;
-#endif
-
while (c < max_eob) {
int val = -1;
band = *band_translate++;
@@ -149,39 +113,22 @@ static int decode_coefs(const MACROBLOCKD *xd,
val = token;
break;
case CATEGORY1_TOKEN:
- val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, r);
+ val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r);
break;
case CATEGORY2_TOKEN:
- val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, r);
+ val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r);
break;
case CATEGORY3_TOKEN:
- val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, r);
+ val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r);
break;
case CATEGORY4_TOKEN:
- val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, r);
+ val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r);
break;
case CATEGORY5_TOKEN:
- val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
+ val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r);
break;
case CATEGORY6_TOKEN:
-#if CONFIG_VP9_HIGHBITDEPTH
- switch (xd->bd) {
- case VPX_BITS_8:
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
- break;
- case VPX_BITS_10:
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 16, r);
- break;
- case VPX_BITS_12:
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 18, r);
- break;
- default:
- assert(0);
- return -1;
- }
-#else
- val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
-#endif
+ val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r);
break;
}
}
@@ -205,65 +152,73 @@ static int decode_coefs(const MACROBLOCKD *xd,
return c;
}
-// TODO(slavarnway): Decode version of vp9_set_context. Modify vp9_set_context
-// after testing is complete, then delete this version.
-static
-void dec_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
- TX_SIZE tx_size, int has_eob,
- int aoff, int loff) {
- ENTROPY_CONTEXT *const a = pd->above_context + aoff;
- ENTROPY_CONTEXT *const l = pd->left_context + loff;
- const int tx_size_in_blocks = 1 << tx_size;
-
- // above
- if (has_eob && xd->mb_to_right_edge < 0) {
- int i;
- const int blocks_wide = pd->n4_w +
- (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- int above_contexts = tx_size_in_blocks;
- if (above_contexts + aoff > blocks_wide)
- above_contexts = blocks_wide - aoff;
-
- for (i = 0; i < above_contexts; ++i)
- a[i] = has_eob;
- for (i = above_contexts; i < tx_size_in_blocks; ++i)
- a[i] = 0;
- } else {
- memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l,
+ int x, int y, unsigned int tx_size_in_blocks) {
+ if (xd->max_blocks_wide) {
+ if (tx_size_in_blocks + x > xd->max_blocks_wide)
+ *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8;
}
-
- // left
- if (has_eob && xd->mb_to_bottom_edge < 0) {
- int i;
- const int blocks_high = pd->n4_h +
- (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
- int left_contexts = tx_size_in_blocks;
- if (left_contexts + loff > blocks_high)
- left_contexts = blocks_high - loff;
-
- for (i = 0; i < left_contexts; ++i)
- l[i] = has_eob;
- for (i = left_contexts; i < tx_size_in_blocks; ++i)
- l[i] = 0;
- } else {
- memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks);
+ if (xd->max_blocks_high) {
+ if (tx_size_in_blocks + y > xd->max_blocks_high)
+ *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8;
}
}
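
A worked example of the shift, assuming one ENTROPY_CONTEXT byte per 4x4 block, which is what the packed stores below rely on:

/* TX_16X16 => tx_size_in_blocks == 4.  At x == 2 with max_blocks_wide == 4:
 *   4 + 2 > 4, so ctx_shift_a = (4 - (4 - 2)) * 8 = 16 bits.
 * The TX_16X16 store below then writes
 *   ((eob > 0) * 0x01010101) >> 16  ==  0x00000101,
 * i.e. only the two in-frame context bytes are set (assuming little-endian
 * byte order, as the packed stores do); the two bytes past the right edge
 * stay zero.                                                              */
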
-int vp9_decode_block_tokens(MACROBLOCKD *xd,
- int plane, const scan_order *sc,
- int x, int y,
- TX_SIZE tx_size, vpx_reader *r,
+int vp9_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc,
+ int x, int y, TX_SIZE tx_size, vpx_reader *r,
int seg_id) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int16_t *const dequant = pd->seg_dequant[seg_id];
- const int ctx = get_entropy_context(tx_size, pd->above_context + x,
- pd->left_context + y);
- const int eob = decode_coefs(xd, get_plane_type(plane),
- pd->dqcoeff, tx_size,
- dequant, ctx, sc->scan, sc->neighbors, r);
- dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
+ int eob;
+ ENTROPY_CONTEXT *a = pd->above_context + x;
+ ENTROPY_CONTEXT *l = pd->left_context + y;
+ int ctx;
+ int ctx_shift_a = 0;
+ int ctx_shift_l = 0;
+
+ switch (tx_size) {
+ case TX_4X4:
+ ctx = a[0] != 0;
+ ctx += l[0] != 0;
+ eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
+ dequant, ctx, sc->scan, sc->neighbors, r);
+ a[0] = l[0] = (eob > 0);
+ break;
+ case TX_8X8:
+ get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8);
+ ctx = !!*(const uint16_t *)a;
+ ctx += !!*(const uint16_t *)l;
+ eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
+ dequant, ctx, sc->scan, sc->neighbors, r);
+ *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a;
+ *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l;
+ break;
+ case TX_16X16:
+ get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16);
+ ctx = !!*(const uint32_t *)a;
+ ctx += !!*(const uint32_t *)l;
+ eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
+ dequant, ctx, sc->scan, sc->neighbors, r);
+ *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a;
+ *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l;
+ break;
+ case TX_32X32:
+ get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32);
+      // NOTE: casting to uint64_t here is safe because the default memory
+      // alignment is at least 8 bytes and the TX_32X32 contexts are aligned
+      // on 8-byte boundaries.
+ ctx = !!*(const uint64_t *)a;
+ ctx += !!*(const uint64_t *)l;
+ eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
+ dequant, ctx, sc->scan, sc->neighbors, r);
+ *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a;
+ *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l;
+ break;
+ default:
+ assert(0 && "Invalid transform size.");
+ eob = 0;
+ break;
+ }
+
return eob;
}
-
-
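
The pointer casts above assume the above/left context rows are suitably aligned (the 64-bit case is called out in the NOTE). A standalone sketch of the equivalence with the old per-byte test, assuming ENTROPY_CONTEXT is a byte type and using memcpy to sidestep strict aliasing:

#include <stdint.h>
#include <string.h>

typedef int8_t ENTROPY_CONTEXT;

/* Old style: test each of the two 4x4 context bytes (TX_8X8). */
static int ctx_bytewise(const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) {
  return (a[0] != 0 || a[1] != 0) + (l[0] != 0 || l[1] != 0);
}

/* New style: one 16-bit load per side gives the same 0..2 context. */
static int ctx_packed(const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) {
  uint16_t va, vl;
  memcpy(&va, a, sizeof(va));
  memcpy(&vl, l, sizeof(vl));
  return !!va + !!vl;
}
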
diff --git a/libvpx/vp9/decoder/vp9_dsubexp.c b/libvpx/vp9/decoder/vp9_dsubexp.c
index 4fbc6db47..05b38538a 100644
--- a/libvpx/vp9/decoder/vp9_dsubexp.c
+++ b/libvpx/vp9/decoder/vp9_dsubexp.c
@@ -29,7 +29,7 @@ static int decode_uniform(vpx_reader *r) {
}
static int inv_remap_prob(int v, int m) {
- static int inv_map_table[MAX_PROB] = {
+ static uint8_t inv_map_table[MAX_PROB] = {
7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189,
202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27,
diff --git a/libvpx/vp9/encoder/vp9_aq_360.c b/libvpx/vp9/encoder/vp9_aq_360.c
new file mode 100644
index 000000000..7d411f65d
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_aq_360.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "vpx_ports/mem.h"
+#include "vpx_ports/system_state.h"
+
+#include "vp9/encoder/vp9_aq_360.h"
+#include "vp9/encoder/vp9_aq_variance.h"
+
+#include "vp9/common/vp9_seg_common.h"
+
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/encoder/vp9_rd.h"
+#include "vp9/encoder/vp9_segmentation.h"
+
+static const double rate_ratio[MAX_SEGMENTS] =
+ {1.0, 0.75, 0.6, 0.5, 0.4, 0.3, 0.25};
+
+// Sets segment id 0 for the equatorial region, 1 for the temperate regions,
+// and 2 for the polar regions.
+unsigned int vp9_360aq_segment_id(int mi_row, int mi_rows) {
+ if (mi_row < mi_rows / 8 || mi_row > mi_rows - mi_rows / 8)
+ return 2;
+ else if (mi_row < mi_rows / 4 || mi_row > mi_rows - mi_rows / 4)
+ return 1;
+ else
+ return 0;
+}
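
In mi units (one mi_row covers 8 luma rows), the thresholds split an equirectangular frame into polar caps (outer eighths), temperate bands (next eighths), and the equatorial middle half. For example, assuming mi_rows == 64 (a 512-pixel-tall frame):

/*   mi_row <  8 || mi_row > 56  ->  2 (polar)
 *   mi_row < 16 || mi_row > 48  ->  1 (temperate)
 *   otherwise                   ->  0 (equatorial)  */
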
+
+void vp9_360aq_frame_setup(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ struct segmentation *seg = &cm->seg;
+ int i;
+
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
+ vp9_enable_segmentation(seg);
+ vp9_clearall_segfeatures(seg);
+
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ vpx_clear_system_state();
+
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ int qindex_delta =
+ vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
+ rate_ratio[i], cm->bit_depth);
+
+ // We don't allow qindex 0 in a segment if the base value is not 0.
+ // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
+ // Q delta is sometimes applied without going back around the rd loop.
+ // This could lead to an illegal combination of partition size and q.
+ if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
+ qindex_delta = -cm->base_qindex + 1;
+ }
+
+ // No need to enable SEG_LVL_ALT_Q for this segment.
+ if (rate_ratio[i] == 1.0) {
+ continue;
+ }
+
+ vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta);
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ }
+ }
+}
diff --git a/libvpx/vp9/encoder/vp9_aq_360.h b/libvpx/vp9/encoder/vp9_aq_360.h
new file mode 100644
index 000000000..fb861cb05
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_aq_360.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_ENCODER_VP9_AQ_360_H_
+#define VP9_ENCODER_VP9_AQ_360_H_
+
+#include "vp9/encoder/vp9_encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned int vp9_360aq_segment_id(int mi_row, int mi_rows);
+void vp9_360aq_frame_setup(VP9_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_ENCODER_VP9_AQ_360_H_
diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.c b/libvpx/vp9/encoder/vp9_aq_complexity.c
index 30ec19112..2d979ec70 100644
--- a/libvpx/vp9/encoder/vp9_aq_complexity.c
+++ b/libvpx/vp9/encoder/vp9_aq_complexity.c
@@ -35,9 +35,6 @@ static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
{-3.5, -2.5, -1.5, 100.00, 100.0},
{-3.0, -2.0, -1.0, 100.00, 100.0} };
-#define DEFAULT_COMPLEXITY 64
-
-
static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) {
   // Approximate base quantizer (truncated to int)
const int base_quant = vp9_ac_quant(q_index, 0, bit_depth) / 4;
@@ -51,7 +48,7 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
// Make SURE use of floating point in this function is safe.
vpx_clear_system_state();
- if (cm->frame_type == KEY_FRAME ||
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
int segment;
@@ -107,7 +104,6 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
#define DEFAULT_LV_THRESH 10.0
#define MIN_DEFAULT_LV_THRESH 8.0
-#define VAR_STRENGTH_STEP 0.25
// Select a segment for the current block.
// The choice of segment for a block depends on the ratio of the projected
// bits for the block vs a target average and its spatial complexity.
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index 2cd89c0d4..3e1a0a522 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -23,73 +23,42 @@
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
- size_t consec_zero_mv_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
if (cr->map == NULL) {
- vpx_free(cr);
+ vp9_cyclic_refresh_free(cr);
return NULL;
}
last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
if (cr->last_coded_q_map == NULL) {
- vpx_free(cr);
+ vp9_cyclic_refresh_free(cr);
return NULL;
}
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
-
- consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
- cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
- if (cr->consec_zero_mv == NULL) {
- vpx_free(cr);
- return NULL;
- }
- memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
return cr;
}
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
vpx_free(cr->map);
vpx_free(cr->last_coded_q_map);
- vpx_free(cr->consec_zero_mv);
vpx_free(cr);
}
-// Check if we should turn off cyclic refresh based on bitrate condition.
-static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm,
- const RATE_CONTROL *rc) {
- // Turn off cyclic refresh if bits available per frame is not sufficiently
- // larger than bit cost of segmentation. Segment map bit cost should scale
- // with number of seg blocks, so compare available bits to number of blocks.
- // Average bits available per frame = avg_frame_bandwidth
- // Number of (8x8) blocks in frame = mi_rows * mi_cols;
- const float factor = 0.25;
- const int number_blocks = cm->mi_rows * cm->mi_cols;
- // The condition below corresponds to turning off at target bitrates:
- // (at 30fps), ~12kbps for CIF, 36kbps for VGA, 100kps for HD/720p.
- // Also turn off at very small frame sizes, to avoid too large fraction of
- // superblocks to be refreshed per frame. Threshold below is less than QCIF.
- if (rc->avg_frame_bandwidth < factor * number_blocks ||
- number_blocks / 64 < 5)
- return 0;
- else
- return 1;
-}
-
// Check if this coding block, of size bsize, should be considered for refresh
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
// mode, and rate/distortion.
static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
- const MB_MODE_INFO *mbmi,
+ const MODE_INFO *mi,
int64_t rate,
int64_t dist,
int bsize) {
- MV mv = mbmi->mv[0].as_mv;
+ MV mv = mi->mv[0].as_mv;
// Reject the block for lower-qp coding if projected distortion
// is above the threshold, and any of the following is true:
// 1) mode uses large mv
@@ -98,12 +67,12 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
if (dist > cr->thresh_dist_sb &&
(mv.row > cr->motion_thresh || mv.row < -cr->motion_thresh ||
mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh ||
- !is_inter_block(mbmi)))
+ !is_inter_block(mi)))
return CR_SEGMENT_ID_BASE;
else if (bsize >= BLOCK_16X16 &&
rate < cr->thresh_rate_sb &&
- is_inter_block(mbmi) &&
- mbmi->mv[0].as_int == 0 &&
+ is_inter_block(mi) &&
+ mi->mv[0].as_int == 0 &&
cr->rate_boost_fac > 10)
// More aggressive delta-q for bigger blocks with zero motion.
return CR_SEGMENT_ID_BOOST2;
@@ -186,12 +155,13 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const VP9_COMP *cpi, int i,
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
- MB_MODE_INFO *const mbmi,
+ MODE_INFO *const mi,
int mi_row, int mi_col,
BLOCK_SIZE bsize,
int64_t rate,
int64_t dist,
- int skip) {
+ int skip,
+ struct macroblock_plane *const p) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
@@ -199,26 +169,44 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
- const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
- bsize);
+ int refresh_this_block = candidate_refresh_aq(cr, mi, rate, dist, bsize);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
+ int is_skin = 0;
+ if (refresh_this_block == 0 &&
+ bsize <= BLOCK_16X16 &&
+ cpi->use_skin_detection) {
+ is_skin = vp9_compute_skin_block(p[0].src.buf,
+ p[1].src.buf,
+ p[2].src.buf,
+ p[0].src.stride,
+ p[1].src.stride,
+ bsize,
+ 0,
+ 0);
+ if (is_skin)
+ refresh_this_block = 1;
+ }
+
+ if (cpi->oxcf.rc_mode == VPX_VBR && mi->ref_frame[0] == GOLDEN_FRAME)
+ refresh_this_block = 0;
+
// If this block is labeled for refresh, check if we should reset the
// segment_id.
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
- mbmi->segment_id = refresh_this_block;
+ if (cyclic_refresh_segment_id_boosted(mi->segment_id)) {
+ mi->segment_id = refresh_this_block;
// Reset segment_id if it will be skipped.
if (skip)
- mbmi->segment_id = CR_SEGMENT_ID_BASE;
+ mi->segment_id = CR_SEGMENT_ID_BASE;
}
// Update the cyclic refresh map, to be used for setting segmentation map
// for the next frame. If the block will be refreshed this frame, mark it
// as clean. The magnitude of the -ve influences how long before we consider
// it for refresh again.
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
+ if (cyclic_refresh_segment_id_boosted(mi->segment_id)) {
new_map_value = -cr->time_for_refresh;
} else if (refresh_this_block) {
// Else if it is accepted as candidate for refresh, and has not already
@@ -237,17 +225,16 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
for (x = 0; x < xmis; x++) {
int map_offset = block_index + y * cm->mi_cols + x;
cr->map[map_offset] = new_map_value;
- cpi->segmentation_map[map_offset] = mbmi->segment_id;
+ cpi->segmentation_map[map_offset] = mi->segment_id;
}
}
void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
- const MB_MODE_INFO *const mbmi,
+ const MODE_INFO *const mi,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- MV mv = mbmi->mv[0].as_mv;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
@@ -261,25 +248,18 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
// don't update the map for them. For cases where motion is non-zero or
// the reference frame isn't the previous frame, the previous value in
// the map for this spatial location is not entirely correct.
- if ((!is_inter_block(mbmi) || !mbmi->skip) &&
- mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ if ((!is_inter_block(mi) || !mi->skip) &&
+ mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
cr->last_coded_q_map[map_offset] = clamp(
- cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
- } else if (is_inter_block(mbmi) && mbmi->skip &&
- mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ);
+ } else if (is_inter_block(mi) && mi->skip &&
+ mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
cr->last_coded_q_map[map_offset] = VPXMIN(
- clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id],
+ clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id],
0, MAXQ),
cr->last_coded_q_map[map_offset]);
- // Update the consecutive zero/low_mv count.
- if (is_inter_block(mbmi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) {
- if (cr->consec_zero_mv[map_offset] < 255)
- cr->consec_zero_mv[map_offset]++;
- } else {
- cr->consec_zero_mv[map_offset] = 0;
}
}
- }
}
// Update the actual number of blocks that were applied the segment delta q.
@@ -305,13 +285,15 @@ void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) {
void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) {
RATE_CONTROL *const rc = &cpi->rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- // Set minimum gf_interval for GF update to a multiple (== 2) of refresh
- // period. Depending on past encoding stats, GF flag may be reset and update
- // may not occur until next baseline_gf_interval.
+  // Set minimum gf_interval for GF update to a multiple of the refresh
+  // period, with a maximum limit. Depending on past encoding stats, the GF
+  // flag may be reset and the update may not occur until the next
+  // baseline_gf_interval.
if (cr->percent_refresh > 0)
- rc->baseline_gf_interval = 4 * (100 / cr->percent_refresh);
+ rc->baseline_gf_interval = VPXMIN(4 * (100 / cr->percent_refresh), 40);
else
rc->baseline_gf_interval = 40;
+ if (cpi->oxcf.rc_mode == VPX_VBR)
+ rc->baseline_gf_interval = 20;
}
// Update some encoding stats (from the just encoded frame). If this frame's
@@ -324,42 +306,40 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
int mi_row, mi_col;
double fraction_low = 0.0;
int low_content_frame = 0;
-
MODE_INFO **mi = cm->mi_grid_visible;
RATE_CONTROL *const rc = &cpi->rc;
const int rows = cm->mi_rows, cols = cm->mi_cols;
int cnt1 = 0, cnt2 = 0;
int force_gf_refresh = 0;
-
+ int flag_force_gf_high_motion = 0;
for (mi_row = 0; mi_row < rows; mi_row++) {
for (mi_col = 0; mi_col < cols; mi_col++) {
- int16_t abs_mvr = mi[0]->mbmi.mv[0].as_mv.row >= 0 ?
- mi[0]->mbmi.mv[0].as_mv.row : -1 * mi[0]->mbmi.mv[0].as_mv.row;
- int16_t abs_mvc = mi[0]->mbmi.mv[0].as_mv.col >= 0 ?
- mi[0]->mbmi.mv[0].as_mv.col : -1 * mi[0]->mbmi.mv[0].as_mv.col;
-
- // Calculate the motion of the background.
- if (abs_mvr <= 16 && abs_mvc <= 16) {
- cnt1++;
- if (abs_mvr == 0 && abs_mvc == 0)
- cnt2++;
+ if (flag_force_gf_high_motion == 1) {
+ int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0 ?
+ mi[0]->mv[0].as_mv.row : -1 * mi[0]->mv[0].as_mv.row;
+ int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0 ?
+ mi[0]->mv[0].as_mv.col : -1 * mi[0]->mv[0].as_mv.col;
+ // Calculate the motion of the background.
+ if (abs_mvr <= 16 && abs_mvc <= 16) {
+ cnt1++;
+ if (abs_mvr == 0 && abs_mvc == 0)
+ cnt2++;
+ }
}
mi++;
-
// Accumulate low_content_frame.
if (cr->map[mi_row * cols + mi_col] < 1)
low_content_frame++;
}
mi += 8;
}
-
// For video conference clips, if the background has high motion in current
// frame because of the camera movement, set this frame as the golden frame.
// Use 70% and 5% as the thresholds for golden frame refreshing.
// Also, force this frame as a golden update frame if this frame will change
// the resolution (resize_pending != 0).
if (cpi->resize_pending != 0 ||
- (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
+ (cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
vp9_cyclic_refresh_set_golden_update(cpi);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -368,7 +348,6 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
cpi->refresh_golden_frame = 1;
force_gf_refresh = 1;
}
-
fraction_low =
(double)low_content_frame / (rows * cols);
// Update average.
@@ -412,12 +391,20 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
assert(cr->sb_index < sbs_in_frame);
i = cr->sb_index;
cr->target_num_seg_blocks = 0;
- if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
consec_zero_mv_thresh = 100;
+ }
qindex_thresh =
cpi->oxcf.content == VP9E_CONTENT_SCREEN
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
: vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
+ // More aggressive settings for noisy content.
+ if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
+ consec_zero_mv_thresh = 80;
+ qindex_thresh =
+ VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex),
+ 7 * cm->base_qindex >> 3);
+ }
do {
int sum_map = 0;
// Get the mi_row/mi_col corresponding to superblock index i.
@@ -442,7 +429,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
if (cr->map[bl_index2] == 0) {
count_tot++;
if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
- cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
+ cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
sum_map++;
count_sel++;
}
@@ -481,29 +468,46 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->percent_refresh = 5;
cr->max_qdelta_perc = 50;
cr->time_for_refresh = 0;
+ cr->motion_thresh = 32;
+ cr->rate_boost_fac = 15;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
// periods of the refresh cycle, after a key frame.
// Account for larger interval on base layer for temporal layers.
if (cr->percent_refresh > 0 &&
rc->frames_since_key < (4 * cpi->svc.number_temporal_layers) *
- (100 / cr->percent_refresh))
+ (100 / cr->percent_refresh)) {
cr->rate_ratio_qdelta = 3.0;
- else
+ } else {
cr->rate_ratio_qdelta = 2.0;
+ if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
+ // Reduce the delta-qp if the estimated source noise is above threshold.
+ cr->rate_ratio_qdelta = 1.7;
+ cr->rate_boost_fac = 13;
+ }
+ }
// Adjust some parameters for low resolutions at low bitrates.
if (cm->width <= 352 &&
cm->height <= 288 &&
rc->avg_frame_bandwidth < 3400) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 10;
- } else {
- cr->motion_thresh = 32;
- cr->rate_boost_fac = 15;
}
if (cpi->svc.spatial_layer_id > 0) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 12;
}
+ if (cpi->oxcf.rc_mode == VPX_VBR) {
+    // To be adjusted for VBR mode, e.g., based on gf period and boost.
+    // For now use a smaller qp-delta (than CBR), no second boosted seg, and
+    // turn off refresh on golden-refresh frames (since they're already
+    // boosted).
+ cr->percent_refresh = 10;
+ cr->rate_ratio_qdelta = 1.5;
+ cr->rate_boost_fac = 10;
+ if (cpi->refresh_golden_frame == 1) {
+ cr->percent_refresh = 0;
+ cr->rate_ratio_qdelta = 1.0;
+ }
+ }
}
// Setup cyclic background refresh: set delta q and segmentation map.
@@ -512,7 +516,10 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
struct segmentation *const seg = &cm->seg;
- const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
+ // TODO(marpan): Look into whether we should reduce the amount/delta-qp
+ // instead of completely shutting off at low bitrates. For now keep it on.
+ // const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
+ const int apply_cyclic_refresh = 1;
if (cm->current_video_frame == 0)
cr->low_content_avg = 0.0;
// Don't apply refresh on key frame or temporal enhancement layer frames.
@@ -526,8 +533,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
if (cm->frame_type == KEY_FRAME) {
memset(cr->last_coded_q_map, MAXQ,
cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
- memset(cr->consec_zero_mv, 0,
- cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
cr->sb_index = 0;
}
return;
@@ -602,7 +607,7 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
- memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
}
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
index a5b38138b..35eea182f 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -14,6 +14,8 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
#ifdef __cplusplus
extern "C" {
@@ -51,8 +53,6 @@ struct CYCLIC_REFRESH {
signed char *map;
// Map of the last q a block was coded at.
uint8_t *last_coded_q_map;
- // Count on how many consecutive times a block uses ZER0MV for encoding.
- uint8_t *consec_zero_mv;
// Thresholds applied to the projected rate/distortion of the coding block,
// when deciding whether block should be refreshed.
int64_t thresh_rate_sb;
@@ -91,12 +91,13 @@ int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i,
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi,
- MB_MODE_INFO *const mbmi,
+ MODE_INFO *const mi,
int mi_row, int mi_col, BLOCK_SIZE bsize,
- int64_t rate, int64_t dist, int skip);
+ int64_t rate, int64_t dist, int skip,
+ struct macroblock_plane *const p);
void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
- const MB_MODE_INFO *const mbmi,
+ const MODE_INFO *const mi,
int mi_row, int mi_col,
BLOCK_SIZE bsize);
diff --git a/libvpx/vp9/encoder/vp9_aq_variance.c b/libvpx/vp9/encoder/vp9_aq_variance.c
index 1c99105d1..59ef5faa6 100644
--- a/libvpx/vp9/encoder/vp9_aq_variance.c
+++ b/libvpx/vp9/encoder/vp9_aq_variance.c
@@ -48,7 +48,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
struct segmentation *seg = &cm->seg;
int i;
- if (cm->frame_type == KEY_FRAME ||
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
vp9_enable_segmentation(seg);
@@ -167,7 +167,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
vp9_64_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
- return (256 * var) / (bw * bh);
+ return (unsigned int)(((uint64_t)256 * var) / (bw * bh));
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -185,7 +185,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH
- return (256 * var) >> num_pels_log2_lookup[bs];
+ return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]);
}
}
diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c
index 461555438..73a2db09a 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/libvpx/vp9/encoder/vp9_bitstream.c
@@ -79,8 +79,8 @@ static void prob_diff_update(const vpx_tree_index *tree,
static void write_selected_tx_size(const VP9_COMMON *cm,
const MACROBLOCKD *xd, vpx_writer *w) {
- TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
- BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ TX_SIZE tx_size = xd->mi[0]->tx_size;
+ BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const vpx_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
&cm->fc->tx_probs);
@@ -97,7 +97,7 @@ static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
- const int skip = mi->mbmi.skip;
+ const int skip = mi->skip;
vpx_write(w, skip, vp9_get_skip_prob(cm, xd));
return skip;
}
@@ -123,72 +123,66 @@ static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w,
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
- TOKENEXTRA *p = *tp;
-
- while (p < stop && p->token != EOSB_TOKEN) {
- const int t = p->token;
- const struct vp9_token *const a = &vp9_coef_encodings[t];
- int i = 0;
- int v = a->value;
- int n = a->len;
+ const TOKENEXTRA *p;
+ const vp9_extra_bit *const extra_bits =
#if CONFIG_VP9_HIGHBITDEPTH
- const vp9_extra_bit *b;
- if (bit_depth == VPX_BITS_12)
- b = &vp9_extra_bits_high12[t];
- else if (bit_depth == VPX_BITS_10)
- b = &vp9_extra_bits_high10[t];
- else
- b = &vp9_extra_bits[t];
+ (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 :
+ (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 :
+ vp9_extra_bits;
#else
- const vp9_extra_bit *const b = &vp9_extra_bits[t];
+ vp9_extra_bits;
(void) bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- /* skip one or two nodes */
- if (p->skip_eob_node) {
- n -= p->skip_eob_node;
- i = 2 * p->skip_eob_node;
+ for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
+ if (p->token == EOB_TOKEN) {
+ vpx_write(w, 0, p->context_tree[0]);
+ continue;
}
-
- // TODO(jbb): expanding this can lead to big gains. It allows
- // much better branch prediction and would enable us to avoid numerous
- // lookups and compares.
-
- // If we have a token that's in the constrained set, the coefficient tree
- // is split into two treed writes. The first treed write takes care of the
- // unconstrained nodes. The second treed write takes care of the
- // constrained nodes.
- if (t >= TWO_TOKEN && t < EOB_TOKEN) {
- int len = UNCONSTRAINED_NODES - p->skip_eob_node;
- int bits = v >> (n - len);
- vp9_write_tree(w, vp9_coef_tree, p->context_tree, bits, len, i);
- vp9_write_tree(w, vp9_coef_con_tree,
- vp9_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
- v, n - len, 0);
- } else {
- vp9_write_tree(w, vp9_coef_tree, p->context_tree, v, n, i);
+ vpx_write(w, 1, p->context_tree[0]);
+ while (p->token == ZERO_TOKEN) {
+ vpx_write(w, 0, p->context_tree[1]);
+ ++p;
+ if (p == stop || p->token == EOSB_TOKEN) {
+ *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
+ return;
+ }
}
- if (b->base_val) {
- const int e = p->extra, l = b->len;
-
- if (l) {
- const unsigned char *pb = b->prob;
- int v = e >> 1;
- int n = l; /* number of bits in v, assumed nonzero */
-
- do {
- const int bb = (v >> --n) & 1;
- vpx_write(w, bb, *pb++);
- } while (n);
+ {
+ const int t = p->token;
+ const vpx_prob *const context_tree = p->context_tree;
+ assert(t != ZERO_TOKEN);
+ assert(t != EOB_TOKEN);
+ assert(t != EOSB_TOKEN);
+ vpx_write(w, 1, context_tree[1]);
+ if (t == ONE_TOKEN) {
+ vpx_write(w, 0, context_tree[2]);
+ vpx_write_bit(w, p->extra & 1);
+ } else { // t >= TWO_TOKEN && t < EOB_TOKEN
+ const struct vp9_token *const a = &vp9_coef_encodings[t];
+ const int v = a->value;
+ const int n = a->len;
+ const int e = p->extra;
+ vpx_write(w, 1, context_tree[2]);
+ vp9_write_tree(w, vp9_coef_con_tree,
+ vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
+ n - UNCONSTRAINED_NODES, 0);
+ if (t >= CATEGORY1_TOKEN) {
+ const vp9_extra_bit *const b = &extra_bits[t];
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;
+ int n = b->len; // number of bits in v, assumed nonzero
+ do {
+ const int bb = (v >> --n) & 1;
+ vpx_write(w, bb, *pb++);
+ } while (n);
+ }
+ vpx_write_bit(w, e & 1);
}
-
- vpx_write_bit(w, e & 1);
}
- ++p;
}
-
- *tp = p + (p->token == EOSB_TOKEN);
+ *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
}
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
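The rewrite above unrolls the first three nodes of the coefficient token tree into explicit vpx_write() calls: the first bit says "not end-of-block", the second ends a run of ZERO_TOKENs, and the third separates ONE_TOKEN from the larger-magnitude tokens that still go through the constrained pareto tree. A toy sketch of just that prefix, with an invented bit sink and illustrative token values (the real enum and probabilities live in the encoder):

#include <stdio.h>

/* Illustrative token values only; the real enum lives in the encoder. */
enum { EOB_TOKEN, ZERO_TOKEN, ONE_TOKEN, TWO_TOKEN };

/* Toy bit sink standing in for vpx_write(w, bit, prob). */
static void write_bit(int bit, const char *node) {
  printf("%-6s -> %d\n", node, bit);
}

/* The unrolled prefix written per token in the new pack_mb_tokens. */
static void write_token_prefix(int token) {
  if (token == EOB_TOKEN) {    /* 0 on ctx[0]: end of block */
    write_bit(0, "ctx[0]");
    return;
  }
  write_bit(1, "ctx[0]");      /* not EOB */
  if (token == ZERO_TOKEN) {   /* 0 on ctx[1]: zero, move to next token */
    write_bit(0, "ctx[1]");
    return;
  }
  write_bit(1, "ctx[1]");      /* nonzero */
  write_bit(token != ONE_TOKEN, "ctx[2]");  /* 0: ONE_TOKEN; 1: larger
                                               magnitude, then pareto tree */
}

int main(void) {
  write_token_prefix(ZERO_TOKEN);
  write_token_prefix(ONE_TOKEN);
  write_token_prefix(TWO_TOKEN);
  write_token_prefix(EOB_TOKEN);
  return 0;
}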
@@ -200,15 +194,15 @@ static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
// This function encodes the reference frame
static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
vpx_writer *w) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int is_compound = has_second_ref(mbmi);
- const int segment_id = mbmi->segment_id;
+ const MODE_INFO *const mi = xd->mi[0];
+ const int is_compound = has_second_ref(mi);
+ const int segment_id = mi->segment_id;
// If segment level coding of this signal is disabled...
// or the segment allows multiple reference frame options
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
assert(!is_compound);
- assert(mbmi->ref_frame[0] ==
+ assert(mi->ref_frame[0] ==
get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
} else {
// does the feature use compound prediction or not
@@ -220,13 +214,13 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
if (is_compound) {
- vpx_write(w, mbmi->ref_frame[0] == GOLDEN_FRAME,
+ vpx_write(w, mi->ref_frame[0] == GOLDEN_FRAME,
vp9_get_pred_prob_comp_ref_p(cm, xd));
} else {
- const int bit0 = mbmi->ref_frame[0] != LAST_FRAME;
+ const int bit0 = mi->ref_frame[0] != LAST_FRAME;
vpx_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd));
if (bit0) {
- const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
+ const int bit1 = mi->ref_frame[0] != GOLDEN_FRAME;
vpx_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd));
}
}
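In the single-reference path above, the reference is coded as a two-level decision: bit0 is "not LAST_FRAME", and only when it is set does bit1 choose between GOLDEN_FRAME and ALTREF_FRAME. A compact sketch of that layout; the frame constants are stand-ins, and the per-bit probabilities from vp9_get_pred_prob_single_ref_p1/p2 are omitted:

#include <assert.h>

/* Stand-in frame constants; only their distinctness matters here. */
enum { LAST_FRAME = 1, GOLDEN_FRAME = 2, ALTREF_FRAME = 3 };

/* Single-reference signalling as in write_ref_frames: returns the
 * number of bits spent and fills in the bit values. */
static int single_ref_bits(int ref, int *bit0, int *bit1) {
  *bit0 = ref != LAST_FRAME;
  if (!*bit0) return 1;          /* LAST_FRAME: one bit */
  *bit1 = ref != GOLDEN_FRAME;   /* GOLDEN vs ALTREF: a second bit */
  return 2;
}

int main(void) {
  int b0, b1;
  assert(single_ref_bits(LAST_FRAME, &b0, &b1) == 1 && b0 == 0);
  assert(single_ref_bits(GOLDEN_FRAME, &b0, &b1) == 2 && b0 == 1 && b1 == 0);
  assert(single_ref_bits(ALTREF_FRAME, &b0, &b1) == 2 && b0 == 1 && b1 == 1);
  return 0;
}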
@@ -240,19 +234,18 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
const MACROBLOCK *const x = &cpi->td.mb;
const MACROBLOCKD *const xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg;
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const PREDICTION_MODE mode = mbmi->mode;
- const int segment_id = mbmi->segment_id;
- const BLOCK_SIZE bsize = mbmi->sb_type;
+ const PREDICTION_MODE mode = mi->mode;
+ const int segment_id = mi->segment_id;
+ const BLOCK_SIZE bsize = mi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
- const int is_inter = is_inter_block(mbmi);
- const int is_compound = has_second_ref(mbmi);
+ const int is_inter = is_inter_block(mi);
+ const int is_compound = has_second_ref(mi);
int skip, ref;
if (seg->update_map) {
if (seg->temporal_update) {
- const int pred_flag = mbmi->seg_id_predicted;
+ const int pred_flag = mi->seg_id_predicted;
vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
vpx_write(w, pred_flag, pred_prob);
if (!pred_flag)
@@ -286,9 +279,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
}
}
}
- write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
+ write_intra_mode(w, mi->uv_mode, cm->fc->uv_mode_prob[mode]);
} else {
- const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
+ const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
write_ref_frames(cm, xd, w);
@@ -303,10 +296,10 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
const int ctx = vp9_get_pred_context_switchable_interp(xd);
vp9_write_token(w, vp9_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx],
- &switchable_interp_encodings[mbmi->interp_filter]);
- ++cpi->interp_filter_selected[0][mbmi->interp_filter];
+ &switchable_interp_encodings[mi->interp_filter]);
+ ++cpi->interp_filter_selected[0][mi->interp_filter];
} else {
- assert(mbmi->interp_filter == cm->interp_filter);
+ assert(mi->interp_filter == cm->interp_filter);
}
if (bsize < BLOCK_8X8) {
@@ -321,7 +314,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
if (b_mode == NEWMV) {
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
- &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
+ &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv,
nmvc, allow_hp);
}
}
@@ -329,8 +322,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
} else {
if (mode == NEWMV) {
for (ref = 0; ref < 1 + is_compound; ++ref)
- vp9_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
- &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc,
+ vp9_encode_mv(cpi, w, &mi->mv[ref].as_mv,
+ &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc,
allow_hp);
}
}
@@ -343,19 +336,18 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const MODE_INFO *const mi = mi_8x8[0];
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
- const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
if (seg->update_map)
- write_segment_id(w, seg, mbmi->segment_id);
+ write_segment_id(w, seg, mi->segment_id);
- write_skip(cm, xd, mbmi->segment_id, mi, w);
+ write_skip(cm, xd, mi->segment_id, mi, w);
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT)
write_selected_tx_size(cm, xd, w);
if (bsize >= BLOCK_8X8) {
- write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
+ write_intra_mode(w, mi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0));
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -370,7 +362,7 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
}
- write_intra_mode(w, mbmi->uv_mode, vp9_kf_uv_mode_prob[mbmi->mode]);
+ write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]);
}
static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
@@ -388,8 +380,8 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
(mi_row * cm->mi_cols + mi_col);
set_mi_row_col(xd, tile,
- mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type],
- mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
+ mi_row, num_8x8_blocks_high_lookup[m->sb_type],
+ mi_col, num_8x8_blocks_wide_lookup[m->sb_type],
cm->mi_rows, cm->mi_cols);
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w);
@@ -441,7 +433,7 @@ static void write_modes_sb(VP9_COMP *cpi,
m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
- partition = partition_lookup[bsl][m->mbmi.sb_type];
+ partition = partition_lookup[bsl][m->sb_type];
write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
if (subsize < BLOCK_8X8) {
@@ -553,8 +545,8 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
int u = 0;
if (t == PIVOT_NODE)
s = vp9_prob_diff_update_savings_search_model(
- frame_branch_ct[i][j][k][l][0],
- old_coef_probs[i][j][k][l], &newp, upd, stepsize);
+ frame_branch_ct[i][j][k][l][0], oldp, &newp, upd,
+ stepsize);
else
s = vp9_prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
@@ -592,7 +584,7 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
if (t == PIVOT_NODE)
s = vp9_prob_diff_update_savings_search_model(
frame_branch_ct[i][j][k][l][0],
- old_coef_probs[i][j][k][l], &newp, upd, stepsize);
+ *oldp, &newp, upd, stepsize);
else
s = vp9_prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t],
@@ -630,7 +622,7 @@ static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
if (t == PIVOT_NODE) {
s = vp9_prob_diff_update_savings_search_model(
frame_branch_ct[i][j][k][l][0],
- old_coef_probs[i][j][k][l], &newp, upd, stepsize);
+ *oldp, &newp, upd, stepsize);
} else {
s = vp9_prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t],
@@ -899,7 +891,7 @@ static void write_tile_info(const VP9_COMMON *const cm,
vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
}
-static int get_refresh_mask(VP9_COMP *cpi) {
+int vp9_get_refresh_mask(VP9_COMP *cpi) {
if (vp9_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term we leave it in the GF slot and,
@@ -1115,11 +1107,11 @@ static void write_uncompressed_header(VP9_COMP *cpi,
write_bitdepth_colorspace_sampling(cm, wb);
}
- vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+ vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
write_frame_size(cm, wb);
} else {
MV_REFERENCE_FRAME ref_frame;
- vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+ vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
diff --git a/libvpx/vp9/encoder/vp9_bitstream.h b/libvpx/vp9/encoder/vp9_bitstream.h
index da6b41464..f24d20f31 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.h
+++ b/libvpx/vp9/encoder/vp9_bitstream.h
@@ -18,6 +18,8 @@ extern "C" {
#include "vp9/encoder/vp9_encoder.h"
+int vp9_get_refresh_mask(VP9_COMP *cpi);
+
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h
index fc3478626..069c33564 100644
--- a/libvpx/vp9/encoder/vp9_block.h
+++ b/libvpx/vp9/encoder/vp9_block.h
@@ -65,12 +65,20 @@ struct macroblock {
int skip_optimize;
int q_index;
+ // The equivalent error at the current rdmult of one whole bit (not one
+ // bitcost unit).
int errorperbit;
+  // The equivalent SAD error of one (whole) bit at the current quantizer
+ // for large blocks.
int sadperbit16;
+  // The equivalent SAD error of one (whole) bit at the current quantizer
+ // for sub-8x8 blocks.
int sadperbit4;
int rddiv;
int rdmult;
int mb_energy;
+  int *m_search_count_ptr;
+  int *ex_search_count_ptr;
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
@@ -135,6 +143,13 @@ struct macroblock {
// the visual quality at the boundary of moving color objects.
uint8_t color_sensitivity[2];
+ uint8_t sb_is_skin;
+
+ // Used to save the status of whether a block has a low variance in
+ // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
+ // 32x32, 9~24 for 16x16.
+ uint8_t variance_low[25];
+
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH
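The new comment pins down a flat layout for the 25 variance_low flags. The helpers below restate that mapping so the index arithmetic used later in vp9_encodeframe.c (slots i + 1, i + 3, i + 5 and (i << 2) + j + 9 in set_low_temp_var_flag) is easy to check; the helper names themselves are invented for illustration:

#include <assert.h>

/* variance_low[25] slots: 0 = 64x64; 1-2 = 64x32 halves; 3-4 = 32x64
 * halves; 5-8 = 32x32 quarters; 9-24 = 16x16 blocks, where i indexes
 * the 32x32 quarter and j the 16x16 block inside it. */
static int slot_64x32(int i) { return 1 + i; }             /* i in 0..1 */
static int slot_32x64(int i) { return 3 + i; }             /* i in 0..1 */
static int slot_32x32(int i) { return 5 + i; }             /* i in 0..3 */
static int slot_16x16(int i, int j) { return 9 + (i << 2) + j; }

int main(void) {
  assert(slot_64x32(1) == 2);
  assert(slot_32x64(0) == 3);
  assert(slot_32x32(3) == 8);
  assert(slot_16x16(3, 3) == 24);  /* last of the 25 slots */
  return 0;
}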
diff --git a/libvpx/vp9/encoder/vp9_context_tree.h b/libvpx/vp9/encoder/vp9_context_tree.h
index 8e365ce33..86ba03d69 100644
--- a/libvpx/vp9/encoder/vp9_context_tree.h
+++ b/libvpx/vp9/encoder/vp9_context_tree.h
@@ -60,6 +60,7 @@ typedef struct {
#if CONFIG_VP9_TEMPORAL_DENOISING
unsigned int newmv_sse;
unsigned int zeromv_sse;
+ unsigned int zeromv_lastref_sse;
PREDICTION_MODE best_sse_inter_mode;
int_mv best_sse_mv;
MV_REFERENCE_FRAME best_reference_frame;
diff --git a/libvpx/vp9/encoder/vp9_cost.c b/libvpx/vp9/encoder/vp9_cost.c
index e2fbb34aa..5d14742bc 100644
--- a/libvpx/vp9/encoder/vp9_cost.c
+++ b/libvpx/vp9/encoder/vp9_cost.c
@@ -11,35 +11,38 @@
#include "vp9/encoder/vp9_cost.h"
-const unsigned int vp9_prob_cost[256] = {
- 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161,
- 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889,
- 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733,
- 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
- 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541,
- 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472,
- 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415,
- 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
- 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321,
- 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281,
- 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246,
- 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
- 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184,
- 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156,
- 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131,
- 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
- 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84,
- 82, 81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63,
- 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43,
- 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
- 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6,
- 4, 3, 1, 1};
+/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
+ Begins with a bogus entry for simpler addressing. */
+const uint16_t vp9_prob_cost[256] = {
+ 4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
+ 2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
+ 1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
+ 1449, 1429, 1409, 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252,
+ 1236, 1221, 1206, 1192, 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084,
+ 1072, 1059, 1047, 1036, 1024, 1013, 1001, 990, 979, 968, 958, 947,
+ 937, 927, 917, 907, 897, 887, 878, 868, 859, 850, 841, 832,
+ 823, 814, 806, 797, 789, 780, 772, 764, 756, 748, 740, 732,
+ 724, 717, 709, 702, 694, 687, 680, 673, 665, 658, 651, 644,
+ 637, 631, 624, 617, 611, 604, 598, 591, 585, 578, 572, 566,
+ 560, 554, 547, 541, 535, 530, 524, 518, 512, 506, 501, 495,
+ 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, 430,
+ 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
+ 366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316,
+ 311, 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264,
+ 260, 256, 252, 248, 244, 240, 236, 232, 228, 224, 220, 216,
+ 212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, 171,
+ 168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129,
+ 125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89,
+ 86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51,
+ 48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
+ 12, 9, 6, 3};
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
int i, int c) {
const vpx_prob prob = probs[i / 2];
int b;
+ assert(prob != 0);
for (b = 0; b <= 1; ++b) {
const int cc = c + vp9_cost_bit(prob, b);
const vpx_tree_index ii = tree[i + b];
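The table can be rebuilt directly from the formula in its comment. The sketch below does so and spot-checks it against entries quoted above; note that index 128 (p = 1/2) costs exactly 512 units, i.e. one whole bit at VP9_PROB_COST_SHIFT = 9. This is a standalone check, not encoder code, and needs -lm to link:

#include <assert.h>
#include <math.h>
#include <stdint.h>

#define VP9_PROB_COST_SHIFT 9  /* as defined in vp9_cost.h below */

int main(void) {
  uint16_t table[256];
  int i;
  /* Rebuild the table from the formula in its comment, duplicating
   * entry 1 into the bogus entry 0 used for direct indexing. */
  for (i = 1; i < 256; ++i)
    table[i] = (uint16_t)round(-log2(i / 256.) * (1 << VP9_PROB_COST_SHIFT));
  table[0] = table[1];
  assert(table[0] == 4096);   /* first entries quoted above */
  assert(table[128] == 512);  /* p = 1/2 costs exactly one bit: 512 units */
  assert(table[255] == 3);    /* last entry quoted above */
  return 0;
}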
diff --git a/libvpx/vp9/encoder/vp9_cost.h b/libvpx/vp9/encoder/vp9_cost.h
index eac74c40b..0c70b7826 100644
--- a/libvpx/vp9/encoder/vp9_cost.h
+++ b/libvpx/vp9/encoder/vp9_cost.h
@@ -12,18 +12,22 @@
#define VP9_ENCODER_VP9_COST_H_
#include "vpx_dsp/prob.h"
+#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
-extern const unsigned int vp9_prob_cost[256];
+extern const uint16_t vp9_prob_cost[256];
+
+// The factor to scale from cost in bits to cost in vp9_prob_cost units.
+#define VP9_PROB_COST_SHIFT 9
#define vp9_cost_zero(prob) (vp9_prob_cost[prob])
-#define vp9_cost_one(prob) vp9_cost_zero(vpx_complement(prob))
+#define vp9_cost_one(prob) vp9_cost_zero(256 - (prob))
-#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vpx_complement(prob) \
+#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 256 - (prob) \
: (prob))
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
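A worked example of the macros above, assuming the two table entries quoted in vp9_cost.c (vp9_prob_cost[192] = 212 and vp9_prob_cost[64] = 1024): with P(bit == 0) = 192/256 = 0.75, coding a 0 costs about 0.41 bits and coding a 1 costs exactly 2 bits, all in 1/512-bit units. Only those two entries are inlined here:

#include <assert.h>

/* Stand-ins for two vp9_prob_cost[] entries quoted in vp9_cost.c. */
static const int cost_at_192 = 212;   /* ~ -log2(192/256) * 512 */
static const int cost_at_64 = 1024;   /* = -log2( 64/256) * 512 */

int main(void) {
  /* vp9_cost_bit(192, 0) = vp9_prob_cost[192];
   * vp9_cost_bit(192, 1) = vp9_prob_cost[256 - 192] = vp9_prob_cost[64]. */
  assert(cost_at_192 == 212);      /* ~0.41 bits */
  assert(cost_at_64 == 2 * 512);   /* exactly 2 bits */
  return 0;
}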
diff --git a/libvpx/vp9/encoder/vp9_denoiser.c b/libvpx/vp9/encoder/vp9_denoiser.c
index 8623b4225..42d456e89 100644
--- a/libvpx/vp9/encoder/vp9_denoiser.c
+++ b/libvpx/vp9/encoder/vp9_denoiser.c
@@ -21,12 +21,6 @@
#include "vp9/encoder/vp9_denoiser.h"
#include "vp9/encoder/vp9_encoder.h"
-/* The VP9 denoiser is similar to that of the VP8 denoiser. While
- * choosing the motion vectors / reference frames, the denoiser is run, and if
- * it did not modify the signal to much, the denoised block is copied to the
- * signal.
- */
-
#ifdef OUTPUT_YUV_DENOISED
static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
#endif
@@ -49,16 +43,19 @@ static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
}
static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
- return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40);
+ return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40);
}
static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
int motion_magnitude) {
if (motion_magnitude >
noise_motion_thresh(bs, increase_denoising)) {
- return 0;
+ if (increase_denoising)
+ return (1 << num_pels_log2_lookup[bs]) << 2;
+ else
+ return 0;
} else {
- return (1 << num_pels_log2_lookup[bs]) * 20;
+ return (1 << num_pels_log2_lookup[bs]) << 4;
}
}
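In per-pixel terms, sse_thresh above now allows 40 per pixel (80 when denoising is increased, up from 60), and sse_diff_thresh becomes a shift: 16 per pixel in the low-motion case (down from 20), plus a new nonzero high-motion case of 4 per pixel under increased denoising. A small sanity-check sketch; pels_log2 = 8 corresponds to a 16x16 block:

#include <assert.h>

/* Per-pixel denoiser thresholds after this hunk. */
static unsigned int sse_thresh(int pels_log2, int increase_denoising) {
  return (1u << pels_log2) * (increase_denoising ? 80 : 40);
}

static int sse_diff_thresh(int pels_log2, int increase_denoising,
                           int high_motion) {
  if (high_motion)
    return increase_denoising ? (1 << pels_log2) << 2 : 0;
  return (1 << pels_log2) << 4;
}

int main(void) {
  assert(sse_thresh(8, 0) == 256 * 40);
  assert(sse_thresh(8, 1) == 256 * 80);         /* was 256 * 60 */
  assert(sse_diff_thresh(8, 0, 0) == 256 * 16); /* was 256 * 20 */
  assert(sse_diff_thresh(8, 1, 1) == 256 * 4);  /* was 0 */
  return 0;
}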
@@ -183,7 +180,7 @@ int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
static uint8_t *block_start(uint8_t *framebuf, int stride,
int mi_row, int mi_col) {
- return framebuf + (stride * mi_row * 8) + (mi_col * 8);
+ return framebuf + (stride * mi_row << 3) + (mi_col << 3);
}
static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
@@ -193,90 +190,101 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
int mi_row,
int mi_col,
PICK_MODE_CONTEXT *ctx,
- int *motion_magnitude,
- int is_skin) {
- int mv_col, mv_row;
+ int motion_magnitude,
+ int is_skin,
+ int *zeromv_filter,
+ int consec_zeromv) {
int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
MV_REFERENCE_FRAME frame;
MACROBLOCKD *filter_mbd = &mb->e_mbd;
- MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
- MB_MODE_INFO saved_mbmi;
- int i, j;
+ MODE_INFO *mi = filter_mbd->mi[0];
+ MODE_INFO saved_mi;
+ int i;
struct buf_2d saved_dst[MAX_MB_PLANE];
- struct buf_2d saved_pre[MAX_MB_PLANE][2]; // 2 pre buffers
+ struct buf_2d saved_pre[MAX_MB_PLANE];
- mv_col = ctx->best_sse_mv.as_mv.col;
- mv_row = ctx->best_sse_mv.as_mv.row;
- *motion_magnitude = mv_row * mv_row + mv_col * mv_col;
frame = ctx->best_reference_frame;
+ saved_mi = *mi;
- saved_mbmi = *mbmi;
+ if (is_skin && (motion_magnitude > 0 || consec_zeromv < 4))
+ return COPY_BLOCK;
- if (is_skin && *motion_magnitude > 16)
+  // Avoid denoising for small blocks (unless motion is small).
+ // Small blocks are selected in variance partition (before encoding) and
+ // will typically lie on moving areas.
+ if (denoiser->denoising_level < kDenHigh &&
+ motion_magnitude > 16 && bs <= BLOCK_8X8)
return COPY_BLOCK;
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME &&
- sse_diff > sse_diff_thresh(bs, increase_denoising, *motion_magnitude)) {
- mbmi->ref_frame[0] = ctx->best_reference_frame;
- mbmi->mode = ctx->best_sse_inter_mode;
- mbmi->mv[0] = ctx->best_sse_mv;
+ ctx->newmv_sse != UINT_MAX &&
+ sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
+ mi->ref_frame[0] = ctx->best_reference_frame;
+ mi->mode = ctx->best_sse_inter_mode;
+ mi->mv[0] = ctx->best_sse_mv;
} else {
// Otherwise, use the zero reference frame.
frame = ctx->best_zeromv_reference_frame;
-
- mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame;
- mbmi->mode = ZEROMV;
- mbmi->mv[0].as_int = 0;
-
+ ctx->newmv_sse = ctx->zeromv_sse;
+ // Bias to last reference.
+ if (frame != LAST_FRAME &&
+ ((ctx->zeromv_lastref_sse < (5 * ctx->zeromv_sse) >> 2) ||
+ denoiser->denoising_level >= kDenHigh)) {
+ frame = LAST_FRAME;
+ ctx->newmv_sse = ctx->zeromv_lastref_sse;
+ }
+ mi->ref_frame[0] = frame;
+ mi->mode = ZEROMV;
+ mi->mv[0].as_int = 0;
ctx->best_sse_inter_mode = ZEROMV;
ctx->best_sse_mv.as_int = 0;
- ctx->newmv_sse = ctx->zeromv_sse;
+ *zeromv_filter = 1;
+ if (denoiser->denoising_level > kDenMedium) {
+ motion_magnitude = 0;
+ }
}
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
// Restore everything to its original state
- *mbmi = saved_mbmi;
+ *mi = saved_mi;
return COPY_BLOCK;
}
- if (*motion_magnitude >
+ if (motion_magnitude >
(noise_motion_thresh(bs, increase_denoising) << 3)) {
// Restore everything to its original state
- *mbmi = saved_mbmi;
+ *mi = saved_mi;
return COPY_BLOCK;
}
// We will restore these after motion compensation.
for (i = 0; i < MAX_MB_PLANE; ++i) {
- for (j = 0; j < 2; ++j) {
- saved_pre[i][j] = filter_mbd->plane[i].pre[j];
- }
+ saved_pre[i] = filter_mbd->plane[i].pre[0];
saved_dst[i] = filter_mbd->plane[i].dst;
}
// Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
// struct.
- for (j = 0; j < 2; ++j) {
- filter_mbd->plane[0].pre[j].buf =
- block_start(denoiser->running_avg_y[frame].y_buffer,
- denoiser->running_avg_y[frame].y_stride,
- mi_row, mi_col);
- filter_mbd->plane[0].pre[j].stride =
- denoiser->running_avg_y[frame].y_stride;
- filter_mbd->plane[1].pre[j].buf =
- block_start(denoiser->running_avg_y[frame].u_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[1].pre[j].stride =
- denoiser->running_avg_y[frame].uv_stride;
- filter_mbd->plane[2].pre[j].buf =
- block_start(denoiser->running_avg_y[frame].v_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[2].pre[j].stride =
- denoiser->running_avg_y[frame].uv_stride;
- }
+ filter_mbd->plane[0].pre[0].buf =
+ block_start(denoiser->running_avg_y[frame].y_buffer,
+ denoiser->running_avg_y[frame].y_stride,
+ mi_row, mi_col);
+ filter_mbd->plane[0].pre[0].stride =
+ denoiser->running_avg_y[frame].y_stride;
+ filter_mbd->plane[1].pre[0].buf =
+ block_start(denoiser->running_avg_y[frame].u_buffer,
+ denoiser->running_avg_y[frame].uv_stride,
+ mi_row, mi_col);
+ filter_mbd->plane[1].pre[0].stride =
+ denoiser->running_avg_y[frame].uv_stride;
+ filter_mbd->plane[2].pre[0].buf =
+ block_start(denoiser->running_avg_y[frame].v_buffer,
+ denoiser->running_avg_y[frame].uv_stride,
+ mi_row, mi_col);
+ filter_mbd->plane[2].pre[0].stride =
+ denoiser->running_avg_y[frame].uv_stride;
+
filter_mbd->plane[0].dst.buf =
block_start(denoiser->mc_running_avg_y.y_buffer,
denoiser->mc_running_avg_y.y_stride,
@@ -293,27 +301,26 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
mi_row, mi_col);
filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
- vp9_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs);
+ vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
// Restore everything to its original state
- *mbmi = saved_mbmi;
+ *mi = saved_mi;
for (i = 0; i < MAX_MB_PLANE; ++i) {
- for (j = 0; j < 2; ++j) {
- filter_mbd->plane[i].pre[j] = saved_pre[i][j];
- }
+ filter_mbd->plane[i].pre[0] = saved_pre[i];
filter_mbd->plane[i].dst = saved_dst[i];
}
- mv_row = ctx->best_sse_mv.as_mv.row;
- mv_col = ctx->best_sse_mv.as_mv.col;
-
return FILTER_BLOCK;
}
-void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
+void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
- PICK_MODE_CONTEXT *ctx) {
+ PICK_MODE_CONTEXT *ctx,
+ VP9_DENOISER_DECISION *denoiser_decision) {
+ int mv_col, mv_row;
int motion_magnitude = 0;
+ int zeromv_filter = 0;
+ VP9_DENOISER *denoiser = &cpi->denoiser;
VP9_DENOISER_DECISION decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
@@ -322,36 +329,75 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
int is_skin = 0;
-
- if (bs <= BLOCK_16X16 && denoiser->denoising_on) {
- // Take center pixel in block to determine is_skin.
- const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
- const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
- const int uv_width_shift = y_width_shift >> 1;
- const int uv_height_shift = y_height_shift >> 1;
- const int stride = mb->plane[0].src.stride;
- const int strideuv = mb->plane[1].src.stride;
- const uint8_t ysource =
- mb->plane[0].src.buf[y_height_shift * stride + y_width_shift];
- const uint8_t usource =
- mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
- const uint8_t vsource =
- mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
- is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ int consec_zeromv = 0;
+ mv_col = ctx->best_sse_mv.as_mv.col;
+ mv_row = ctx->best_sse_mv.as_mv.row;
+ motion_magnitude = mv_row * mv_row + mv_col * mv_col;
+
+ if (cpi->use_skin_detection &&
+ bs <= BLOCK_32X32 &&
+ denoiser->denoising_level < kDenHigh) {
+ int motion_level = (motion_magnitude < 16) ? 0 : 1;
+ // If motion for current block is small/zero, compute consec_zeromv for
+ // skin detection (early exit in skin detection is done for large
+ // consec_zeromv when current block has small/zero motion).
+ consec_zeromv = 0;
+ if (motion_level == 0) {
+ VP9_COMMON * const cm = &cpi->common;
+ int j, i;
+ // Loop through the 8x8 sub-blocks.
+ const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
+ const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ consec_zeromv = 100;
+ for (i = 0; i < ymis; i++) {
+ for (j = 0; j < xmis; j++) {
+ int bl_index = block_index + i * cm->mi_cols + j;
+ consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], consec_zeromv);
+ // No need to keep checking 8x8 blocks if any of the sub-blocks
+ // has small consec_zeromv (since threshold for no_skin based on
+        // zero/small motion in skin detection is high, i.e., > 4).
+ if (consec_zeromv < 4) {
+ i = ymis;
+ j = xmis;
+ }
+ }
+ }
+ }
+ // TODO(marpan): Compute skin detection over sub-blocks.
+ is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
+ mb->plane[1].src.buf,
+ mb->plane[2].src.buf,
+ mb->plane[0].src.stride,
+ mb->plane[1].src.stride,
+ bs,
+ consec_zeromv,
+ motion_level);
+ }
+ if (!is_skin &&
+ denoiser->denoising_level == kDenHigh) {
+ denoiser->increase_denoising = 1;
+ } else {
+ denoiser->increase_denoising = 0;
}
- if (denoiser->denoising_on)
+ if (denoiser->denoising_level >= kDenLow)
decision = perform_motion_compensation(denoiser, mb, bs,
denoiser->increase_denoising,
mi_row, mi_col, ctx,
- &motion_magnitude,
- is_skin);
+ motion_magnitude,
+ is_skin,
+ &zeromv_filter,
+ consec_zeromv);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride,
mc_avg_start, mc_avg.y_stride,
avg_start, avg.y_stride,
- 0, bs, motion_magnitude);
+ denoiser->increase_denoising,
+ bs, motion_magnitude);
}
if (decision == FILTER_BLOCK) {
@@ -365,6 +411,9 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
num_4x4_blocks_wide_lookup[bs] << 2,
num_4x4_blocks_high_lookup[bs] << 2);
}
+ *denoiser_decision = decision;
+ if (decision == FILTER_BLOCK && zeromv_filter == 1)
+ *denoiser_decision = FILTER_ZEROMV_BLOCK;
}
static void copy_frame(YV12_BUFFER_CONFIG * const dest,
@@ -401,11 +450,12 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
int resized) {
// Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized.
- if (frame_type == KEY_FRAME || resized != 0) {
+ if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset) {
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < MAX_REF_FRAMES; ++i)
copy_frame(&denoiser->running_avg_y[i], &src);
+ denoiser->reset = 0;
return;
}
@@ -443,22 +493,25 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
ctx->zeromv_sse = UINT_MAX;
ctx->newmv_sse = UINT_MAX;
+ ctx->zeromv_lastref_sse = UINT_MAX;
+ ctx->best_sse_mv.as_int = 0;
}
-void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
+void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PREDICTION_MODE mode,
PICK_MODE_CONTEXT *ctx) {
- // TODO(tkopp): Use both MVs if possible
- if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
+ if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
ctx->zeromv_sse = sse;
- ctx->best_zeromv_reference_frame = mbmi->ref_frame[0];
+ ctx->best_zeromv_reference_frame = mi->ref_frame[0];
+ if (mi->ref_frame[0] == LAST_FRAME)
+ ctx->zeromv_lastref_sse = sse;
}
- if (mbmi->mv[0].as_int != 0 && sse < ctx->newmv_sse) {
+ if (mi->mv[0].as_int != 0 && sse < ctx->newmv_sse) {
ctx->newmv_sse = sse;
ctx->best_sse_inter_mode = mode;
- ctx->best_sse_mv = mbmi->mv[0];
- ctx->best_reference_frame = mbmi->ref_frame[0];
+ ctx->best_sse_mv = mi->mv[0];
+ ctx->best_reference_frame = mi->ref_frame[0];
}
}
@@ -514,27 +567,12 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
#endif
denoiser->increase_denoising = 0;
denoiser->frame_buffer_initialized = 1;
- vp9_denoiser_init_noise_estimate(denoiser, width, height);
+ denoiser->denoising_level = kDenLow;
+ denoiser->prev_denoising_level = kDenLow;
+ denoiser->reset = 0;
return 0;
}
-void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser,
- int width,
- int height) {
- // Denoiser is off by default, i.e., no denoising is performed.
- // Noise level is measured periodically, and if observed to be above
- // thresh_noise_estimate, then denoising is performed, i.e., denoising_on = 1.
- denoiser->denoising_on = 0;
- denoiser->noise_estimate = 0;
- denoiser->noise_estimate_count = 0;
- denoiser->thresh_noise_estimate = 20;
- if (width * height >= 1920 * 1080) {
- denoiser->thresh_noise_estimate = 70;
- } else if (width * height >= 1280 * 720) {
- denoiser->thresh_noise_estimate = 40;
- }
-}
-
void vp9_denoiser_free(VP9_DENOISER *denoiser) {
int i;
denoiser->frame_buffer_initialized = 0;
@@ -548,117 +586,15 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
vpx_free_frame_buffer(&denoiser->last_source);
}
-void vp9_denoiser_update_noise_estimate(VP9_COMP *const cpi) {
- const VP9_COMMON *const cm = &cpi->common;
- CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- int frame_period = 10;
- int thresh_consec_zeromv = 8;
- unsigned int thresh_sum_diff = 128;
- int num_frames_estimate = 20;
- int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7;
- // Estimate of noise level every frame_period frames.
- // Estimate is between current source and last source.
- if (cm->current_video_frame % frame_period != 0 ||
- cpi->denoiser.last_source.y_buffer == NULL) {
- copy_frame(&cpi->denoiser.last_source, cpi->Source);
- return;
- } else {
- int num_samples = 0;
- uint64_t avg_est = 0;
- int bsize = BLOCK_16X16;
- static const unsigned char const_source[16] = {
- 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128};
- // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have
- // been encoded as zero/small mv at least x consecutive frames, compute
- // the variance to update estimate of noise in the source.
- const uint8_t *src_y = cpi->Source->y_buffer;
- const int src_ystride = cpi->Source->y_stride;
- const uint8_t *last_src_y = cpi->denoiser.last_source.y_buffer;
- const int last_src_ystride = cpi->denoiser.last_source.y_stride;
- const uint8_t *src_u = cpi->Source->u_buffer;
- const uint8_t *src_v = cpi->Source->v_buffer;
- const int src_uvstride = cpi->Source->uv_stride;
- const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
- const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
- const int uv_width_shift = y_width_shift >> 1;
- const int uv_height_shift = y_height_shift >> 1;
- int mi_row, mi_col;
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row ++) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col ++) {
- // 16x16 blocks, 1/4 sample of frame.
- if (mi_row % 4 == 0 && mi_col % 4 == 0) {
- int bl_index = mi_row * cm->mi_cols + mi_col;
- int bl_index1 = bl_index + 1;
- int bl_index2 = bl_index + cm->mi_cols;
- int bl_index3 = bl_index2 + 1;
- // Only consider blocks that are likely steady background. i.e, have
- // been encoded as zero/low motion x (= thresh_consec_zeromv) frames
- // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all
- // 4 sub-blocks for 16x16 block. Also, avoid skin blocks.
- const uint8_t ysource =
- src_y[y_height_shift * src_ystride + y_width_shift];
- const uint8_t usource =
- src_u[uv_height_shift * src_uvstride + uv_width_shift];
- const uint8_t vsource =
- src_v[uv_height_shift * src_uvstride + uv_width_shift];
- int is_skin = vp9_skin_pixel(ysource, usource, vsource);
- if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
- cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
- cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
- cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
- !is_skin) {
- // Compute variance.
- unsigned int sse;
- unsigned int variance = cpi->fn_ptr[bsize].vf(src_y,
- src_ystride,
- last_src_y,
- last_src_ystride,
- &sse);
- // Only consider this block as valid for noise measurement if the
- // average term (sse - variance = N * avg^{2}, N = 16X16) of the
- // temporal residual is small (avoid effects from lighting change).
- if ((sse - variance) < thresh_sum_diff) {
- unsigned int sse2;
- const unsigned int spatial_variance =
- cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source,
- 0, &sse2);
- avg_est += variance / (10 + spatial_variance);
- num_samples++;
- }
- }
- }
- src_y += 8;
- last_src_y += 8;
- src_u += 4;
- src_v += 4;
- }
- src_y += (src_ystride << 3) - (cm->mi_cols << 3);
- last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3);
- src_u += (src_uvstride << 2) - (cm->mi_cols << 2);
- src_v += (src_uvstride << 2) - (cm->mi_cols << 2);
- }
- // Update noise estimate if we have at a minimum number of block samples,
- // and avg_est > 0 (avg_est == 0 can happen if the application inputs
- // duplicate frames).
- if (num_samples > min_blocks_estimate && avg_est > 0) {
- // Normalize.
- avg_est = (avg_est << 8) / num_samples;
- // Update noise estimate.
- cpi->denoiser.noise_estimate = (3 * cpi->denoiser.noise_estimate +
- avg_est) >> 2;
- cpi->denoiser.noise_estimate_count++;
- if (cpi->denoiser.noise_estimate_count == num_frames_estimate) {
- // Reset counter and check noise level condition.
- cpi->denoiser.noise_estimate_count = 0;
- if (cpi->denoiser.noise_estimate > cpi->denoiser.thresh_noise_estimate)
- cpi->denoiser.denoising_on = 1;
- else
- cpi->denoiser.denoising_on = 0;
- }
- }
- }
- copy_frame(&cpi->denoiser.last_source, cpi->Source);
+void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser,
+ int noise_level) {
+ denoiser->denoising_level = noise_level;
+ if (denoiser->denoising_level > kDenLowLow &&
+ denoiser->prev_denoising_level == kDenLowLow)
+ denoiser->reset = 1;
+ else
+ denoiser->reset = 0;
+ denoiser->prev_denoising_level = denoiser->denoising_level;
}
#ifdef OUTPUT_YUV_DENOISED
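The replacement control flow takes an externally supplied noise level instead of estimating it inside the denoiser; the one non-obvious detail is the reset handshake. vp9_denoiser_set_noise_level arms denoiser->reset exactly when denoising switches on from kDenLowLow, and vp9_denoiser_update_frame_info (above) consumes it by re-copying the source into the running averages. A pared-down restatement, with struct and enum reduced to the fields used:

#include <assert.h>

typedef enum { kDenLowLow, kDenLow, kDenMedium, kDenHigh } LEVEL;
typedef struct { LEVEL level, prev_level; int reset; } DENOISER;

/* Mirrors vp9_denoiser_set_noise_level above. */
static void set_noise_level(DENOISER *d, LEVEL noise_level) {
  d->level = noise_level;
  d->reset = d->level > kDenLowLow && d->prev_level == kDenLowLow;
  d->prev_level = d->level;
}

int main(void) {
  DENOISER d = { kDenLowLow, kDenLowLow, 0 };
  set_noise_level(&d, kDenMedium);  /* off -> on: arms the reset */
  assert(d.reset == 1);
  set_noise_level(&d, kDenHigh);    /* on -> on: no reset */
  assert(d.reset == 0);
  return 0;
}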
diff --git a/libvpx/vp9/encoder/vp9_denoiser.h b/libvpx/vp9/encoder/vp9_denoiser.h
index f8ad4acd6..9c86e5a93 100644
--- a/libvpx/vp9/encoder/vp9_denoiser.h
+++ b/libvpx/vp9/encoder/vp9_denoiser.h
@@ -23,21 +23,40 @@ extern "C" {
typedef enum vp9_denoiser_decision {
COPY_BLOCK,
- FILTER_BLOCK
+ FILTER_BLOCK,
+ FILTER_ZEROMV_BLOCK
} VP9_DENOISER_DECISION;
+typedef enum vp9_denoiser_level {
+ kDenLowLow,
+ kDenLow,
+ kDenMedium,
+ kDenHigh
+} VP9_DENOISER_LEVEL;
+
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
YV12_BUFFER_CONFIG mc_running_avg_y;
YV12_BUFFER_CONFIG last_source;
int increase_denoising;
int frame_buffer_initialized;
- int denoising_on;
- int noise_estimate;
- int thresh_noise_estimate;
- int noise_estimate_count;
+ int reset;
+ VP9_DENOISER_LEVEL denoising_level;
+ VP9_DENOISER_LEVEL prev_denoising_level;
} VP9_DENOISER;
+typedef struct {
+ int64_t zero_last_cost_orig;
+ int *ref_frame_cost;
+ int_mv (*frame_mv)[MAX_REF_FRAMES];
+ int reuse_inter_pred;
+ TX_SIZE best_tx_size;
+ PREDICTION_MODE best_mode;
+ MV_REFERENCE_FRAME best_ref_frame;
+ INTERP_FILTER best_pred_filter;
+ uint8_t best_mode_skip_txfm;
+} VP9_PICKMODE_CTX_DEN;
+
struct VP9_COMP;
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
@@ -48,13 +67,14 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
int refresh_last_frame,
int resized);
-void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
+void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
- PICK_MODE_CONTEXT *ctx);
+                          PICK_MODE_CONTEXT *ctx,
+ VP9_DENOISER_DECISION *denoiser_decision);
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
-void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi,
+void vp9_denoiser_update_frame_stats(MODE_INFO *mi,
unsigned int sse, PREDICTION_MODE mode,
PICK_MODE_CONTEXT *ctx);
@@ -69,18 +89,16 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
// This function is used by both c and sse2 denoiser implementations.
// Define it as a static function within the scope where vp9_denoiser.h
// is referenced.
-static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
+static INLINE int total_adj_strong_thresh(BLOCK_SIZE bs,
+ int increase_denoising) {
return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2);
}
#endif
void vp9_denoiser_free(VP9_DENOISER *denoiser);
-void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser,
- int width,
- int height);
-
-void vp9_denoiser_update_noise_estimate(struct VP9_COMP *const cpi);
+void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser,
+ int noise_level);
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index 2333a1391..f66ed9ed3 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -33,6 +33,7 @@
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"
+#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_complexity.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
@@ -133,7 +134,7 @@ unsigned int vp9_high_get_sby_perpixel_variance(
0, &sse);
break;
}
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
+ return ROUND_POWER_OF_TWO((int64_t)var, num_pels_log2_lookup[bs]);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -186,7 +187,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi;
+ MODE_INFO *mi;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
const struct segmentation *const seg = &cm->seg;
@@ -195,7 +196,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
- mbmi = &xd->mi[0]->mbmi;
+ mi = xd->mi[0];
// Set up destination pointers.
vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
@@ -221,16 +222,17 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
// Setup segment ID.
if (seg->enabled) {
- if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
+ if (cpi->oxcf.aq_mode != VARIANCE_AQ &&
+ cpi->oxcf.aq_mode != EQUATOR360_AQ) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
- mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
}
vp9_init_plane_quantizers(cpi, x);
- x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
+ x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
} else {
- mbmi->segment_id = 0;
+ mi->segment_id = 0;
x->encode_breakout = cpi->encode_breakout;
}
@@ -241,14 +243,16 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
- const int block_width = num_8x8_blocks_wide_lookup[bsize];
- const int block_height = num_8x8_blocks_high_lookup[bsize];
+ const int block_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize],
+ cm->mi_cols - mi_col);
+ const int block_height = VPXMIN(num_8x8_blocks_high_lookup[bsize],
+ cm->mi_rows - mi_row);
+ const int mi_stride = xd->mi_stride;
+ MODE_INFO *const src_mi = xd->mi[0];
int i, j;
for (j = 0; j < block_height; ++j)
- for (i = 0; i < block_width; ++i) {
- if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols)
- xd->mi[j * xd->mi_stride + i] = xd->mi[0];
- }
+ for (i = 0; i < block_width; ++i)
+ xd->mi[j * mi_stride + i] = src_mi;
}
static void set_block_size(VP9_COMP * const cpi,
@@ -258,7 +262,7 @@ static void set_block_size(VP9_COMP * const cpi,
BLOCK_SIZE bsize) {
if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
- xd->mi[0]->mbmi.sb_type = bsize;
+ xd->mi[0]->sb_type = bsize;
}
}
@@ -401,7 +405,6 @@ static int set_vt_partitioning(VP9_COMP *cpi,
variance_node vt;
const int block_width = num_8x8_blocks_wide_lookup[bsize];
const int block_height = num_8x8_blocks_high_lookup[bsize];
- const int low_res = (cm->width <= 352 && cm->height <= 288);
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
@@ -414,7 +417,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
// No check for vert/horiz split as too few samples for variance.
if (bsize == bsize_min) {
// Variance already computed to set the force_split.
- if (low_res || cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME)
get_variance(&vt.part_variances->none);
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
@@ -425,7 +428,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
return 0;
} else if (bsize > bsize_min) {
// Variance already computed to set the force_split.
- if (low_res || cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME)
get_variance(&vt.part_variances->none);
// For key frame: take split for bsize above 32X32 or very high variance.
if (cm->frame_type == KEY_FRAME &&
@@ -481,7 +484,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
VP9_COMMON *const cm = &cpi->common;
const int is_key_frame = (cm->frame_type == KEY_FRAME);
const int threshold_multiplier = is_key_frame ? 20 : 1;
- const int64_t threshold_base = (int64_t)(threshold_multiplier *
+ int64_t threshold_base = (int64_t)(threshold_multiplier *
cpi->y_dequant[q][1]);
if (is_key_frame) {
thresholds[0] = threshold_base;
@@ -489,9 +492,20 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
thresholds[2] = threshold_base >> 2;
thresholds[3] = threshold_base << 2;
} else {
- thresholds[1] = threshold_base;
+ // Increase base variance threshold based on estimated noise level.
+ if (cpi->noise_estimate.enabled) {
+ NOISE_LEVEL noise_level = vp9_noise_estimate_extract_level(
+ &cpi->noise_estimate);
+ if (noise_level == kHigh)
+ threshold_base = 3 * threshold_base;
+ else if (noise_level == kMedium)
+ threshold_base = threshold_base << 1;
+ else if (noise_level < kLow)
+ threshold_base = (7 * threshold_base) >> 3;
+ }
if (cm->width <= 352 && cm->height <= 288) {
- thresholds[0] = threshold_base >> 2;
+ thresholds[0] = threshold_base >> 3;
+ thresholds[1] = threshold_base >> 1;
thresholds[2] = threshold_base << 3;
} else {
thresholds[0] = threshold_base;
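The scaling added above multiplies the base threshold by 3 for kHigh, 2 for kMedium, and 7/8 below kLow. A quick sketch with an illustrative base value; the enum ordering (with a level below kLow) is assumed to follow vp9_noise_estimate_extract_level:

#include <assert.h>
#include <stdint.h>

/* Assumed ordering of the noise-estimate levels. */
typedef enum { kLowLow, kLow, kMedium, kHigh } NOISE_LEVEL;

static int64_t scale_threshold(int64_t base, NOISE_LEVEL level) {
  if (level == kHigh) return 3 * base;
  if (level == kMedium) return base << 1;
  if (level < kLow) return (7 * base) >> 3;
  return base;
}

int main(void) {
  assert(scale_threshold(800, kHigh) == 2400);
  assert(scale_threshold(800, kMedium) == 1600);
  assert(scale_threshold(800, kLowLow) == 700);
  assert(scale_threshold(800, kLow) == 800);
  return 0;
}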
@@ -518,7 +532,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
cpi->vbp_bsize_min = BLOCK_8X8;
} else {
if (cm->width <= 352 && cm->height <= 288)
- cpi->vbp_threshold_sad = 100;
+ cpi->vbp_threshold_sad = 10;
else
cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ?
(cpi->y_dequant[q][1] << 1) : 1000;
@@ -548,16 +562,16 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
- vp9_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
d + y8_idx * dp + x8_idx, dp,
&min, &max);
} else {
- vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
d + y8_idx * dp + x8_idx, dp,
&min, &max);
}
#else
- vp9_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
+ vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
d + y8_idx * dp + x8_idx, dp,
&min, &max);
#endif
@@ -589,18 +603,18 @@ static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
- s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame)
- d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
} else {
- s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
}
#else
- s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
+ d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
sum = s_avg - d_avg;
sse = sum * sum;
@@ -628,18 +642,18 @@ static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
- s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame)
- d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
} else {
- s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
}
#else
- s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
if (!is_key_frame)
- d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
sum = s_avg - d_avg;
sse = sum * sum;
@@ -648,45 +662,177 @@ static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
}
}
+#if !CONFIG_VP9_HIGHBITDEPTH
+// Check if most of the superblock is skin content, and if so, force split to
+// 32x32, and set x->sb_is_skin for use in mode selection.
+static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
+ int mi_row, int mi_col, int *force_split) {
+ VP9_COMMON * const cm = &cpi->common;
+ // Avoid checking superblocks on/near boundary and avoid low resolutions.
+ // Note superblock may still pick 64X64 if y_sad is very small
+ // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
+ if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
+ mi_row + 8 < cm->mi_rows)) {
+ int num_16x16_skin = 0;
+ int num_16x16_nonskin = 0;
+ uint8_t *ysignal = x->plane[0].src.buf;
+ uint8_t *usignal = x->plane[1].src.buf;
+ uint8_t *vsignal = x->plane[2].src.buf;
+ int sp = x->plane[0].src.stride;
+ int spuv = x->plane[1].src.stride;
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
+ const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ // Loop through the 16x16 sub-blocks.
+ int i, j;
+ for (i = 0; i < ymis; i+=2) {
+ for (j = 0; j < xmis; j+=2) {
+ int bl_index = block_index + i * cm->mi_cols + j;
+ int bl_index1 = bl_index + 1;
+ int bl_index2 = bl_index + cm->mi_cols;
+ int bl_index3 = bl_index2 + 1;
+ int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+ VPXMIN(cpi->consec_zero_mv[bl_index1],
+ VPXMIN(cpi->consec_zero_mv[bl_index2],
+ cpi->consec_zero_mv[bl_index3])));
+ int is_skin = vp9_compute_skin_block(ysignal,
+ usignal,
+ vsignal,
+ sp,
+ spuv,
+ BLOCK_16X16,
+ consec_zeromv,
+ 0);
+ num_16x16_skin += is_skin;
+ num_16x16_nonskin += (1 - is_skin);
+ if (num_16x16_nonskin > 3) {
+ // Exit loop if at least 4 of the 16x16 blocks are not skin.
+ i = ymis;
+ break;
+ }
+ ysignal += 16;
+ usignal += 8;
+ vsignal += 8;
+ }
+ ysignal += (sp << 4) - 64;
+ usignal += (spuv << 3) - 32;
+ vsignal += (spuv << 3) - 32;
+ }
+ if (num_16x16_skin > 12) {
+ *force_split = 1;
+ return 1;
+ }
+ }
+ return 0;
+}
+#endif
+
+static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x,
+ MACROBLOCKD *xd, v64x64 *vt,
+ int force_split[], int64_t thresholds[],
+ MV_REFERENCE_FRAME ref_frame_partition,
+ int mi_col, int mi_row) {
+ int i, j;
+ VP9_COMMON * const cm = &cpi->common;
+ const int mv_thr = cm->width > 640 ? 8 : 4;
+ // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
+ // int_pro mv is small. If the temporal variance is small set the flag
+ // variance_low for the block. The variance threshold can be adjusted, the
+ // higher the more aggressive.
+ if (ref_frame_partition == LAST_FRAME &&
+ (cpi->sf.short_circuit_low_temp_var == 1 ||
+ (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
+ xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
+ xd->mi[0]->mv[0].as_mv.row < mv_thr &&
+ xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
+ if (xd->mi[0]->sb_type == BLOCK_64X64 &&
+ (vt->part_variances).none.variance < (thresholds[0] >> 1)) {
+ x->variance_low[0] = 1;
+ } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
+ for (i = 0; i < 2; i++) {
+ if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
+ x->variance_low[i + 1] = 1;
+ }
+ } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
+ for (i = 0; i < 2; i++) {
+ if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
+ x->variance_low[i + 3] = 1;
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ if (!force_split[i + 1]) {
+ // 32x32
+ if (vt->split[i].part_variances.none.variance <
+ (thresholds[1] >> 1))
+ x->variance_low[i + 5] = 1;
+ } else if (cpi->sf.short_circuit_low_temp_var == 2) {
+ int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};
+ const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];
+ MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
+ // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
+ // inside.
+ if ((*this_mi)->sb_type == BLOCK_16X16 ||
+ (*this_mi)->sb_type == BLOCK_32X16 ||
+ (*this_mi)->sb_type == BLOCK_16X32) {
+ for (j = 0; j < 4; j++) {
+ if (vt->split[i].split[j].part_variances.none.variance <
+ (thresholds[2] >> 8))
+ x->variance_low[(i << 2) + j + 9] = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for down-sampled inputs.
static int choose_partitioning(VP9_COMP *cpi,
- const TileInfo *const tile,
- MACROBLOCK *x,
- int mi_row, int mi_col) {
+ const TileInfo *const tile,
+ MACROBLOCK *x,
+ int mi_row, int mi_col) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
int i, j, k, m;
v64x64 vt;
v16x16 vt2[16];
int force_split[21];
+ int avg_32x32;
+ int avg_16x16[4];
uint8_t *s;
const uint8_t *d;
int sp;
int dp;
+ // Ref frame used in partitioning.
+ MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
int pixels_wide = 64, pixels_high = 64;
int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
+ // For the variance computation under SVC mode, we treat the frame as key if
+  // the reference (base layer frame) is a key frame (i.e., is_key_frame == 1).
+ const int is_key_frame = (cm->frame_type == KEY_FRAME ||
+ (is_one_pass_cbr_svc(cpi) &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
// Always use 4x4 partition for key frame.
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
- const int use_4x4_partition = is_key_frame;
+ const int use_4x4_partition = cm->frame_type == KEY_FRAME;
const int low_res = (cm->width <= 352 && cm->height <= 288);
int variance4x4downsample[16];
+ int segment_id;
- int segment_id = CR_SEGMENT_ID_BASE;
+ set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+ segment_id = xd->mi[0]->segment_id;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map :
- cm->last_frame_seg_map;
- segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
-
if (cyclic_refresh_segment_id_boosted(segment_id)) {
int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
set_vbp_thresholds(cpi, thresholds, q);
}
}
- set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+ memset(x->variance_low, 0, sizeof(x->variance_low));
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -696,17 +842,20 @@ static int choose_partitioning(VP9_COMP *cpi,
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
- if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) &&
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
+ // 5-20 for the 16x16 blocks.
+ force_split[0] = 0;
+
+ if (!is_key_frame) {
// In the case of spatial/temporal scalable coding, the assumption here is
// that the temporal reference frame will always be of type LAST_FRAME.
// TODO(marpan): If that assumption is broken, we need to revisit this code.
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
unsigned int uv_sad;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
const YV12_BUFFER_CONFIG *yv12_g = NULL;
- unsigned int y_sad, y_sad_g;
+ unsigned int y_sad, y_sad_g, y_sad_thr;
const BLOCK_SIZE bsize = BLOCK_32X32
+ (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
@@ -732,25 +881,38 @@ static int choose_partitioning(VP9_COMP *cpi,
vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
&cm->frame_refs[LAST_FRAME - 1].sf);
- mbmi->ref_frame[0] = LAST_FRAME;
- mbmi->ref_frame[1] = NONE;
- mbmi->sb_type = BLOCK_64X64;
- mbmi->mv[0].as_int = 0;
- mbmi->interp_filter = BILINEAR;
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE;
+ mi->sb_type = BLOCK_64X64;
+ mi->mv[0].as_int = 0;
+ mi->interp_filter = BILINEAR;
y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
- if (y_sad_g < y_sad) {
+ // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+ // are close if short_circuit_low_temp_var is on.
+ y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+ if (y_sad_g < y_sad_thr) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf);
- mbmi->ref_frame[0] = GOLDEN_FRAME;
- mbmi->mv[0].as_int = 0;
+ mi->ref_frame[0] = GOLDEN_FRAME;
+ mi->mv[0].as_int = 0;
y_sad = y_sad_g;
+ ref_frame_partition = GOLDEN_FRAME;
} else {
- x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
+ x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+ ref_frame_partition = LAST_FRAME;
}
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+ x->sb_is_skin = 0;
+#if !CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->use_skin_detection)
+ x->sb_is_skin = skin_sb_split(cpi, x, low_res, mi_row, mi_col,
+ &force_split[0]);
+#endif
+
for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
@@ -762,7 +924,9 @@ static int choose_partitioning(VP9_COMP *cpi,
uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride);
- x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
+ // TODO(marpan): Investigate if we should lower this threshold if
+ // superblock is detected as skin.
+ x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
}
d = xd->plane[0].dst.buf;
@@ -801,9 +965,6 @@ static int choose_partitioning(VP9_COMP *cpi,
#endif // CONFIG_VP9_HIGHBITDEPTH
}
- // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
- // 5-20 for the 16x16 blocks.
- force_split[0] = 0;
// Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
// for splits.
for (i = 0; i < 4; i++) {
@@ -811,6 +972,7 @@ static int choose_partitioning(VP9_COMP *cpi,
const int y32_idx = ((i >> 1) << 5);
const int i2 = i << 2;
force_split[i + 1] = 0;
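+    // Running sum of the four 16x16 variances in this 32x32 block; used
+    // below to bias the 32x32 split decision.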
+ avg_16x16[i] = 0;
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
@@ -828,6 +990,7 @@ static int choose_partitioning(VP9_COMP *cpi,
is_key_frame);
fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
get_variance(&vt.split[i].split[j].part_variances.none);
+ avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
if (vt.split[i].split[j].part_variances.none.variance >
thresholds[2]) {
// 16X16 variance is above threshold for split, so force split to 8x8
@@ -835,7 +998,8 @@ static int choose_partitioning(VP9_COMP *cpi,
force_split[split_index] = 1;
force_split[i + 1] = 1;
force_split[0] = 1;
- } else if (vt.split[i].split[j].part_variances.none.variance >
+ } else if (cpi->oxcf.speed < 8 &&
+ vt.split[i].split[j].part_variances.none.variance >
thresholds[1] &&
!cyclic_refresh_segment_id_boosted(segment_id)) {
// We have some nominal amount of 16x16 variance (based on average),
@@ -853,9 +1017,7 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
}
- // TODO(marpan): There is an issue with variance based on 4x4 average in
- // svc mode, don't allow it for now.
- if (is_key_frame || (low_res && !cpi->use_svc &&
+ if (is_key_frame || (low_res &&
vt.split[i].split[j].part_variances.none.variance >
(thresholds[1] << 1))) {
force_split[split_index] = 0;
@@ -877,8 +1039,8 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
}
-
// Fill the rest of the variance tree by summing split partition values.
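+  // Running sum of the four 32x32 variances; used below to bias the 64x64
+  // split decision.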
+ avg_32x32 = 0;
for (i = 0; i < 4; i++) {
const int i2 = i << 2;
for (j = 0; j < 4; j++) {
@@ -888,22 +1050,41 @@ static int choose_partitioning(VP9_COMP *cpi,
for (m = 0; m < 4; m++)
fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
fill_variance_tree(vtemp, BLOCK_16X16);
+ // If variance of this 16x16 block is above the threshold, force block
+ // to split. This also forces a split on the upper levels.
+ get_variance(&vtemp->part_variances.none);
+ if (vtemp->part_variances.none.variance > thresholds[2]) {
+ force_split[5 + i2 + j] = 1;
+ force_split[i + 1] = 1;
+ force_split[0] = 1;
+ }
}
}
fill_variance_tree(&vt.split[i], BLOCK_32X32);
- // If variance of this 32x32 block is above the threshold, force the block
- // to split. This also forces a split on the upper (64x64) level.
+    // If variance of this 32x32 block is above the threshold, or if, on
+    // non-key frames, it is above half the threshold and more than twice the
+    // mean variance of its four 16x16 sub-blocks (avg_16x16[i] holds their
+    // sum), force this block to split. This also forces a split on the upper
+    // (64x64) level.
if (!force_split[i + 1]) {
get_variance(&vt.split[i].part_variances.none);
- if (vt.split[i].part_variances.none.variance > thresholds[1]) {
+ if (vt.split[i].part_variances.none.variance > thresholds[1] ||
+ (!is_key_frame &&
+ vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
+ vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
force_split[i + 1] = 1;
force_split[0] = 1;
}
+ avg_32x32 += vt.split[i].part_variances.none.variance;
}
}
if (!force_split[0]) {
fill_variance_tree(&vt, BLOCK_64X64);
get_variance(&vt.part_variances.none);
+    // On non-key frames, if the variance of this 64x64 block exceeds 5/4 of
+    // the mean variance of its four 32x32 sub-blocks (avg_32x32 holds their
+    // sum, hence (5 * avg_32x32) >> 4), force this block to split.
+ if (!is_key_frame &&
+ vt.part_variances.none.variance > (5 * avg_32x32) >> 4)
+ force_split[0] = 1;
}
// Now go through the entire structure, splitting every block size until
@@ -960,6 +1141,11 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
}
+
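+  // Flag blocks with low temporal variance so that the
+  // short_circuit_low_temp_var speed feature can skip work on them later.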
+ if (cpi->sf.short_circuit_low_temp_var) {
+ set_low_temp_var_flag(cpi, x, xd, &vt, force_split, thresholds,
+ ref_frame_partition, mi_col, mi_row);
+ }
return 0;
}
@@ -975,11 +1161,11 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
MODE_INFO *mi = &ctx->mic;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const xdmi = xd->mi[0];
MODE_INFO *mi_addr = xd->mi[0];
const struct segmentation *const seg = &cm->seg;
- const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
- const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
+ const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
MV_REF *const frame_mvs =
@@ -991,7 +1177,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
const int mi_height = num_8x8_blocks_high_lookup[bsize];
int max_plane;
- assert(mi->mbmi.sb_type == bsize);
+ assert(mi->sb_type == bsize);
*mi_addr = *mi;
*x->mbmi_ext = ctx->mbmi_ext;
@@ -1002,19 +1188,19 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
- mi_addr->mbmi.segment_id =
+ mi_addr->segment_id =
get_segment_id(cm, map, bsize, mi_row, mi_col);
}
// Else for cyclic refresh mode update the segment map, set the segment id
// and then update the quantizer.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
+ vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row,
mi_col, bsize, ctx->rate, ctx->dist,
- x->skip);
+ x->skip, p);
}
}
- max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
+ max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
for (i = 0; i < max_plane; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
@@ -1038,16 +1224,16 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
xd->mi[x_idx + y * mis] = mi_addr;
}
- if (cpi->oxcf.aq_mode)
+ if (cpi->oxcf.aq_mode != NO_AQ)
vp9_init_plane_quantizers(cpi, x);
- if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
- mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
- mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
+ xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
+ xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
}
x->skip = ctx->skip;
- memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
+ memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
if (!output_enabled)
@@ -1067,19 +1253,19 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
THR_D63_PRED /*D63_PRED*/,
THR_TM /*TM_PRED*/,
};
- ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
+ ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
} else {
// Note how often each mode chosen as best
++cpi->mode_chosen_counts[ctx->best_mode_index];
}
#endif
if (!frame_is_intra_only(cm)) {
- if (is_inter_block(mbmi)) {
+ if (is_inter_block(xdmi)) {
vp9_update_mv_count(td);
if (cm->interp_filter == SWITCHABLE) {
const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+ ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
}
}
@@ -1095,10 +1281,10 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
for (w = 0; w < x_mis; ++w) {
MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ mv->ref_frame[0] = mi->ref_frame[0];
+ mv->ref_frame[1] = mi->ref_frame[1];
+ mv->mv[0].as_int = mi->mv[0].as_int;
+ mv->mv[1].as_int = mi->mv[1].as_int;
}
}
}
@@ -1121,26 +1307,23 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
RD_COST *rd_cost, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
INTERP_FILTER filter_ref;
- if (xd->up_available)
- filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
- else if (xd->left_available)
- filter_ref = xd->mi[-1]->mbmi.interp_filter;
- else
+ filter_ref = vp9_get_pred_context_switchable_interp(xd);
+ if (filter_ref == SWITCHABLE_FILTERS)
filter_ref = EIGHTTAP;
- mbmi->sb_type = bsize;
- mbmi->mode = ZEROMV;
- mbmi->tx_size =
+ mi->sb_type = bsize;
+ mi->mode = ZEROMV;
+ mi->tx_size =
VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
- mbmi->skip = 1;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame[0] = LAST_FRAME;
- mbmi->ref_frame[1] = NONE;
- mbmi->mv[0].as_int = 0;
- mbmi->interp_filter = filter_ref;
+ mi->skip = 1;
+ mi->uv_mode = DC_PRED;
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE;
+ mi->mv[0].as_int = 0;
+ mi->interp_filter = filter_ref;
xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
x->skip = 1;
@@ -1155,8 +1338,7 @@ static int set_segment_rdmult(VP9_COMP *const cpi,
VP9_COMMON *const cm = &cpi->common;
vp9_init_plane_quantizers(cpi, x);
vpx_clear_system_state();
- segment_qindex = vp9_get_qindex(&cm->seg, segment_id,
- cm->base_qindex);
+ segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
return vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
@@ -1169,7 +1351,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi;
+ MODE_INFO *mi;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
@@ -1181,8 +1363,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
x->use_lp32x32fdct = 1;
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- mbmi = &xd->mi[0]->mbmi;
- mbmi->sb_type = bsize;
+ mi = xd->mi[0];
+ mi->sb_type = bsize;
for (i = 0; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][0];
@@ -1196,7 +1378,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
- mbmi->skip = 0;
+ mi->skip = 0;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -1221,15 +1403,24 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
if (cm->frame_type == KEY_FRAME ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
- mbmi->segment_id = vp9_vaq_segment_id(energy);
+ mi->segment_id = vp9_vaq_segment_id(energy);
} else {
const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
- mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
}
- x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
+ x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
+ } else if (aq_mode == EQUATOR360_AQ) {
+ if (cm->frame_type == KEY_FRAME) {
+ mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
+ } else {
+ const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ }
+ x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
} else if (aq_mode == COMPLEXITY_AQ) {
- x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
+ x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
} else if (aq_mode == CYCLIC_REFRESH_AQ) {
const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
@@ -1245,7 +1436,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
} else {
if (bsize >= BLOCK_8X8) {
- if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
ctx, best_rd);
else
@@ -1282,27 +1473,26 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) {
const MACROBLOCK *x = &td->mb;
const MACROBLOCKD *const xd = &x->e_mbd;
const MODE_INFO *const mi = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
if (!frame_is_intra_only(cm)) {
FRAME_COUNTS *const counts = td->counts;
- const int inter_block = is_inter_block(mbmi);
- const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id,
+ const int inter_block = is_inter_block(mi);
+ const int seg_ref_active = segfeature_active(&cm->seg, mi->segment_id,
SEG_LVL_REF_FRAME);
if (!seg_ref_active) {
- counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
+ counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
// If the segment reference feature is enabled we have only a single
// reference frame allowed for the segment so exclude it from
// the reference frame counts used to work out probabilities.
if (inter_block) {
- const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
if (cm->reference_mode == REFERENCE_MODE_SELECT)
counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
- [has_second_ref(mbmi)]++;
+ [has_second_ref(mi)]++;
- if (has_second_ref(mbmi)) {
+ if (has_second_ref(mi)) {
counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
[ref0 == GOLDEN_FRAME]++;
} else {
@@ -1315,10 +1505,10 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) {
}
}
if (inter_block &&
- !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
+ !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
+ const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
if (bsize >= BLOCK_8X8) {
- const PREDICTION_MODE mode = mbmi->mode;
+ const PREDICTION_MODE mode = mi->mode;
++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -1520,7 +1710,7 @@ static void set_partial_b64x64_partition(MODE_INFO *mi, int mis,
for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
const int index = r * mis + c;
mi_8x8[index] = mi + index;
- mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize,
+ mi_8x8[index]->sb_type = find_partition_size(bsize,
row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
}
}
@@ -1552,7 +1742,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
int index = block_row * mis + block_col;
mi_8x8[index] = mi_upper_left + index;
- mi_8x8[index]->mbmi.sb_type = bsize;
+ mi_8x8[index]->sb_type = bsize;
}
}
} else {
@@ -1617,7 +1807,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
index = b_mi_row * mis + b_mi_col;
mi_8x8[index] = mi_upper_left + index;
- mi_8x8[index]->mbmi.sb_type = BLOCK_16X16;
+ mi_8x8[index]->sb_type = BLOCK_16X16;
// TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
// size to further improve quality.
@@ -1639,7 +1829,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
mi_8x8[index] = mi_upper_left + index;
- mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;
+ mi_8x8[index]->sb_type = BLOCK_32X32;
}
}
@@ -1651,7 +1841,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
// Use 64x64 partition
if (is_larger_better) {
mi_8x8[0] = mi_upper_left;
- mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
+ mi_8x8[0]->sb_type = BLOCK_64X64;
}
}
} else { // partial in-image SB64
@@ -1669,46 +1859,47 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = x->plane;
const struct segmentation *const seg = &cm->seg;
- const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
- const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
+ const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
*(xd->mi[0]) = ctx->mic;
*(x->mbmi_ext) = ctx->mbmi_ext;
- if (seg->enabled && cpi->oxcf.aq_mode) {
+ if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
// For in frame complexity AQ or variance AQ, copy segment_id from
// segmentation_map.
- if (cpi->oxcf.aq_mode == COMPLEXITY_AQ ||
- cpi->oxcf.aq_mode == VARIANCE_AQ ) {
+ if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
- mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
} else {
// Setting segmentation map for cyclic_refresh.
- vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize,
- ctx->rate, ctx->dist, x->skip);
+ vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
+ ctx->rate, ctx->dist, x->skip, p);
}
vp9_init_plane_quantizers(cpi, x);
}
- if (is_inter_block(mbmi)) {
+ if (is_inter_block(mi)) {
vp9_update_mv_count(td);
if (cm->interp_filter == SWITCHABLE) {
const int pred_ctx = vp9_get_pred_context_switchable_interp(xd);
- ++td->counts->switchable_interp[pred_ctx][mbmi->interp_filter];
+ ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
}
- if (mbmi->sb_type < BLOCK_8X8) {
- mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
- mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ if (mi->sb_type < BLOCK_8X8) {
+ mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
+ mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
}
}
- if (cm->use_prev_frame_mvs) {
+ if (cm->use_prev_frame_mvs ||
+ (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1
+ && cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
MV_REF *const frame_mvs =
cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
int w, h;
@@ -1717,16 +1908,16 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
for (w = 0; w < x_mis; ++w) {
MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ mv->ref_frame[0] = mi->ref_frame[0];
+ mv->ref_frame[1] = mi->ref_frame[1];
+ mv->mv[0].as_int = mi->mv[0].as_int;
+ mv->mv[1].as_int = mi->mv[1].as_int;
}
}
}
x->skip = ctx->skip;
- x->skip_txfm[0] = mbmi->segment_id ? 0 : ctx->skip_txfm[0];
+ x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0];
}
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
@@ -1738,16 +1929,6 @@ static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
-#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 &&
- output_enabled &&
- cpi->common.frame_type != KEY_FRAME &&
- cpi->resize_pending == 0) {
- vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
- VPXMAX(BLOCK_8X8, bsize), ctx);
- }
-#endif
-
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
update_stats(&cpi->common, td);
@@ -1776,7 +1957,7 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
const int idx_str = xd->mi_stride * mi_row + mi_col;
MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
- subsize = mi_8x8[0]->mbmi.sb_type;
+ subsize = mi_8x8[0]->sb_type;
} else {
ctx = 0;
subsize = BLOCK_4X4;
@@ -1851,7 +2032,7 @@ static void rd_use_partition(VP9_COMP *cpi,
RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
- BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
+ BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
int do_partition_search = 1;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
@@ -1871,7 +2052,7 @@ static void rd_use_partition(VP9_COMP *cpi,
pc_tree->partitioning = partition;
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
- if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
+ if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
x->mb_energy = vp9_block_energy(cpi, x, bsize);
}
@@ -1886,7 +2067,7 @@ static void rd_use_partition(VP9_COMP *cpi,
for (i = 0; i < 4; i++) {
int jj = i >> 1, ii = i & 0x01;
MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
- if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) {
+ if (this_mi && this_mi->sb_type >= sub_subsize) {
splits_below = 0;
}
}
@@ -1910,7 +2091,7 @@ static void rd_use_partition(VP9_COMP *cpi,
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
- mi_8x8[0]->mbmi.sb_type = bs_type;
+ mi_8x8[0]->sb_type = bs_type;
pc_tree->partitioning = partition;
}
}
@@ -2068,7 +2249,7 @@ static void rd_use_partition(VP9_COMP *cpi,
// If last_part is better set the partitioning to that.
if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
- mi_8x8[0]->mbmi.sb_type = bsize;
+ mi_8x8[0]->sb_type = bsize;
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = partition;
chosen_rdc = last_part_rdc;
@@ -2134,7 +2315,7 @@ static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
for (i = 0; i < sb_height_in_blocks; ++i) {
for (j = 0; j < sb_width_in_blocks; ++j) {
MODE_INFO *mi = mi_8x8[index+j];
- BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
+ BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
bs_hist[sb_type]++;
*min_block_size = VPXMIN(*min_block_size, sb_type);
*max_block_size = VPXMAX(*max_block_size, sb_type);
@@ -2161,8 +2342,8 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
BLOCK_SIZE *max_block_size) {
VP9_COMMON *const cm = &cpi->common;
MODE_INFO **mi = xd->mi;
- const int left_in_image = xd->left_available && mi[-1];
- const int above_in_image = xd->up_available && mi[-xd->mi_stride];
+ const int left_in_image = !!xd->left_mi;
+ const int above_in_image = !!xd->above_mi;
const int row8x8_remaining = tile->mi_row_end - mi_row;
const int col8x8_remaining = tile->mi_col_end - mi_col;
int bh, bw;
@@ -2250,26 +2431,26 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd,
for (idy = 0; idy < mi_height; ++idy) {
for (idx = 0; idx < mi_width; ++idx) {
mi = prev_mi[idy * cm->mi_stride + idx];
- bs = mi ? mi->mbmi.sb_type : bsize;
+ bs = mi ? mi->sb_type : bsize;
min_size = VPXMIN(min_size, bs);
max_size = VPXMAX(max_size, bs);
}
}
}
- if (xd->left_available) {
+ if (xd->left_mi) {
for (idy = 0; idy < mi_height; ++idy) {
mi = xd->mi[idy * cm->mi_stride - 1];
- bs = mi ? mi->mbmi.sb_type : bsize;
+ bs = mi ? mi->sb_type : bsize;
min_size = VPXMIN(min_size, bs);
max_size = VPXMAX(max_size, bs);
}
}
- if (xd->up_available) {
+ if (xd->above_mi) {
for (idx = 0; idx < mi_width; ++idx) {
mi = xd->mi[idx - cm->mi_stride];
- bs = mi ? mi->mbmi.sb_type : bsize;
+ bs = mi ? mi->sb_type : bsize;
min_size = VPXMIN(min_size, bs);
max_size = VPXMAX(max_size, bs);
}
@@ -2354,7 +2535,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
PARTITION_CONTEXT sl[8], sa[8];
TOKENEXTRA *tp_orig = *tp;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
- int i, pl;
+ int i;
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
BLOCK_SIZE subsize;
RD_COST this_rdc, sum_rdc, best_rdc;
int do_split = bsize >= BLOCK_8X8;
@@ -2400,7 +2582,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode)
+ if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ)
x->mb_energy = vp9_block_energy(cpi, x, bsize);
if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
@@ -2424,8 +2606,15 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.use_square_partition_only &&
bsize > cpi->sf.use_square_only_threshold) {
+ if (cpi->use_svc) {
+ if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
+ partition_horz_allowed &= force_horz_split;
+ if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
+ partition_vert_allowed &= force_vert_split;
+ } else {
partition_horz_allowed &= force_horz_split;
partition_vert_allowed &= force_vert_split;
+ }
}
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -2495,7 +2684,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
&this_rdc, bsize, ctx, best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
@@ -2549,6 +2737,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
break;
}
}
+
if (skip) {
if (src_diff_var == UINT_MAX) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -2580,15 +2769,16 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
i = 4;
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
- ctx->mic.mbmi.interp_filter;
+ ctx->mic.interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
pc_tree->leaf_split[0], best_rdc.rdcost);
+
if (sum_rdc.rate == INT_MAX)
sum_rdc.rdcost = INT64_MAX;
} else {
for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
- const int x_idx = (i & 1) * mi_step;
- const int y_idx = (i >> 1) * mi_step;
+ const int x_idx = (i & 1) * mi_step;
+ const int y_idx = (i >> 1) * mi_step;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
@@ -2614,7 +2804,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
sum_rdc.rate, sum_rdc.dist);
@@ -2651,7 +2840,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter =
- ctx->mic.mbmi.interp_filter;
+ ctx->mic.interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->horizontal[0], best_rdc.rdcost);
@@ -2666,7 +2855,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter =
- ctx->mic.mbmi.interp_filter;
+ ctx->mic.interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
&this_rdc, subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost);
@@ -2680,7 +2869,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
@@ -2694,6 +2882,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
}
+
// PARTITION_VERT
if (partition_vert_allowed &&
(do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
@@ -2704,7 +2893,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[0].pred_interp_filter =
- ctx->mic.mbmi.interp_filter;
+ ctx->mic.interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->vertical[0], best_rdc.rdcost);
if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
@@ -2718,7 +2907,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter =
- ctx->mic.mbmi.interp_filter;
+ ctx->mic.interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step,
&this_rdc, subsize,
&pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
@@ -2732,7 +2921,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
sum_rdc.rate, sum_rdc.dist);
@@ -2777,6 +2965,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
+ const int mi_col_start = tile_info->mi_col_start;
+ const int mi_col_end = tile_info->mi_col_end;
int mi_col;
// Initialize the left context for the new SB row
@@ -2784,8 +2974,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi,
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
- for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
- mi_col += MI_BLOCK_SIZE) {
+ for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
const struct segmentation *const seg = &cm->seg;
int dummy_rate;
int64_t dummy_dist;
@@ -2890,8 +3079,8 @@ static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
- if (mi_ptr[mi_col]->mbmi.tx_size > max_tx_size)
- mi_ptr[mi_col]->mbmi.tx_size = max_tx_size;
+ if (mi_ptr[mi_col]->tx_size > max_tx_size)
+ mi_ptr[mi_col]->tx_size = max_tx_size;
}
}
}
@@ -2938,18 +3127,32 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi;
+ MODE_INFO *mi;
+ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
+ BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
+ int plane;
+
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- mbmi = &xd->mi[0]->mbmi;
- mbmi->sb_type = bsize;
+ mi = xd->mi[0];
+ mi->sb_type = bsize;
+
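+  // Save the above/left entropy contexts; the mode search below may modify
+  // them, and they are restored afterwards.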
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
+ (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
+ memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
+ (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
+ }
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
+ if (cyclic_refresh_segment_id_boosted(mi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
if (cm->frame_type == KEY_FRAME)
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
- else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else if (bsize >= BLOCK_8X8)
vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
@@ -2960,6 +3163,14 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
+ (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
+ memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
+ (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
+ }
+
if (rd_cost->rate == INT_MAX)
vp9_rd_cost_reset(rd_cost);
@@ -3109,7 +3320,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (partition_none_allowed) {
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col,
&this_rdc, bsize, ctx);
- ctx->mic.mbmi = xd->mi[0]->mbmi;
+ ctx->mic = *xd->mi[0];
ctx->mbmi_ext = *x->mbmi_ext;
ctx->skip_txfm[0] = x->skip_txfm[0];
ctx->skip = x->skip;
@@ -3192,7 +3403,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->horizontal[0]);
- pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[0].mic = *xd->mi[0];
pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
@@ -3204,7 +3415,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
&this_rdc, subsize,
&pc_tree->horizontal[1]);
- pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[1].mic = *xd->mi[0];
pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
@@ -3237,7 +3448,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->vertical[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->vertical[0]);
- pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[0].mic = *xd->mi[0];
pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
@@ -3248,7 +3459,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms,
&this_rdc, subsize,
&pc_tree->vertical[1]);
- pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[1].mic = *xd->mi[0];
pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
@@ -3320,7 +3531,7 @@ static void nonrd_select_partition(VP9_COMP *cpi,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
+ subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
partition = partition_lookup[bsl][subsize];
if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
@@ -3345,7 +3556,7 @@ static void nonrd_select_partition(VP9_COMP *cpi,
pc_tree->none.pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
subsize, &pc_tree->none);
- pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->none.mic = *xd->mi[0];
pc_tree->none.mbmi_ext = *x->mbmi_ext;
pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
pc_tree->none.skip = x->skip;
@@ -3354,7 +3565,7 @@ static void nonrd_select_partition(VP9_COMP *cpi,
pc_tree->vertical[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
subsize, &pc_tree->vertical[0]);
- pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[0].mic = *xd->mi[0];
pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
@@ -3362,7 +3573,7 @@ static void nonrd_select_partition(VP9_COMP *cpi,
pc_tree->vertical[1].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
&this_rdc, subsize, &pc_tree->vertical[1]);
- pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[1].mic = *xd->mi[0];
pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
@@ -3377,7 +3588,7 @@ static void nonrd_select_partition(VP9_COMP *cpi,
pc_tree->horizontal[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
subsize, &pc_tree->horizontal[0]);
- pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[0].mic = *xd->mi[0];
pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
@@ -3385,7 +3596,7 @@ static void nonrd_select_partition(VP9_COMP *cpi,
pc_tree->horizontal[1].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
&this_rdc, subsize, &pc_tree->horizontal[1]);
- pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[1].mic = *xd->mi[0];
pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
@@ -3457,7 +3668,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
+ subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
partition = partition_lookup[bsl][subsize];
if (output_enabled && bsize != BLOCK_4X4) {
@@ -3470,7 +3681,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
pc_tree->none.pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
subsize, &pc_tree->none);
- pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->none.mic = *xd->mi[0];
pc_tree->none.mbmi_ext = *x->mbmi_ext;
pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
pc_tree->none.skip = x->skip;
@@ -3481,7 +3692,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
pc_tree->vertical[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
subsize, &pc_tree->vertical[0]);
- pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[0].mic = *xd->mi[0];
pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
@@ -3491,7 +3702,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
pc_tree->vertical[1].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
dummy_cost, subsize, &pc_tree->vertical[1]);
- pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[1].mic = *xd->mi[0];
pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
@@ -3503,7 +3714,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
pc_tree->horizontal[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
subsize, &pc_tree->horizontal[0]);
- pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[0].mic = *xd->mi[0];
pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
@@ -3514,7 +3725,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
pc_tree->horizontal[1].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
dummy_cost, subsize, &pc_tree->horizontal[1]);
- pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[1].mic = *xd->mi[0];
pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
@@ -3563,6 +3774,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_col_start = tile_info->mi_col_start;
+ const int mi_col_end = tile_info->mi_col_end;
int mi_col;
// Initialize the left context for the new SB row
@@ -3570,8 +3783,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
- for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
- mi_col += MI_BLOCK_SIZE) {
+ for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
const struct segmentation *const seg = &cm->seg;
RD_COST dummy_rdc;
const int idx_str = cm->mi_stride * mi_row + mi_col;
@@ -3584,6 +3796,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
vp9_rd_cost_init(&dummy_rdc);
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
+ x->sb_is_skin = 0;
if (seg->enabled) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
@@ -3620,8 +3833,14 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
break;
case REFERENCE_PARTITION:
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
- xd->mi[0]->mbmi.segment_id) {
+      // Use nonrd_pick_partition on scene cuts for VBR, or on qp segments
+      // when cyclic_refresh is enabled.
+      // nonrd_pick_partition does not support 4x4 partitions, so avoid it
+      // on key frames for now.
+ if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
+ cm->frame_type != KEY_FRAME) ||
+ (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
+ xd->mi[0]->segment_id)) {
        // Use a lower max_partition_size for low resolutions.
if (cm->width <= 352 && cm->height <= 288)
x->max_partition_size = BLOCK_32X32;
@@ -3775,8 +3994,7 @@ static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
}
return (intra_count << 2) < inter_count &&
- cm->frame_type != KEY_FRAME &&
- cm->show_frame;
+ cm->frame_type != KEY_FRAME && cm->show_frame;
}
void vp9_init_tile_data(VP9_COMP *cpi) {
@@ -3829,10 +4047,15 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td,
&cpi->tile_data[tile_row * tile_cols + tile_col];
const TileInfo * const tile_info = &this_tile->tile_info;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
+ const int mi_row_start = tile_info->mi_row_start;
+ const int mi_row_end = tile_info->mi_row_end;
int mi_row;
- for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
- mi_row += MI_BLOCK_SIZE) {
+    // Set up pointers to the per-thread motion search counters.
+ td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
+ td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
+
+ for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode)
encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
else
@@ -3887,6 +4110,8 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(rdc->coef_counts);
vp9_zero(rdc->comp_pred_diff);
vp9_zero(rdc->filter_diff);
+ rdc->m_search_count = 0; // Count of motion search hits.
+  rdc->ex_search_count = 0;  // Count of exhaustive mesh search hits.
xd->lossless = cm->base_qindex == 0 &&
cm->y_dc_delta_q == 0 &&
@@ -3957,10 +4182,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vpx_usec_timer_start(&emr_timer);
#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
- &cpi->twopass.this_frame_mb_stats);
- }
+ if (cpi->use_fp_mb_stats) {
+ input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
+ &cpi->twopass.this_frame_mb_stats);
+ }
#endif
// If allowed, encoding tiles in parallel with one thread handling one tile.
@@ -3999,6 +4224,31 @@ static INTERP_FILTER get_interp_filter(
}
}
+static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
+ struct segmentation *const seg = &cm->seg;
+
+ int mi_row, mi_col;
+ int sum_delta = 0;
+ int map_index = 0;
+ int qdelta_index;
+ int segment_id;
+
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ MODE_INFO **mi_8x8 = mi_8x8_ptr;
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
+ segment_id = mi_8x8[0]->segment_id;
+ qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+ sum_delta += qdelta_index;
+ map_index++;
+ }
+ mi_8x8_ptr += cm->mi_stride;
+ }
+
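+  // Integer average of the per-block segment q-deltas over the frame.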
+ return sum_delta / (cm->mi_rows * cm->mi_cols);
+}
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -4121,12 +4371,17 @@ void vp9_encode_frame(VP9_COMP *cpi) {
cm->reference_mode = SINGLE_REFERENCE;
encode_frame_internal(cpi);
}
-}
+  // If segmented AQ is enabled, compute the average AQ weighting.
+ if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
+ (cm->seg.update_map || cm->seg.update_data)) {
+ cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
+ }
+}
static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
- const PREDICTION_MODE y_mode = mi->mbmi.mode;
- const PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
- const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+ const PREDICTION_MODE y_mode = mi->mode;
+ const PREDICTION_MODE uv_mode = mi->uv_mode;
+ const BLOCK_SIZE bsize = mi->sb_type;
if (bsize < BLOCK_8X8) {
int idx, idy;
@@ -4142,6 +4397,32 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
++counts->uv_mode[y_mode][uv_mode];
}
+static void update_zeromv_cnt(VP9_COMP *const cpi,
+ const MODE_INFO *const mi,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ const VP9_COMMON *const cm = &cpi->common;
+ MV mv = mi->mv[0].as_mv;
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ int x, y;
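+  // MVs are in 1/8-pel units, so |mv| < 8 corresponds to motion of less
+  // than one full pixel.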
+ for (y = 0; y < ymis; y++)
+ for (x = 0; x < xmis; x++) {
+ int map_offset = block_index + y * cm->mi_cols + x;
+ if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ if (abs(mv.row) < 8 && abs(mv.col) < 8) {
+ if (cpi->consec_zero_mv[map_offset] < 255)
+ cpi->consec_zero_mv[map_offset]++;
+ } else {
+ cpi->consec_zero_mv[map_offset] = 0;
+ }
+ }
+ }
+}
+
static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
TOKENEXTRA **t, int output_enabled,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -4149,16 +4430,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- MODE_INFO **mi_8x8 = xd->mi;
- MODE_INFO *mi = mi_8x8[0];
- MB_MODE_INFO *mbmi = &mi->mbmi;
- const int seg_skip = segfeature_active(&cm->seg, mbmi->segment_id,
+ MODE_INFO *mi = xd->mi[0];
+ const int seg_skip = segfeature_active(&cm->seg, mi->segment_id,
SEG_LVL_SKIP);
- const int mis = cm->mi_stride;
- const int mi_width = num_8x8_blocks_wide_lookup[bsize];
- const int mi_height = num_8x8_blocks_high_lookup[bsize];
- x->skip_recode = !x->select_tx_size && mbmi->sb_type >= BLOCK_8X8 &&
+ x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
cpi->sf.allow_skip_recode;
@@ -4175,21 +4451,28 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
if (x->skip_encode)
return;
- if (!is_inter_block(mbmi)) {
+ if (!is_inter_block(mi)) {
int plane;
- mbmi->skip = 1;
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) &&
+ need_top_left[mi->uv_mode])
+ assert(0);
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ mi->skip = 1;
for (plane = 0; plane < MAX_MB_PLANE; ++plane)
- vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane);
+ vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
if (output_enabled)
sum_intra_stats(td->counts, mi);
- vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+ vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
+ VPXMAX(bsize, BLOCK_8X8));
} else {
int ref;
- const int is_compound = has_second_ref(mbmi);
- set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ const int is_compound = has_second_ref(mi);
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
for (ref = 0; ref < 1 + is_compound; ++ref) {
YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
- mbmi->ref_frame[ref]);
+ mi->ref_frame[ref]);
assert(cfg != NULL);
vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
@@ -4202,34 +4485,31 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
VPXMAX(bsize, BLOCK_8X8));
vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
- vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+ vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
+ VPXMAX(bsize, BLOCK_8X8));
}
if (output_enabled) {
if (cm->tx_mode == TX_MODE_SELECT &&
- mbmi->sb_type >= BLOCK_8X8 &&
- !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
+ mi->sb_type >= BLOCK_8X8 &&
+ !(is_inter_block(mi) && (mi->skip || seg_skip))) {
++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
- &td->counts->tx)[mbmi->tx_size];
+ &td->counts->tx)[mi->tx_size];
} else {
- int x, y;
- TX_SIZE tx_size;
// The new intra coding scheme requires no change of transform size
- if (is_inter_block(&mi->mbmi)) {
- tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
- max_txsize_lookup[bsize]);
+ if (is_inter_block(mi)) {
+ mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+ max_txsize_lookup[bsize]);
} else {
- tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
+ mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4;
}
-
- for (y = 0; y < mi_height; y++)
- for (x = 0; x < mi_width; x++)
- if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
- mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
}
- ++td->counts->tx.tx_totals[mbmi->tx_size];
- ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+
+ ++td->counts->tx.tx_totals[mi->tx_size];
+ ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
- vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize);
+ vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
+ if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
+ update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
}
}
diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c
index 3c6a9283c..169943c10 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/libvpx/vp9/encoder/vp9_encodemb.c
@@ -50,27 +50,21 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
pd->dst.buf, pd->dst.stride);
}
-#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
-
typedef struct vp9_token_state {
+ int64_t error;
int rate;
- int error;
- int next;
+ int16_t next;
int16_t token;
- int16_t qc;
+ tran_low_t qc;
+ tran_low_t dqc;
} vp9_token_state;
-// TODO(jimbankoski): experiment to find optimal RD numbers.
-static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
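+// Trellis RD multipliers, indexed as [is_inter_block][plane_type].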
+static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { {10, 6}, {8, 7} };
#define UPDATE_RD_COST()\
{\
rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
- if (rd_cost0 == rd_cost1) {\
- rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
- rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
- }\
}
// This function is a place holder for now but may ultimately need
@@ -91,7 +85,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *const p = &mb->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
- const int ref = is_inter_block(&xd->mi[0]->mbmi);
+ const int ref = is_inter_block(xd->mi[0]);
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
uint8_t token_cache[1024];
@@ -101,32 +95,32 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
const int eob = p->eobs[block];
const PLANE_TYPE type = get_plane_type(plane);
const int default_eob = 16 << (tx_size << 1);
- const int mul = 1 + (tx_size == TX_32X32);
- const int16_t *dequant_ptr = pd->dequant;
- const uint8_t *const band_translate = get_band_translate(tx_size);
+ const int shift = (tx_size == TX_32X32);
+ const int16_t* const dequant_ptr = pd->dequant;
+ const uint8_t* const band_translate = get_band_translate(tx_size);
const scan_order *const so = get_scan(xd, tx_size, type, block);
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
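+  // Dequantization steps for DC and AC coefficients; the 32x32 transform
+  // uses half the quantization step size, hence the shift.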
+ const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
int next = eob, sz = 0;
- int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
+ const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
+ const int64_t rddiv = mb->rddiv;
int64_t rd_cost0, rd_cost1;
- int rate0, rate1, error0, error1;
+ int rate0, rate1;
+ int64_t error0, error1;
int16_t t0, t1;
EXTRABIT e0;
int best, band, pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
- const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
+ const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
- const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
+ const int *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
- if (!ref)
- rdmult = (rdmult * 9) >> 4;
-
/* Initialize the sentinel node of the trellis. */
tokens[eob][0].rate = 0;
tokens[eob][0].error = 0;
@@ -165,7 +159,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
/* And pick the best. */
best = rd_cost1 < rd_cost0;
base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
- dx = mul * (dqcoeff[rc] - coeff[rc]);
+ dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dx >>= xd->bd - 8;
@@ -177,14 +171,15 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
tokens[i][0].next = next;
tokens[i][0].token = t0;
tokens[i][0].qc = x;
+ tokens[i][0].dqc = dqcoeff[rc];
best_index[i][0] = best;
/* Evaluate the second possibility for this state. */
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
- (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
+ if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
+ (abs(x) * dequant_ptr[rc != 0] < (abs(coeff[rc]) << shift) +
dequant_ptr[rc != 0]))
shortcut = 1;
else
@@ -193,6 +188,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
if (shortcut) {
sz = -(x < 0);
x -= 2 * sz + 1;
+ } else {
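+      // Reducing |qc| by one is not a viable rounding for this coefficient,
+      // so there is no second state to evaluate; mirror state 0 and advance.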
+ tokens[i][1] = tokens[i][0];
+ best_index[i][1] = best_index[i][0];
+ next = i;
+ continue;
}
/* Consider both possible successor states. */
@@ -243,6 +243,24 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
tokens[i][1].next = next;
tokens[i][1].token = best ? t1 : t0;
tokens[i][1].qc = x;
+
+ if (x) {
+ tran_low_t offset = dq_step[rc != 0];
+ // The 32x32 transform coefficient uses half quantization step size.
+      // Account for the rounding difference in the dequantized coefficient
+ // value when the quantization index is dropped from an even number
+ // to an odd number.
+ if (shift & x)
+ offset += (dequant_ptr[rc != 0] & 0x01);
+
+ if (sz == 0)
+ tokens[i][1].dqc = dqcoeff[rc] - offset;
+ else
+ tokens[i][1].dqc = dqcoeff[rc] + offset;
+ } else {
+ tokens[i][1].dqc = 0;
+ }
+
best_index[i][1] = best;
/* Finally, make this the new head of the trellis. */
next = i;
@@ -282,18 +300,13 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = -1;
- memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
- memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
+
for (i = next; i < eob; i = next) {
const int x = tokens[i][best].qc;
const int rc = scan[i];
- if (x) {
- final_eob = i;
- }
-
+ if (x) final_eob = i;
qcoeff[rc] = x;
- dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
-
+ dqcoeff[rc] = tokens[i][best].dqc;
next = tokens[i][best].next;
best = best_index[i][best];
}
@@ -736,11 +749,11 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip};
+ MODE_INFO *mi = xd->mi[0];
+ struct encode_b_args arg = {x, &ctx, &mi->skip};
int plane;
- mbmi->skip = 1;
+ mi->skip = 1;
if (x->skip)
return;
@@ -751,7 +764,7 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
- const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
vp9_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
}
@@ -766,7 +779,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
struct encode_b_args* const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
@@ -783,17 +796,26 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
int i, j;
+ struct optimize_ctx *const ctx = args->ctx;
+ ENTROPY_CONTEXT *a = NULL;
+ ENTROPY_CONTEXT *l = NULL;
+ int entropy_ctx = 0;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
dst = &pd->dst.buf[4 * (j * dst_stride + i)];
src = &p->src.buf[4 * (j * src_stride + i)];
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+ if (args->ctx != NULL) {
+ a = &ctx->ta[plane][i];
+ l = &ctx->tl[plane][j];
+ entropy_ctx = combine_entropy_contexts(*a, *l);
+ }
if (tx_size == TX_4X4) {
tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
- mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
+ mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
} else {
- mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
+ mode = plane == 0 ? mi->mode : mi->uv_mode;
if (tx_size == TX_32X32) {
scan_order = &vp9_default_scan_orders[TX_32X32];
} else {
@@ -905,6 +927,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
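+ // Trellis-optimize the quantized coefficients in place; a positive
+ // return from optimize_b() (a nonzero eob) marks the above/left
+ // entropy contexts as coded.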
+ if (args->ctx != NULL && !x->skip_recode) {
+ *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
+ }
if (!x->skip_encode && *eob)
vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
break;
@@ -918,6 +943,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
+ if (args->ctx != NULL && !x->skip_recode) {
+ *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
+ }
if (!x->skip_encode && *eob)
vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
break;
@@ -931,6 +959,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
+ if (args->ctx != NULL && !x->skip_recode) {
+ *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
+ }
if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
break;
@@ -947,7 +978,9 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
}
-
+ if (args->ctx != NULL && !x->skip_recode) {
+ *a = *l = optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
+ }
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
// this is like vp9_short_idct4x4 but has a special case around eob<=1
@@ -966,9 +999,20 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
*(args->skip) = 0;
}
-void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+ int enable_optimize_b) {
const MACROBLOCKD *const xd = &x->e_mbd;
- struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
+ struct optimize_ctx ctx;
+ struct encode_b_args arg = {x, NULL, &xd->mi[0]->skip};
+
+ if (enable_optimize_b && x->optimize &&
+ (!x->skip_recode || !x->skip_optimize)) {
+ const struct macroblockd_plane* const pd = &xd->plane[plane];
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(xd->mi[0], pd) :
+ xd->mi[0]->tx_size;
+ vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
+ arg.ctx = &ctx;
+ }
vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
vp9_encode_block_intra, &arg);
diff --git a/libvpx/vp9/encoder/vp9_encodemb.h b/libvpx/vp9/encoder/vp9_encodemb.h
index 97df8a66b..25b0b23e0 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.h
+++ b/libvpx/vp9/encoder/vp9_encodemb.h
@@ -37,7 +37,8 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg);
-void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+ int enable_optimize_b);
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c
index e71966343..71f27cc53 100644
--- a/libvpx/vp9/encoder/vp9_encodemv.c
+++ b/libvpx/vp9/encoder/vp9_encodemv.c
@@ -75,11 +75,12 @@ static void encode_mv_component(vpx_writer* w, int comp,
static void build_nmv_component_cost_table(int *mvcost,
const nmv_component* const mvcomp,
int usehp) {
- int i, v;
int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
int bits_cost[MV_OFFSET_BITS][2];
int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE];
int class0_hp_cost[2], hp_cost[2];
+ int i;
+ int c, o;
sign_cost[0] = vp9_cost_zero(mvcomp->sign);
sign_cost[1] = vp9_cost_one(mvcomp->sign);
@@ -94,51 +95,64 @@ static void build_nmv_component_cost_table(int *mvcost,
vp9_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp9_mv_fp_tree);
vp9_cost_tokens(fp_cost, mvcomp->fp, vp9_mv_fp_tree);
- if (usehp) {
- class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp);
- class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp);
- hp_cost[0] = vp9_cost_zero(mvcomp->hp);
- hp_cost[1] = vp9_cost_one(mvcomp->hp);
- }
+ // Always build the hp costs to avoid an uninitialized warning from gcc
+ class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp);
+ class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp);
+ hp_cost[0] = vp9_cost_zero(mvcomp->hp);
+ hp_cost[1] = vp9_cost_one(mvcomp->hp);
+
mvcost[0] = 0;
- for (v = 1; v <= MV_MAX; ++v) {
- int z, c, o, d, e, f, cost = 0;
- z = v - 1;
- c = vp9_get_mv_class(z, &o);
- cost += class_cost[c];
+ // MV_CLASS_0
+ for (o = 0; o < (CLASS0_SIZE << 3); ++o) {
+ int d, e, f;
+ int cost = class_cost[MV_CLASS_0];
+ int v = o + 1;
d = (o >> 3); /* int mv data */
f = (o >> 1) & 3; /* fractional pel mv data */
- e = (o & 1); /* high precision mv data */
- if (c == MV_CLASS_0) {
- cost += class0_cost[d];
- } else {
- int i, b;
- b = c + CLASS0_BITS - 1; /* number of bits */
- for (i = 0; i < b; ++i)
- cost += bits_cost[i][((d >> i) & 1)];
- }
- if (c == MV_CLASS_0) {
- cost += class0_fp_cost[d][f];
- } else {
- cost += fp_cost[f];
- }
+ cost += class0_cost[d];
+ cost += class0_fp_cost[d][f];
if (usehp) {
- if (c == MV_CLASS_0) {
- cost += class0_hp_cost[e];
- } else {
- cost += hp_cost[e];
- }
+ e = (o & 1); /* high precision mv data */
+ cost += class0_hp_cost[e];
}
mvcost[v] = cost + sign_cost[0];
mvcost[-v] = cost + sign_cost[1];
}
+ for (c = MV_CLASS_1; c < MV_CLASSES; ++c) {
+ int d;
+ for (d = 0; d < (1 << c); ++d) {
+ int f;
+ int whole_cost = class_cost[c];
+ int b = c + CLASS0_BITS - 1; /* number of bits */
+ for (i = 0; i < b; ++i)
+ whole_cost += bits_cost[i][((d >> i) & 1)];
+ for (f = 0; f < 4; ++f) {
+ int cost = whole_cost + fp_cost[f];
+ int v = (CLASS0_SIZE << (c + 2)) + d * 8 + f * 2 /* + e */ + 1;
+ if (usehp) {
+ mvcost[v] = cost + hp_cost[0] + sign_cost[0];
+ mvcost[-v] = cost + hp_cost[0] + sign_cost[1];
+ if (v + 1 > MV_MAX) break;
+ mvcost[v + 1] = cost + hp_cost[1] + sign_cost[0];
+ mvcost[-v - 1] = cost + hp_cost[1] + sign_cost[1];
+ } else {
+ mvcost[v] = cost + sign_cost[0];
+ mvcost[-v] = cost + sign_cost[1];
+ if (v + 1 > MV_MAX) break;
+ mvcost[v + 1] = cost + sign_cost[0];
+ mvcost[-v - 1] = cost + sign_cost[1];
+ }
+ }
+ }
+ }
}
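
The rewrite above replaces the per-value vp9_get_mv_class() loop with direct enumeration by class. A small illustrative mapping from (class, integer, fractional, high-precision) components back to the magnitude index, assuming the libvpx constants CLASS0_SIZE == 2 and CLASS0_BITS == 1:

    /* Class 0 covers v = 1..16 (o = d * 8 + f * 2 + e); a class c >= 1
     * starts at v = (CLASS0_SIZE << (c + 2)) + 1, matching the index
     * computed in the second loop above. */
    static int mv_magnitude(int c, int d, int f, int e) {
      const int base = (c == 0) ? 0 : (2 << (c + 2));  /* CLASS0_SIZE == 2 */
      return base + d * 8 + f * 2 + e + 1;
    }
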
static int update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
vpx_prob upd_p) {
const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
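+ // Signal the updated probability only when the coding savings exceed
+ // the overhead of sending the update itself (about 7 bits, expressed
+ // in VP9_PROB_COST_SHIFT fixed-point cost units).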
const int update = cost_branch256(ct, *cur_p) + vp9_cost_zero(upd_p) >
- cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + 7 * 256;
+ cost_branch256(ct, new_p) + vp9_cost_one(upd_p) +
+ (7 << VP9_PROB_COST_SHIFT);
vpx_write(w, update, upd_p);
if (update) {
*cur_p = new_p;
@@ -206,7 +220,7 @@ void vp9_encode_mv(VP9_COMP* cpi, vpx_writer* w,
const MV diff = {mv->row - ref->row,
mv->col - ref->col};
const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff);
- usehp = usehp && vp9_use_mv_hp(ref);
+ usehp = usehp && use_mv_hp(ref);
vp9_write_token(w, vp9_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]);
if (mv_joint_vertical(j))
@@ -230,13 +244,13 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
-static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
+static void inc_mvs(const MODE_INFO *mi, const MB_MODE_INFO_EXT *mbmi_ext,
const int_mv mvs[2],
nmv_context_counts *counts) {
int i;
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
- const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
+ for (i = 0; i < 1 + has_second_ref(mi); ++i) {
+ const MV *ref = &mbmi_ext->ref_mvs[mi->ref_frame[i]][0].as_mv;
const MV diff = {mvs[i].as_mv.row - ref->row,
mvs[i].as_mv.col - ref->col};
vp9_inc_mv(&diff, counts);
@@ -246,24 +260,23 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
void vp9_update_mv_count(ThreadData *td) {
const MACROBLOCKD *xd = &td->mb.e_mbd;
const MODE_INFO *mi = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mi->mbmi;
const MB_MODE_INFO_EXT *mbmi_ext = td->mb.mbmi_ext;
- if (mbmi->sb_type < BLOCK_8X8) {
- const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ if (mi->sb_type < BLOCK_8X8) {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[mi->sb_type];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[mi->sb_type];
int idx, idy;
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int i = idy * 2 + idx;
if (mi->bmi[i].as_mode == NEWMV)
- inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv);
+ inc_mvs(mi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv);
}
}
} else {
- if (mbmi->mode == NEWMV)
- inc_mvs(mbmi, mbmi_ext, mbmi->mv, &td->counts->mv);
+ if (mi->mode == NEWMV)
+ inc_mvs(mi, mbmi_ext, mi->mv, &td->counts->mv);
}
}
diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c
index 72eafec40..147f97004 100644
--- a/libvpx/vp9/encoder/vp9_encoder.c
+++ b/libvpx/vp9/encoder/vp9_encoder.c
@@ -36,6 +36,7 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_tile_common.h"
+#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_complexity.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
@@ -47,6 +48,7 @@
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mbgraph.h"
+#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
@@ -60,8 +62,6 @@
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0
-#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */
-
#define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv
// for altref computation.
#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision
@@ -86,6 +86,25 @@ FILE *kf_list;
FILE *keyfile;
#endif
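+// Fields per row, in Vp9LevelSpec order: level, max_luma_sample_rate,
+// max_luma_picture_size, average_bitrate (kbps), max_cpb_size (kbits),
+// compression_ratio, max_col_tiles, min_altref_distance,
+// max_ref_frame_buffers.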
+static const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
+ {LEVEL_1, 829440, 36864, 200, 400, 2, 1, 4, 8},
+ {LEVEL_1_1, 2764800, 73728, 800, 1000, 2, 1, 4, 8},
+ {LEVEL_2, 4608000, 122880, 1800, 1500, 2, 1, 4, 8},
+ {LEVEL_2_1, 9216000, 245760, 3600, 2800, 2, 2, 4, 8},
+ {LEVEL_3, 20736000, 552960, 7200, 6000, 2, 4, 4, 8},
+ {LEVEL_3_1, 36864000, 983040, 12000, 10000, 2, 4, 4, 8},
+ {LEVEL_4, 83558400, 2228224, 18000, 16000, 4, 4, 4, 8},
+ {LEVEL_4_1, 160432128, 2228224, 30000, 18000, 4, 4, 5, 6},
+ {LEVEL_5, 311951360, 8912896, 60000, 36000, 6, 8, 6, 4},
+ {LEVEL_5_1, 588251136, 8912896, 120000, 46000, 8, 8, 10, 4},
+ // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
+ // they are finalized (currently TBD).
+ {LEVEL_5_2, 1176502272, 8912896, 180000, 0, 8, 8, 10, 4},
+ {LEVEL_6, 1176502272, 35651584, 180000, 0, 8, 16, 10, 4},
+ {LEVEL_6_1, 2353004544u, 35651584, 240000, 0, 8, 16, 10, 4},
+ {LEVEL_6_2, 4706009088u, 35651584, 480000, 0, 8, 16, 10, 4},
+};
+
static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
switch (mode) {
case NORMAL:
@@ -116,11 +135,16 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
// so memset cannot be used; instead, only inactive blocks should be reset.
static void suppress_active_map(VP9_COMP *cpi) {
unsigned char *const seg_map = cpi->segmentation_map;
- int i;
- if (cpi->active_map.enabled || cpi->active_map.update)
- for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
+
+ if (cpi->active_map.enabled || cpi->active_map.update) {
+ const int rows = cpi->common.mi_rows;
+ const int cols = cpi->common.mi_cols;
+ int i;
+
+ for (i = 0; i < rows * cols; ++i)
if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
seg_map[i] = AM_SEGMENT_ID_ACTIVE;
+ }
}
static void apply_active_map(VP9_COMP *cpi) {
@@ -159,6 +183,39 @@ static void apply_active_map(VP9_COMP *cpi) {
}
}
+static void init_level_info(Vp9LevelInfo *level_info) {
+ Vp9LevelStats *const level_stats = &level_info->level_stats;
+ Vp9LevelSpec *const level_spec = &level_info->level_spec;
+
+ memset(level_stats, 0, sizeof(*level_stats));
+ memset(level_spec, 0, sizeof(*level_spec));
+ level_spec->level = LEVEL_UNKNOWN;
+ level_spec->min_altref_distance = INT_MAX;
+}
+
+VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
+ int i;
+ const Vp9LevelSpec *this_level;
+
+ vpx_clear_system_state();
+
+ for (i = 0; i < VP9_LEVELS; ++i) {
+ this_level = &vp9_level_defs[i];
+ if ((double)level_spec->max_luma_sample_rate * (1 + SAMPLE_RATE_GRACE_P) >
+ (double)this_level->max_luma_sample_rate ||
+ level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
+ level_spec->average_bitrate > this_level->average_bitrate ||
+ level_spec->max_cpb_size > this_level->max_cpb_size ||
+ level_spec->compression_ratio < this_level->compression_ratio ||
+ level_spec->max_col_tiles > this_level->max_col_tiles ||
+ level_spec->min_altref_distance < this_level->min_altref_distance ||
+ level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
+ continue;
+ break;
+ }
+ return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
+}
+
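
A hypothetical caller for the new query; the wrapper below is illustrative, but the semantics follow the loop above: the first level whose limits all hold wins, with SAMPLE_RATE_GRACE_P allowing a 1.5% overshoot on the luma sample rate:

    static VP9_LEVEL report_encoded_level(const VP9_COMP *cpi) {
      /* LEVEL_UNKNOWN is returned when even LEVEL_6_2 is exceeded. */
      return vp9_get_level(&cpi->level_info.level_spec);
    }
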
int vp9_set_active_map(VP9_COMP* cpi,
unsigned char* new_map_16x16,
int rows,
@@ -375,6 +432,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL;
+ vpx_free(cpi->consec_zero_mv);
+ cpi->consec_zero_mv = NULL;
+
vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
vp9_free_postproc_buffers(cm);
@@ -410,6 +470,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
memset(&cpi->svc.scaled_frames[0], 0,
MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
+ vpx_free_frame_buffer(&cpi->svc.scaled_temp);
+ memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
+
vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
@@ -607,7 +670,7 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) {
MODE_INFO **mi_8x8 = mi_8x8_ptr;
uint8_t *cache = cache_ptr;
for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
- cache[0] = mi_8x8[0]->mbmi.segment_id;
+ cache[0] = mi_8x8[0]->segment_id;
mi_8x8_ptr += cm->mi_stride;
cache_ptr += cm->mi_cols;
}
@@ -768,7 +831,6 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
cpi->oxcf = *oxcf;
cpi->framerate = oxcf->init_framerate;
-
cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -777,6 +839,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
cm->color_space = oxcf->color_space;
cm->color_range = oxcf->color_range;
+ cpi->target_level = oxcf->target_level;
+ cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
+
cm->width = oxcf->width;
cm->height = oxcf->height;
alloc_compressor_data(cpi);
@@ -805,6 +870,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
cpi->ref_frame_flags = 0;
init_buffer_indices(cpi);
+
+ vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
}
static void set_rc_buffer_sizes(RATE_CONTROL *rc,
@@ -1465,6 +1532,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cm->color_space = oxcf->color_space;
cm->color_range = oxcf->color_range;
+ cpi->target_level = oxcf->target_level;
+ cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
+
if (cm->profile <= PROFILE_1)
assert(cm->bit_depth == VPX_BITS_8);
else
@@ -1475,7 +1545,11 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
+ if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
+ rc->baseline_gf_interval = FIXED_GF_INTERVAL;
+ } else {
+ rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
+ }
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 1;
@@ -1519,6 +1593,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
cm->width = cpi->oxcf.width;
cm->height = cpi->oxcf.height;
+ cpi->external_resize = 1;
}
if (cpi->initial_width) {
@@ -1530,10 +1605,22 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
alloc_compressor_data(cpi);
realloc_segmentation_maps(cpi);
cpi->initial_width = cpi->initial_height = 0;
+ cpi->external_resize = 0;
+ } else if (cm->mi_alloc_size == new_mi_size &&
+ (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
+ vp9_alloc_loop_filter(cm);
}
}
+
update_frame_size(cpi);
+ if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
+ memset(cpi->consec_zero_mv, 0,
+ cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_reset_resize(cpi);
+ }
+
if ((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
((cpi->svc.number_temporal_layers > 1 ||
@@ -1567,7 +1654,30 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
#endif
#define log2f(x) (log (x) / (float) M_LOG2_E)
+/***********************************************************************
+ * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' *
+ ***********************************************************************
+ * The following 2 functions ('cal_nmvjointsadcost' and *
+ * 'cal_nmvsadcosts') are used to calculate cost lookup tables *
+ * used by 'vp9_diamond_search_sad'. The C implementation of the *
+ * function is generic, but the AVX intrinsics optimised version *
+ * relies on the following properties of the computed tables: *
+ * For cal_nmvjointsadcost: *
+ * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
+ * For cal_nmvsadcosts: *
+ * - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
+ * (Equal costs for both components) *
+ * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
+ * (Cost function is even) *
+ * If these do not hold, then the AVX optimised version of the *
+ * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
+ * case you can revert to using the C function instead. *
+ ***********************************************************************/
+
static void cal_nmvjointsadcost(int *mvjointsadcost) {
+ /*********************************************************************
+ * Warning: Read the comments above before modifying this function *
+ *********************************************************************/
mvjointsadcost[0] = 600;
mvjointsadcost[1] = 300;
mvjointsadcost[2] = 300;
@@ -1575,6 +1685,9 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) {
}
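
A minimal self-check of the invariants documented above, illustrative only (the function name is hypothetical; MV_MAX and the mid-array mvsadcost pointers are as used elsewhere in the encoder):

    #include <assert.h>
    static void check_sad_cost_invariants(const int *mvjointsadcost,
                                          int *const mvsadcost[2]) {
      int i;
      assert(mvjointsadcost[1] == mvjointsadcost[2]);
      assert(mvjointsadcost[2] == mvjointsadcost[3]);
      for (i = 0; i <= MV_MAX; ++i) {
        assert(mvsadcost[0][i] == mvsadcost[1][i]);   /* equal components */
        assert(mvsadcost[0][i] == mvsadcost[0][-i]);  /* even function */
      }
    }
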
static void cal_nmvsadcosts(int *mvsadcost[2]) {
+ /*********************************************************************
+ * Warning: Read the comments above before modifying this function *
+ *********************************************************************/
int i = 1;
mvsadcost[0][0] = 0;
@@ -1604,7 +1717,6 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX);
}
-
VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
BufferPool *const pool) {
unsigned int i;
@@ -1635,12 +1747,12 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->use_svc = 0;
cpi->resize_state = 0;
+ cpi->external_resize = 0;
cpi->resize_avg_qp = 0;
cpi->resize_buffer_underflow = 0;
+ cpi->use_skin_detection = 0;
cpi->common.buffer_pool = pool;
- cpi->rc.high_source_sad = 0;
-
init_config(cpi, oxcf);
vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
@@ -1650,6 +1762,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
realloc_segmentation_maps(cpi);
+ CHECK_MEM_ERROR(cm, cpi->consec_zero_mv,
+ vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*cpi->consec_zero_mv)));
+
CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
@@ -1689,6 +1805,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->multi_arf_last_grp_enabled = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+
+ init_level_info(&cpi->level_info);
+
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
cpi->b_calculate_blockiness = 1;
@@ -1727,8 +1846,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
}
if (cpi->b_calculate_consistency) {
- cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars) *
- 4 * cpi->common.mi_rows * cpi->common.mi_cols);
+ CHECK_MEM_ERROR(cm, cpi->ssim_vars,
+ vpx_malloc(sizeof(*cpi->ssim_vars) * 4 *
+ cpi->common.mi_rows * cpi->common.mi_cols));
cpi->worst_consistency = 100.0;
}
@@ -1736,6 +1856,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->first_time_stamp_ever = INT64_MAX;
+ /*********************************************************************
+ * Warning: Read the comments around 'cal_nmvjointsadcost' and *
+ * 'cal_nmvsadcosts' before modifying how these tables are computed. *
+ *********************************************************************/
cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
@@ -1928,11 +2052,14 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
return cpi;
}
+
+#if CONFIG_INTERNAL_STATS
#define SNPRINT(H, T) \
snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
#define SNPRINT2(H, T, V) \
snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
+#endif // CONFIG_INTERNAL_STATS
void vp9_remove_compressor(VP9_COMP *cpi) {
VP9_COMMON *cm;
@@ -1958,6 +2085,8 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
const double dr =
(double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
+ const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
+ const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr) {
const double total_psnr =
@@ -2009,8 +2138,9 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
}
- fprintf(f, "%s\t Time\n", headings);
- fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
+ fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
+ fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
+ total_encode_time, rate_err, fabs(rate_err));
}
fclose(f);
@@ -2128,7 +2258,7 @@ static void encoder_variance(const uint8_t *a, int a_stride,
static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint64_t *sse,
- uint64_t *sum) {
+ int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -2152,7 +2282,7 @@ static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
int w, int h,
unsigned int *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
&sse_long, &sum_long);
*sse = (unsigned int)sse_long;
@@ -2574,10 +2704,6 @@ static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
#if CONFIG_VP9_HIGHBITDEPTH
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst, int bd) {
-#else
-static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
- YV12_BUFFER_CONFIG *dst) {
-#endif // CONFIG_VP9_HIGHBITDEPTH
const int src_w = src->y_crop_width;
const int src_h = src->y_crop_height;
const int dst_w = dst->y_crop_width;
@@ -2589,19 +2715,18 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
int x, y, i;
- for (y = 0; y < dst_h; y += 16) {
- for (x = 0; x < dst_w; x += 16) {
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- const int factor = (i == 0 || i == 3 ? 1 : 2);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const int factor = (i == 0 || i == 3 ? 1 : 2);
+ const int src_stride = src_strides[i];
+ const int dst_stride = dst_strides[i];
+ for (y = 0; y < dst_h; y += 16) {
+ const int y_q4 = y * (16 / factor) * src_h / dst_h;
+ for (x = 0; x < dst_w; x += 16) {
const int x_q4 = x * (16 / factor) * src_w / dst_w;
- const int y_q4 = y * (16 / factor) * src_h / dst_h;
- const int src_stride = src_strides[i];
- const int dst_stride = dst_strides[i];
const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h *
- src_stride + (x / factor) * src_w / dst_w;
+ src_stride + (x / factor) * src_w / dst_w;
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
-#if CONFIG_VP9_HIGHBITDEPTH
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
@@ -2613,18 +2738,49 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
kernel[y_q4 & 0xf], 16 * src_h / dst_h,
16 / factor, 16 / factor);
}
+ }
+ }
+ }
+
+ vpx_extend_frame_borders(dst);
+}
#else
+void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ const int src_w = src->y_crop_width;
+ const int src_h = src->y_crop_height;
+ const int dst_w = dst->y_crop_width;
+ const int dst_h = dst->y_crop_height;
+ const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
+ const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
+ uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
+ const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
+ const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
+ int x, y, i;
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const int factor = (i == 0 || i == 3 ? 1 : 2);
+ const int src_stride = src_strides[i];
+ const int dst_stride = dst_strides[i];
+ for (y = 0; y < dst_h; y += 16) {
+ const int y_q4 = y * (16 / factor) * src_h / dst_h;
+ for (x = 0; x < dst_w; x += 16) {
+ const int x_q4 = x * (16 / factor) * src_w / dst_w;
+ const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h *
+ src_stride + (x / factor) * src_w / dst_w;
+ uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
+
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
kernel[y_q4 & 0xf], 16 * src_h / dst_h,
16 / factor, 16 / factor);
-#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
}
vpx_extend_frame_borders(dst);
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
static int scale_down(VP9_COMP *cpi, int q) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -2641,6 +2797,13 @@ static int scale_down(VP9_COMP *cpi, int q) {
return scale;
}
+static int big_rate_miss(VP9_COMP *cpi, int high_limit, int low_limit) {
+ const RATE_CONTROL *const rc = &cpi->rc;
+
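+ // A projected size more than 50% above the high limit, or less than
+ // half the low limit, counts as a big miss.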
+ return (rc->projected_frame_size > ((high_limit * 3) / 2)) ||
+ (rc->projected_frame_size < (low_limit / 2));
+}
+
// Function to test for conditions that indicate we should loop
// back and recode a frame.
static int recode_loop_test(VP9_COMP *cpi,
@@ -2652,6 +2815,7 @@ static int recode_loop_test(VP9_COMP *cpi,
int force_recode = 0;
if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
+ big_rate_miss(cpi, high_limit, low_limit) ||
(cpi->sf.recode_loop == ALLOW_RECODE) ||
(frame_is_kfgfarf &&
(cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
@@ -2693,7 +2857,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
} else if (vp9_preserve_existing_gf(cpi)) {
// We have decided to preserve the previously existing golden frame as our
// new ARF frame. However, in the short term in function
- // vp9_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
+ // vp9_get_refresh_mask() we left it in the GF slot and, if
// we're updating the GF with the current decoded frame, we save it to the
// ARF slot instead.
// We now have to update the ARF with the current frame and swap gld_fb_idx
@@ -2750,7 +2914,8 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
sizeof(cpi->interp_filter_selected[0]));
}
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0) {
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ cpi->denoiser.denoising_level > kDenLowLow) {
vp9_denoiser_update_frame_info(&cpi->denoiser,
*cpi->Source,
cpi->common.frame_type,
@@ -2760,6 +2925,22 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
cpi->resize_pending);
}
#endif
+ if (is_one_pass_cbr_svc(cpi)) {
+ // Keep track of frame index for each reference frame.
+ SVC *const svc = &cpi->svc;
+ if (cm->frame_type == KEY_FRAME) {
+ svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
+ svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
+ svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ } else {
+ if (cpi->refresh_last_frame)
+ svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_golden_frame)
+ svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_alt_ref_frame)
+ svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ }
+ }
}
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
@@ -2768,6 +2949,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
if (xd->lossless) {
lf->filter_level = 0;
+ lf->last_filt_level = 0;
} else {
struct vpx_usec_timer timer;
@@ -2775,7 +2957,16 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vpx_usec_timer_start(&timer);
- vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ if ((cpi->common.frame_type == KEY_FRAME) &&
+ (!cpi->rc.this_key_frame_forced)) {
+ lf->last_filt_level = 0;
+ }
+ vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
+ lf->last_filt_level = lf->filter_level;
+ } else {
+ lf->filter_level = 0;
+ }
vpx_usec_timer_mark(&timer);
cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
@@ -2796,16 +2987,16 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
vpx_extend_frame_inner_borders(cm->frame_to_show);
}
-static INLINE void alloc_frame_mvs(const VP9_COMMON *cm,
+static INLINE void alloc_frame_mvs(VP9_COMMON *const cm,
int buffer_idx) {
RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
if (new_fb_ptr->mvs == NULL ||
new_fb_ptr->mi_rows < cm->mi_rows ||
new_fb_ptr->mi_cols < cm->mi_cols) {
vpx_free(new_fb_ptr->mvs);
- new_fb_ptr->mvs =
- (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(*new_fb_ptr->mvs));
+ CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
+ (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*new_fb_ptr->mvs)));
new_fb_ptr->mi_rows = cm->mi_rows;
new_fb_ptr->mi_cols = cm->mi_cols;
}
@@ -2843,12 +3034,13 @@ void vp9_scale_references(VP9_COMP *cpi) {
if (force_scaling ||
new_fb_ptr->buf.y_crop_width != cm->width ||
new_fb_ptr->buf.y_crop_height != cm->height) {
- vpx_realloc_frame_buffer(&new_fb_ptr->buf,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- cm->use_highbitdepth,
- VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL);
+ if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ cm->use_highbitdepth,
+ VP9_ENC_BORDER_IN_PIXELS,
+ cm->byte_alignment, NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
@@ -2868,19 +3060,31 @@ void vp9_scale_references(VP9_COMP *cpi) {
if (force_scaling ||
new_fb_ptr->buf.y_crop_width != cm->width ||
new_fb_ptr->buf.y_crop_height != cm->height) {
- vpx_realloc_frame_buffer(&new_fb_ptr->buf,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL);
- scale_and_extend_frame(ref, &new_fb_ptr->buf);
+ if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS,
+ cm->byte_alignment, NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+ vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf);
cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
} else {
- const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
- RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
+ int buf_idx;
+ RefCntBuffer *buf = NULL;
+ if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
+ // Check for release of scaled reference.
+ buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
+ buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL;
+ if (buf != NULL) {
+ --buf->ref_count;
+ cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
+ }
+ }
+ buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ buf = &pool->frame_bufs[buf_idx];
buf->buf.y_crop_width = ref->y_crop_width;
buf->buf.y_crop_height = ref->y_crop_height;
cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
@@ -2959,18 +3163,49 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
vpx_clear_system_state();
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ recon_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ } else {
+ recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ }
+#else
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+
+ if (cpi->twopass.total_left_stats.coded_error != 0.0) {
+ double dc_quant_divisor;
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ dc_quant_divisor = 4.0;
+ break;
+ case VPX_BITS_10:
+ dc_quant_divisor = 16.0;
+ break;
+ case VPX_BITS_12:
+ dc_quant_divisor = 64.0;
+ break;
+ default:
+ assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ break;
+ }
+#else
+ dc_quant_divisor = 4.0;
+#endif
- if (cpi->twopass.total_left_stats.coded_error != 0.0)
- fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
+ fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
"%10"PRId64" %10"PRId64" %10d "
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
"%6d %6d %5d %5d %5d "
"%10"PRId64" %10.3lf"
- "%10lf %8u %10"PRId64" %10d %10d %10d\n",
+ "%10lf %8u %10"PRId64" %10d %10d %10d %10d %10d\n",
cpi->common.current_video_frame,
cm->width, cm->height,
+ cpi->td.rd_counts.m_search_count,
+ cpi->td.rd_counts.ex_search_count,
cpi->rc.source_alt_ref_pending,
cpi->rc.source_alt_ref_active,
cpi->rc.this_frame_target,
@@ -2985,7 +3220,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
(cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
cpi->rc.total_actual_bits, cm->base_qindex,
vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
- (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) / 4.0,
+ (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
+ dc_quant_divisor,
vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
cm->bit_depth),
cpi->rc.avg_q,
@@ -2998,8 +3234,10 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
(1 + cpi->twopass.total_left_stats.coded_error),
cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
cpi->twopass.kf_zeromotion_pct,
- cpi->twopass.fr_content_type);
-
+ cpi->twopass.fr_content_type,
+ cm->lf.filter_level,
+ cm->seg.aq_av_offset);
+ }
fclose(f);
if (0) {
@@ -3074,7 +3312,7 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q,
if (oxcf->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
-#if CONFIG_VP9_POSTPROC
+#if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
if (oxcf->noise_sensitivity > 0) {
int l = 0;
switch (oxcf->noise_sensitivity) {
@@ -3105,12 +3343,14 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (cpi->oxcf.noise_sensitivity > 0 &&
!cpi->denoiser.frame_buffer_initialized) {
- vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
+ if (vp9_denoiser_alloc(&cpi->denoiser, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
+ cm->use_highbitdepth,
#endif
- VP9_ENC_BORDER_IN_PIXELS);
+ VP9_ENC_BORDER_IN_PIXELS))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate denoiser");
}
}
#endif
@@ -3160,6 +3400,7 @@ static void set_frame_size(VP9_COMP *cpi) {
// TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
set_mv_search_params(cpi);
+ vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
#if CONFIG_VP9_TEMPORAL_DENOISING
// Reset the denoiser on the resized frame.
if (cpi->oxcf.noise_sensitivity > 0) {
@@ -3182,14 +3423,15 @@ static void set_frame_size(VP9_COMP *cpi) {
alloc_frame_mvs(cm, cm->new_fb_idx);
// Reset the frame pointers to the current frame size.
- vpx_realloc_frame_buffer(get_frame_new_buffer(cm),
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
+ if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
+ cm->use_highbitdepth,
#endif
- VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL);
+ VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
alloc_util_frame_buffers(cpi);
init_motion_estimation(cpi);
@@ -3234,43 +3476,70 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
set_frame_size(cpi);
- cpi->Source = vp9_scale_if_required(cm,
- cpi->un_scaled_source,
- &cpi->scaled_source,
- (cpi->oxcf.pass == 0));
-
+ if (is_one_pass_cbr_svc(cpi) &&
+ cpi->un_scaled_source->y_width == cm->width << 2 &&
+ cpi->un_scaled_source->y_height == cm->height << 2 &&
+ cpi->svc.scaled_temp.y_width == cm->width << 1 &&
+ cpi->svc.scaled_temp.y_height == cm->height << 1) {
+ cpi->Source = vp9_svc_twostage_scale(cm,
+ cpi->un_scaled_source,
+ &cpi->scaled_source,
+ &cpi->svc.scaled_temp);
+ } else {
+ cpi->Source = vp9_scale_if_required(cm,
+ cpi->un_scaled_source,
+ &cpi->scaled_source,
+ (cpi->oxcf.pass == 0));
+ }
// Avoid scaling last_source unless it's needed.
- // Last source is currently only used for screen-content mode,
- // or if partition_search_type == SOURCE_VAR_BASED_PARTITION.
+ // Last source is needed if vp9_avg_source_sad() is used, or if
+ // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
+ // estimation is enabled.
if (cpi->unscaled_last_source != NULL &&
(cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
- cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION))
+ (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
+ cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
+ cpi->noise_estimate.enabled))
cpi->Last_Source = vp9_scale_if_required(cm,
cpi->unscaled_last_source,
&cpi->scaled_last_source,
(cpi->oxcf.pass == 0));
-#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 &&
- cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
- vp9_denoiser_update_noise_estimate(cpi);
+ if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) {
+ memset(cpi->consec_zero_mv, 0,
+ cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
}
-#endif
+
+ vp9_update_noise_estimate(cpi);
if (cpi->oxcf.pass == 0 &&
- cpi->oxcf.rc_mode == VPX_CBR &&
+ cpi->oxcf.mode == REALTIME &&
+ cpi->oxcf.speed >= 5 &&
cpi->resize_state == 0 &&
cm->frame_type != KEY_FRAME &&
- cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
+ cpi->oxcf.rc_mode == VPX_VBR))
vp9_avg_source_sad(cpi);
- if (frame_is_intra_only(cm) == 0) {
+ // For 1 pass SVC, since only ZEROMV is allowed for the upsampled
+ // reference frame (i.e., svc->force_zero_mode_spatial_ref = 0), we can
+ // avoid this frame-level upsampling.
+ if (frame_is_intra_only(cm) == 0 && !is_one_pass_cbr_svc(cpi)) {
vp9_scale_references(cpi);
}
set_size_independent_vars(cpi);
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+ if (cpi->oxcf.speed >= 5 &&
+ cpi->oxcf.pass == 0 &&
+ cpi->oxcf.rc_mode == VPX_CBR &&
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
+ cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ cpi->use_skin_detection = 1;
+ }
+
vp9_set_quantizer(cm, q);
vp9_set_variance_partition_thresholds(cpi, q);
@@ -3281,6 +3550,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_vaq_frame_setup(cpi);
+ } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
+ vp9_360aq_frame_setup(cpi);
} else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
vp9_setup_in_frame_q_adj(cpi);
} else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
@@ -3411,6 +3682,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_vaq_frame_setup(cpi);
+ } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
+ vp9_360aq_frame_setup(cpi);
} else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
vp9_setup_in_frame_q_adj(cpi);
}
@@ -3642,6 +3915,25 @@ static void set_ext_overrides(VP9_COMP *cpi) {
}
}
+YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm,
+ YV12_BUFFER_CONFIG *unscaled,
+ YV12_BUFFER_CONFIG *scaled,
+ YV12_BUFFER_CONFIG *scaled_temp) {
+ if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
+ cm->mi_rows * MI_SIZE != unscaled->y_height) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth);
+ scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth);
+#else
+ vp9_scale_and_extend_frame(unscaled, scaled_temp);
+ vp9_scale_and_extend_frame(scaled_temp, scaled);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return scaled;
+ } else {
+ return unscaled;
+ }
+}
+
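
The two-stage path exists because the normative scaler is limited to a 2:1 ratio per call (see the guard added to vp9_scale_if_required() just below); encode_without_recode_loop() takes it only when the SVC source is exactly 4x the coded size and svc.scaled_temp is the 2x intermediate. Illustrative sizes:

    /* 5120x2880 source -> 2560x1440 (scaled_temp) -> 1280x720 (scaled),
     * so each normative scaling pass stays within 2:1. */
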
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled,
@@ -3649,13 +3941,17 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
cm->mi_rows * MI_SIZE != unscaled->y_height) {
#if CONFIG_VP9_HIGHBITDEPTH
- if (use_normative_scaler)
+ if (use_normative_scaler &&
+ unscaled->y_width <= (scaled->y_width << 1) &&
+ unscaled->y_height <= (scaled->y_height << 1))
scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth);
else
scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
#else
- if (use_normative_scaler)
- scale_and_extend_frame(unscaled, scaled);
+ if (use_normative_scaler &&
+ unscaled->y_width <= (scaled->y_width << 1) &&
+ unscaled->y_height <= (scaled->y_height << 1))
+ vp9_scale_and_extend_frame(unscaled, scaled);
else
scale_and_extend_frame_nonnormative(unscaled, scaled);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -3797,14 +4093,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
// For 1 pass CBR, check if we are dropping this frame.
- // Never drop on key frame.
+ // For spatial layers, for now only check for frame-dropping on the first
+ // spatial layer; if the decision is to drop, the whole super-frame is
+ // dropped.
if (oxcf->pass == 0 &&
oxcf->rc_mode == VPX_CBR &&
cm->frame_type != KEY_FRAME) {
- if (vp9_rc_drop_frame(cpi)) {
+ if (vp9_rc_drop_frame(cpi) ||
+ (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
vp9_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->svc.rc_drop_superframe = 1;
+ // TODO(marpan): Advancing the svc counters on dropped frames can break
+ // the referencing scheme for the fixed svc patterns defined in
+ // vp9_one_pass_cbr_svc_start_layer(). Look into fixing this issue, but
+ // for now, don't advance the svc frame counters on dropped frames.
+ // if (cpi->use_svc)
+ // vp9_inc_frame_in_layer(cpi);
return;
}
}
@@ -4020,13 +4325,16 @@ static void check_initial_width(VP9_COMP *cpi,
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
struct vpx_usec_timer timer;
int res = 0;
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
- const int use_highbitdepth = sd->flags & YV12_FLAG_HIGHBITDEPTH;
+ const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
check_initial_width(cpi, subsampling_x, subsampling_y);
@@ -4171,6 +4479,124 @@ static void adjust_image_stat(double y, double u, double v, double all,
}
#endif // CONFIG_INTERNAL_STATS
+static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
+ VP9_COMMON *const cm = &cpi->common;
+ Vp9LevelInfo *const level_info = &cpi->level_info;
+ Vp9LevelSpec *const level_spec = &level_info->level_spec;
+ Vp9LevelStats *const level_stats = &level_info->level_stats;
+ int i, idx;
+ uint64_t luma_samples, dur_end;
+ const uint32_t luma_pic_size = cm->width * cm->height;
+ double cpb_data_size;
+
+ vpx_clear_system_state();
+
+ // update level_stats
+ level_stats->total_compressed_size += *size;
+ if (cm->show_frame) {
+ level_stats->total_uncompressed_size +=
+ luma_pic_size +
+ 2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
+ level_stats->time_encoded =
+ (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
+ (double)TICKS_PER_SEC;
+ }
+
+ if (arf_src_index > 0) {
+ if (!level_stats->seen_first_altref) {
+ level_stats->seen_first_altref = 1;
+ } else if (level_stats->frames_since_last_altref <
+ level_spec->min_altref_distance) {
+ level_spec->min_altref_distance = level_stats->frames_since_last_altref;
+ }
+ level_stats->frames_since_last_altref = 0;
+ } else {
+ ++level_stats->frames_since_last_altref;
+ }
+
+ if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
+ idx = (level_stats->frame_window_buffer.start +
+ level_stats->frame_window_buffer.len++) % FRAME_WINDOW_SIZE;
+ } else {
+ idx = level_stats->frame_window_buffer.start;
+ level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
+ }
+ level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
+ level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
+ level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;
+
+ if (cm->frame_type == KEY_FRAME) {
+ level_stats->ref_refresh_map = 0;
+ } else {
+ int count = 0;
+ level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
+ // Also need to consider the case where the encoder refers to a buffer
+ // that has been implicitly refreshed after encoding a keyframe.
+ if (!cm->intra_only) {
+ level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
+ level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
+ level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
+ }
+ for (i = 0; i < REF_FRAMES; ++i) {
+ count += (level_stats->ref_refresh_map >> i) & 1;
+ }
+ if (count > level_spec->max_ref_frame_buffers) {
+ level_spec->max_ref_frame_buffers = count;
+ }
+ }
+
+ // update average_bitrate
+ level_spec->average_bitrate =
+ (double)level_stats->total_compressed_size / 125.0 /
+ level_stats->time_encoded;
+
+ // update max_luma_sample_rate
+ luma_samples = 0;
+ for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
+ idx = (level_stats->frame_window_buffer.start +
+ level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE;
+ if (i == 0) {
+ dur_end = level_stats->frame_window_buffer.buf[idx].ts;
+ }
+ if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
+ TICKS_PER_SEC) {
+ break;
+ }
+ luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
+ }
+ if (luma_samples > level_spec->max_luma_sample_rate) {
+ level_spec->max_luma_sample_rate = luma_samples;
+ }
+
+ // update max_cpb_size
+ cpb_data_size = 0;
+ for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
+ if (i >= level_stats->frame_window_buffer.len) break;
+ idx = (level_stats->frame_window_buffer.start +
+ level_stats->frame_window_buffer.len - 1 - i) % FRAME_WINDOW_SIZE;
+ cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
+ }
+ cpb_data_size = cpb_data_size / 125.0;
+ if (cpb_data_size > level_spec->max_cpb_size) {
+ level_spec->max_cpb_size = cpb_data_size;
+ }
+
+ // update max_luma_picture_size
+ if (luma_pic_size > level_spec->max_luma_picture_size) {
+ level_spec->max_luma_picture_size = luma_pic_size;
+ }
+
+ // update compression_ratio
+ level_spec->compression_ratio =
+ (double)level_stats->total_uncompressed_size * cm->bit_depth /
+ level_stats->total_compressed_size / 8.0;
+
+ // update max_col_tiles
+ if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
+ level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
+ }
+}
+
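
A worked unit check for the average_bitrate and compression_ratio updates above; the helper is illustrative (assumes <stdint.h>):

    /* total_compressed_size is in bytes: dividing by 125 (= 1000 / 8)
     * converts to kilobits, and dividing by time_encoded (seconds) gives
     * kilobits per second, matching the Vp9LevelSpec field comments.
     * E.g. 1,000,000 bytes over 10 s -> 1000000 / 125.0 / 10 = 800 kbps. */
    static double average_bitrate_kbps(uint64_t bytes, double seconds) {
      return (double)bytes / 125.0 / seconds;
    }
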
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush) {
@@ -4228,6 +4654,20 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
arf_src_index = 0;
if (arf_src_index) {
+ for (i = 0; i <= arf_src_index; ++i) {
+ struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
+ // Avoid creating an alt-ref if there's a forced keyframe pending.
+ if (e == NULL) {
+ break;
+ } else if (e->flags == VPX_EFLAG_FORCE_KF) {
+ arf_src_index = 0;
+ flush = 1;
+ break;
+ }
+ }
+ }
+
+ if (arf_src_index) {
assert(arf_src_index <= rc->frames_to_key);
if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
@@ -4247,7 +4687,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1;
#endif
- if (oxcf->arnr_max_frames > 0) {
+ if ((oxcf->arnr_max_frames > 0) && (oxcf->arnr_strength > 0)) {
// Produce the filtered ARF frame.
vp9_temporal_filter(cpi, arf_src_index);
vpx_extend_frame_borders(&cpi->alt_ref_buffer);
@@ -4427,6 +4867,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
generate_psnr_packet(cpi);
+ if (cpi->keep_level_stats && oxcf->pass != 1)
+ update_level_info(cpi, size, arf_src_index);
+
#if CONFIG_INTERNAL_STATS
if (oxcf->pass != 1) {
diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h
index 159c03aa8..128b62328 100644
--- a/libvpx/vp9/encoder/vp9_encoder.h
+++ b/libvpx/vp9/encoder/vp9_encoder.h
@@ -20,6 +20,7 @@
#include "vpx_dsp/ssim.h"
#endif
#include "vpx_dsp/variance.h"
+#include "vpx_ports/system_state.h"
#include "vpx_util/vpx_thread.h"
#include "vp9/common/vp9_alloccommon.h"
@@ -35,6 +36,7 @@
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
@@ -50,6 +52,9 @@
extern "C" {
#endif
+// vp9 uses 10,000,000 ticks/second for its time stamps
+#define TICKS_PER_SEC 10000000
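+// For example, an interval of 2,500,000 ticks is
+// 2500000 / (double)TICKS_PER_SEC = 0.25 seconds.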
+
typedef struct {
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
@@ -111,6 +116,7 @@ typedef enum {
VARIANCE_AQ = 1,
COMPLEXITY_AQ = 2,
CYCLIC_REFRESH_AQ = 3,
+ EQUATOR360_AQ = 4,
AQ_MODE_COUNT // This should always be the last member of the enum
} AQ_MODE;
@@ -127,7 +133,7 @@ typedef struct VP9EncoderConfig {
int height; // height of data passed to the compressor
unsigned int input_bit_depth; // Input bit depth.
double init_framerate; // set to passed in framerate
- int64_t target_bandwidth; // bandwidth to be used in kilobits per second
+ int64_t target_bandwidth; // bandwidth to be used in bits per second
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0:
@@ -225,6 +231,8 @@ typedef struct VP9EncoderConfig {
int max_threads;
+ int target_level;
+
vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
@@ -259,6 +267,8 @@ typedef struct RD_COUNTS {
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+ int m_search_count;
+ int ex_search_count;
} RD_COUNTS;
typedef struct ThreadData {
@@ -291,6 +301,69 @@ typedef struct IMAGE_STAT {
double worst;
} ImageStat;
+#define CPB_WINDOW_SIZE 4
+#define FRAME_WINDOW_SIZE 128
+#define SAMPLE_RATE_GRACE_P 0.015
+#define VP9_LEVELS 14
+
+typedef enum {
+ LEVEL_UNKNOWN = 0,
+ LEVEL_1 = 10,
+ LEVEL_1_1 = 11,
+ LEVEL_2 = 20,
+ LEVEL_2_1 = 21,
+ LEVEL_3 = 30,
+ LEVEL_3_1 = 31,
+ LEVEL_4 = 40,
+ LEVEL_4_1 = 41,
+ LEVEL_5 = 50,
+ LEVEL_5_1 = 51,
+ LEVEL_5_2 = 52,
+ LEVEL_6 = 60,
+ LEVEL_6_1 = 61,
+ LEVEL_6_2 = 62,
+ LEVEL_MAX = 255
+} VP9_LEVEL;
+
+typedef struct {
+ VP9_LEVEL level;
+ uint64_t max_luma_sample_rate;
+ uint32_t max_luma_picture_size;
+ double average_bitrate; // in kilobits per second
+ double max_cpb_size; // in kilobits
+ double compression_ratio;
+ uint8_t max_col_tiles;
+ uint32_t min_altref_distance;
+ uint8_t max_ref_frame_buffers;
+} Vp9LevelSpec;
+
+typedef struct {
+ int64_t ts; // timestamp
+ uint32_t luma_samples;
+ uint32_t size; // in bytes
+} FrameRecord;
+
+typedef struct {
+ FrameRecord buf[FRAME_WINDOW_SIZE];
+ uint8_t start;
+ uint8_t len;
+} FrameWindowBuffer;
+
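
An illustrative accessor for the sliding window above, mirroring the index arithmetic in update_level_info():

    /* Record i (i == 0 is the newest) lives at
     * (start + len - 1 - i) % FRAME_WINDOW_SIZE; once the buffer is full,
     * start advances instead of len growing. */
    static const FrameRecord *nth_newest(const FrameWindowBuffer *b, int i) {
      return &b->buf[(b->start + b->len - 1 - i) % FRAME_WINDOW_SIZE];
    }
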
+typedef struct {
+ uint8_t seen_first_altref;
+ uint32_t frames_since_last_altref;
+ uint64_t total_compressed_size;
+ uint64_t total_uncompressed_size;
+ double time_encoded; // in seconds
+ FrameWindowBuffer frame_window_buffer;
+ int ref_refresh_map;
+} Vp9LevelStats;
+
+typedef struct {
+ Vp9LevelStats level_stats;
+ Vp9LevelSpec level_spec;
+} Vp9LevelInfo;
+
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
@@ -335,7 +408,7 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tile_tok[4][1 << 6];
- unsigned int tok_count[4][1 << 6];
+ uint32_t tok_count[4][1 << 6];
// Ambient reconstruction err target for force key frames
int64_t ambient_err;
@@ -367,7 +440,7 @@ typedef struct VP9_COMP {
SPEED_FEATURES sf;
- unsigned int max_mv_magnitude;
+ uint32_t max_mv_magnitude;
int mv_step_param;
int allow_comp_inter_inter;
@@ -379,10 +452,10 @@ typedef struct VP9_COMP {
// clips, and 300 for < HD clips.
int encode_breakout;
- unsigned char *segmentation_map;
+ uint8_t *segmentation_map;
// segment threshold for encode breakout
- int segment_encode_breakout[MAX_SEGMENTS];
+ int segment_encode_breakout[MAX_SEGMENTS];
CYCLIC_REFRESH *cyclic_refresh;
ActiveMap active_map;
@@ -404,11 +477,10 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG alt_ref_buffer;
-
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
- int count;
+ int count;
uint64_t total_sq_error;
uint64_t total_samples;
ImageStat psnr;
@@ -469,7 +541,7 @@ typedef struct VP9_COMP {
int mbmode_cost[INTRA_MODES];
unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
- int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
+ int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES][INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
@@ -484,12 +556,22 @@ typedef struct VP9_COMP {
int resize_pending;
int resize_state;
+ int external_resize;
int resize_scale_num;
int resize_scale_den;
int resize_avg_qp;
int resize_buffer_underflow;
int resize_count;
+ int use_skin_detection;
+
+ int target_level;
+
+ NOISE_ESTIMATE noise_estimate;
+
+ // Count of how many consecutive times a block uses small/zeromv for encoding.
+ uint8_t *consec_zero_mv;
+
// VAR_BASED_PARTITION thresholds
// 0 - threshold_64x64; 1 - threshold_32x32;
// 2 - threshold_16x16; 3 - vbp_threshold_8x8;
@@ -503,6 +585,9 @@ typedef struct VP9_COMP {
VPxWorker *workers;
struct EncWorkerData *tile_thr_data;
VP9LfSync lf_row_sync;
+
+ int keep_level_stats;
+ Vp9LevelInfo level_info;
} VP9_COMP;
void vp9_initialize_enc(void);
@@ -614,6 +699,11 @@ void vp9_update_reference_frames(VP9_COMP *cpi);
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
+YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(VP9_COMMON *cm,
+ YV12_BUFFER_CONFIG *unscaled,
+ YV12_BUFFER_CONFIG *scaled,
+ YV12_BUFFER_CONFIG *scaled_temp);
+
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled,
@@ -653,6 +743,8 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) {
return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
}
+VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
+
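vp9_get_level() maps a measured Vp9LevelSpec onto the enum above. Conceptually it walks a table of the VP9_LEVELS per-level limits and returns the first (lowest) level whose limits cover the measured values; a hedged sketch, with vp9_level_defs as an assumed name for that table:

  // Sketch only: return the lowest level that covers the measured spec.
  // vp9_level_defs is an assumed table of VP9_LEVELS Vp9LevelSpec entries,
  // ordered from LEVEL_1 upward.
  VP9_LEVEL get_level_sketch(const Vp9LevelSpec *const level_spec) {
    int i;
    for (i = 0; i < VP9_LEVELS; ++i) {
      const Vp9LevelSpec *const this_level = &vp9_level_defs[i];
      if (level_spec->max_luma_sample_rate <=
              this_level->max_luma_sample_rate &&
          level_spec->max_luma_picture_size <=
              this_level->max_luma_picture_size &&
          level_spec->average_bitrate <= this_level->average_bitrate &&
          level_spec->max_cpb_size <= this_level->max_cpb_size &&
          level_spec->compression_ratio >= this_level->compression_ratio &&
          level_spec->max_col_tiles <= this_level->max_col_tiles &&
          level_spec->min_altref_distance >=
              this_level->min_altref_distance &&
          level_spec->max_ref_frame_buffers <=
              this_level->max_ref_frame_buffers)
        return this_level->level;
    }
    return LEVEL_UNKNOWN;
  }

The SAMPLE_RATE_GRACE_P constant defined above suggests the real lookup additionally allows a small (1.5%) margin on the sample-rate test.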
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
diff --git a/libvpx/vp9/encoder/vp9_ethread.c b/libvpx/vp9/encoder/vp9_ethread.c
index ad25712be..1d1926cae 100644
--- a/libvpx/vp9/encoder/vp9_ethread.c
+++ b/libvpx/vp9/encoder/vp9_ethread.c
@@ -30,6 +30,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
for (n = 0; n < ENTROPY_TOKENS; n++)
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
+
+ // Counts of all motion searches and exhaustive mesh searches.
+ td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
+ td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
}
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index 30738b52d..53a3ec7de 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -41,11 +41,8 @@
#define OUTPUT_FPF 0
#define ARF_STATS_OUTPUT 0
-#define GROUP_ADAPTIVE_MAXQ 1
-
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
-#define ERR_DIVISOR 128.0
#define FACTOR_PT_LOW 0.70
#define FACTOR_PT_HIGH 0.90
#define FIRST_PASS_Q 10.0
@@ -65,7 +62,7 @@
#define NCOUNT_INTRA_THRESH 8192
#define NCOUNT_INTRA_FACTOR 3
-#define NCOUNT_FRAME_II_THRESH 5.0
+
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
@@ -115,7 +112,7 @@ static void output_stats(FIRSTPASS_STATS *stats,
fprintf(fpfile, "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
"%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
+ "%12.4lf %12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
stats->frame,
stats->weight,
stats->intra_error,
@@ -126,6 +123,7 @@ static void output_stats(FIRSTPASS_STATS *stats,
stats->pcnt_second_ref,
stats->pcnt_neutral,
stats->intra_skip_pct,
+ stats->intra_smooth_pct,
stats->inactive_zone_rows,
stats->inactive_zone_cols,
stats->MVr,
@@ -155,82 +153,85 @@ static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm,
#endif
static void zero_stats(FIRSTPASS_STATS *section) {
- section->frame = 0.0;
- section->weight = 0.0;
- section->intra_error = 0.0;
- section->coded_error = 0.0;
- section->sr_coded_error = 0.0;
- section->pcnt_inter = 0.0;
- section->pcnt_motion = 0.0;
- section->pcnt_second_ref = 0.0;
- section->pcnt_neutral = 0.0;
- section->intra_skip_pct = 0.0;
+ section->frame = 0.0;
+ section->weight = 0.0;
+ section->intra_error = 0.0;
+ section->coded_error = 0.0;
+ section->sr_coded_error = 0.0;
+ section->pcnt_inter = 0.0;
+ section->pcnt_motion = 0.0;
+ section->pcnt_second_ref = 0.0;
+ section->pcnt_neutral = 0.0;
+ section->intra_skip_pct = 0.0;
+ section->intra_smooth_pct = 0.0;
section->inactive_zone_rows = 0.0;
section->inactive_zone_cols = 0.0;
- section->MVr = 0.0;
- section->mvr_abs = 0.0;
- section->MVc = 0.0;
- section->mvc_abs = 0.0;
- section->MVrv = 0.0;
- section->MVcv = 0.0;
- section->mv_in_out_count = 0.0;
- section->new_mv_count = 0.0;
- section->count = 0.0;
- section->duration = 1.0;
- section->spatial_layer_id = 0;
+ section->MVr = 0.0;
+ section->mvr_abs = 0.0;
+ section->MVc = 0.0;
+ section->mvc_abs = 0.0;
+ section->MVrv = 0.0;
+ section->MVcv = 0.0;
+ section->mv_in_out_count = 0.0;
+ section->new_mv_count = 0.0;
+ section->count = 0.0;
+ section->duration = 1.0;
+ section->spatial_layer_id = 0;
}
static void accumulate_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
- section->frame += frame->frame;
- section->weight += frame->weight;
- section->spatial_layer_id = frame->spatial_layer_id;
- section->intra_error += frame->intra_error;
- section->coded_error += frame->coded_error;
- section->sr_coded_error += frame->sr_coded_error;
- section->pcnt_inter += frame->pcnt_inter;
- section->pcnt_motion += frame->pcnt_motion;
- section->pcnt_second_ref += frame->pcnt_second_ref;
- section->pcnt_neutral += frame->pcnt_neutral;
- section->intra_skip_pct += frame->intra_skip_pct;
+ section->frame += frame->frame;
+ section->weight += frame->weight;
+ section->spatial_layer_id = frame->spatial_layer_id;
+ section->intra_error += frame->intra_error;
+ section->coded_error += frame->coded_error;
+ section->sr_coded_error += frame->sr_coded_error;
+ section->pcnt_inter += frame->pcnt_inter;
+ section->pcnt_motion += frame->pcnt_motion;
+ section->pcnt_second_ref += frame->pcnt_second_ref;
+ section->pcnt_neutral += frame->pcnt_neutral;
+ section->intra_skip_pct += frame->intra_skip_pct;
+ section->intra_smooth_pct += frame->intra_smooth_pct;
section->inactive_zone_rows += frame->inactive_zone_rows;
section->inactive_zone_cols += frame->inactive_zone_cols;
- section->MVr += frame->MVr;
- section->mvr_abs += frame->mvr_abs;
- section->MVc += frame->MVc;
- section->mvc_abs += frame->mvc_abs;
- section->MVrv += frame->MVrv;
- section->MVcv += frame->MVcv;
- section->mv_in_out_count += frame->mv_in_out_count;
- section->new_mv_count += frame->new_mv_count;
- section->count += frame->count;
- section->duration += frame->duration;
+ section->MVr += frame->MVr;
+ section->mvr_abs += frame->mvr_abs;
+ section->MVc += frame->MVc;
+ section->mvc_abs += frame->mvc_abs;
+ section->MVrv += frame->MVrv;
+ section->MVcv += frame->MVcv;
+ section->mv_in_out_count += frame->mv_in_out_count;
+ section->new_mv_count += frame->new_mv_count;
+ section->count += frame->count;
+ section->duration += frame->duration;
}
static void subtract_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
- section->frame -= frame->frame;
- section->weight -= frame->weight;
- section->intra_error -= frame->intra_error;
- section->coded_error -= frame->coded_error;
- section->sr_coded_error -= frame->sr_coded_error;
- section->pcnt_inter -= frame->pcnt_inter;
- section->pcnt_motion -= frame->pcnt_motion;
- section->pcnt_second_ref -= frame->pcnt_second_ref;
- section->pcnt_neutral -= frame->pcnt_neutral;
- section->intra_skip_pct -= frame->intra_skip_pct;
+ section->frame -= frame->frame;
+ section->weight -= frame->weight;
+ section->intra_error -= frame->intra_error;
+ section->coded_error -= frame->coded_error;
+ section->sr_coded_error -= frame->sr_coded_error;
+ section->pcnt_inter -= frame->pcnt_inter;
+ section->pcnt_motion -= frame->pcnt_motion;
+ section->pcnt_second_ref -= frame->pcnt_second_ref;
+ section->pcnt_neutral -= frame->pcnt_neutral;
+ section->intra_skip_pct -= frame->intra_skip_pct;
+ section->intra_smooth_pct -= frame->intra_smooth_pct;
section->inactive_zone_rows -= frame->inactive_zone_rows;
section->inactive_zone_cols -= frame->inactive_zone_cols;
- section->MVr -= frame->MVr;
- section->mvr_abs -= frame->mvr_abs;
- section->MVc -= frame->MVc;
- section->mvc_abs -= frame->mvc_abs;
- section->MVrv -= frame->MVrv;
- section->MVcv -= frame->MVcv;
- section->mv_in_out_count -= frame->mv_in_out_count;
- section->new_mv_count -= frame->new_mv_count;
- section->count -= frame->count;
- section->duration -= frame->duration;
+ section->MVr -= frame->MVr;
+ section->mvr_abs -= frame->mvr_abs;
+ section->MVc -= frame->MVc;
+ section->mvc_abs -= frame->mvc_abs;
+ section->MVrv -= frame->MVrv;
+ section->MVcv -= frame->MVcv;
+ section->mv_in_out_count -= frame->mv_in_out_count;
+ section->new_mv_count -= frame->new_mv_count;
+ section->count -= frame->count;
+ section->duration -= frame->duration;
}
// Calculate an active area of the image that discounts formatting
@@ -396,7 +397,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
MV tmp_mv = {0, 0};
MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3};
int num00, tmp_err, n;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY;
@@ -490,7 +491,63 @@ static void set_first_pass_params(VP9_COMP *cpi) {
cpi->rc.frames_to_key = INT_MAX;
}
+// This threshold is used to track blocks where, to all intents and purposes,
+// the intra prediction error is 0. Though the metric we test against is
+// technically an sse, we are mainly interested in blocks where all the pixels
+// in the 8 bit domain have an error of <= 1 (where error = sse), so a
+// linear scaling for 10 and 12 bit gives similar results.
#define UL_INTRA_THRESH 50
+static int get_ul_intra_threshold(VP9_COMMON *cm) {
+ int ret_val = UL_INTRA_THRESH;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ ret_val = UL_INTRA_THRESH;
+ break;
+ case VPX_BITS_10:
+ ret_val = UL_INTRA_THRESH << 2;
+ break;
+ case VPX_BITS_12:
+ ret_val = UL_INTRA_THRESH << 4;
+ break;
+ default:
+ assert(0 && "cm->bit_depth should be VPX_BITS_8, "
+ "VPX_BITS_10 or VPX_BITS_12");
+ }
+ }
+#else
+ (void) cm;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return ret_val;
+}
+
+#define SMOOTH_INTRA_THRESH 4000
+static int get_smooth_intra_threshold(VP9_COMMON *cm) {
+ int ret_val = SMOOTH_INTRA_THRESH;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ ret_val = SMOOTH_INTRA_THRESH;
+ break;
+ case VPX_BITS_10:
+ ret_val = SMOOTH_INTRA_THRESH << 2;
+ break;
+ case VPX_BITS_12:
+ ret_val = SMOOTH_INTRA_THRESH << 4;
+ break;
+ default:
+ assert(0 && "cm->bit_depth should be VPX_BITS_8, "
+ "VPX_BITS_10 or VPX_BITS_12");
+ }
+ }
+#else
+ (void) cm;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return ret_val;
+}
+
#define INVALID_ROW -1
void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int mb_row, mb_col;
@@ -517,6 +574,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
const int intrapenalty = INTRA_MODE_PENALTY;
double neutral_count;
int intra_skip_count = 0;
+ int intra_smooth_count = 0;
int image_data_start_row = INVALID_ROW;
int new_mv_count = 0;
int sum_in_vectors = 0;
@@ -535,6 +593,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
double intra_factor;
double brightness_factor;
BufferPool *const pool = cm->buffer_pool;
+ MODE_INFO mi_above, mi_left;
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
@@ -636,7 +695,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
MV best_ref_mv = {0, 0};
// Reset above block coeffs.
- xd->up_available = (mb_row != 0);
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);
@@ -662,20 +720,25 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
- xd->left_available = (mb_col != 0);
- xd->mi[0]->mbmi.sb_type = bsize;
- xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
+ xd->mi[0]->sb_type = bsize;
+ xd->mi[0]->ref_frame[0] = INTRA_FRAME;
set_mi_row_col(xd, &tile,
mb_row << 1, num_8x8_blocks_high_lookup[bsize],
mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
cm->mi_rows, cm->mi_cols);
+ // Are edges available for intra prediction?
+ // Since the firstpass does not populate the mi_grid_visible,
+ // above_mi/left_mi must be overwritten with a nonzero value when edges
+ // are available. Required by vp9_predict_intra_block().
+ xd->above_mi = (mb_row != 0) ? &mi_above : NULL;
+ xd->left_mi = (mb_col > tile.mi_col_start) ? &mi_left : NULL;
// Do intra 16x16 prediction.
x->skip_encode = 0;
- xd->mi[0]->mbmi.mode = DC_PRED;
- xd->mi[0]->mbmi.tx_size = use_dc_pred ?
+ xd->mi[0]->mode = DC_PRED;
+ xd->mi[0]->tx_size = use_dc_pred ?
(bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
- vp9_encode_intra_block_plane(x, bsize, 0);
+ vp9_encode_intra_block_plane(x, bsize, 0, 0);
this_error = vpx_get_mb_ss(x->plane[0].src_diff);
// Keep a record of blocks that have almost no intra error residual
@@ -683,11 +746,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
// domain). In natural videos this is uncommon, but it is much more
// common in animations, graphics and screen content, so may be used
// as a signal to detect these types of content.
- if (this_error < UL_INTRA_THRESH) {
+ if (this_error < get_ul_intra_threshold(cm)) {
++intra_skip_count;
} else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
image_data_start_row = mb_row;
}
+ if (this_error < get_smooth_intra_threshold(cm)) {
+ ++intra_smooth_count;
+ }
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
@@ -897,11 +963,11 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
mv.row *= 8;
mv.col *= 8;
this_error = motion_error;
- xd->mi[0]->mbmi.mode = NEWMV;
- xd->mi[0]->mbmi.mv[0].as_mv = mv;
- xd->mi[0]->mbmi.tx_size = TX_4X4;
- xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME;
- xd->mi[0]->mbmi.ref_frame[1] = NONE;
+ xd->mi[0]->mode = NEWMV;
+ xd->mi[0]->mv[0].as_mv = mv;
+ xd->mi[0]->tx_size = TX_4X4;
+ xd->mi[0]->ref_frame[0] = LAST_FRAME;
+ xd->mi[0]->ref_frame[1] = NONE;
vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
vp9_encode_sby_pass1(x, bsize);
sum_mvr += mv.row;
@@ -1053,8 +1119,10 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
fps.pcnt_neutral = (double)neutral_count / num_mbs;
fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
+ fps.intra_smooth_pct = (double)intra_smooth_count / num_mbs;
fps.inactive_zone_rows = (double)image_data_start_row;
- fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
+ // Currently set to 0 as most issues relate to letterboxing.
+ fps.inactive_zone_cols = (double)0;
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
@@ -1080,10 +1148,9 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
fps.pcnt_motion = 0.0;
}
- // TODO(paulwilkins): Handle the case when duration is set to 0, or
- // something less than the full time between subsequent values of
- // cpi->source_time_stamp.
- fps.duration = (double)(source->ts_end - source->ts_start);
+ // Don't allow a value of 0 for duration.
+ // (Section duration is also defaulted to a minimum of 1.0.)
+ fps.duration = VPXMAX(1.0, (double)(source->ts_end - source->ts_start));
// Don't want to do output stats with a stack variable!
twopass->this_frame_stats = fps;
@@ -1171,18 +1238,15 @@ static double calc_correction_factor(double err_per_mb,
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
-static int get_twopass_worst_quality(const VP9_COMP *cpi,
+#define ERR_DIVISOR 115.0
+static int get_twopass_worst_quality(VP9_COMP *cpi,
const double section_err,
double inactive_zone,
- int section_target_bandwidth,
- double group_weight_factor) {
+ int section_target_bandwidth) {
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ TWO_PASS *const twopass = &cpi->twopass;
+
// Clamp the target rate to VBR min / max limits.
const int target_rate =
vp9_rc_clamp_pframe_target_size(cpi, section_target_bandwidth);
@@ -1197,29 +1261,36 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+ double last_group_rate_err;
const int target_norm_bits_per_mb = ((uint64_t)target_rate <<
BPER_MB_NORMBITS) / active_mbs;
-
int q;
int is_svc_upper_layer = 0;
if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)
is_svc_upper_layer = 1;
+ // Based on recent history, adjust expectations of bits per macroblock.
+ last_group_rate_err = (double)twopass->rolling_arf_group_actual_bits /
+ DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits);
+ last_group_rate_err =
+ VPXMAX(0.25, VPXMIN(4.0, last_group_rate_err));
+ twopass->bpm_factor *= (3.0 + last_group_rate_err) / 4.0;
+ twopass->bpm_factor =
+ VPXMAX(0.25, VPXMIN(4.0, twopass->bpm_factor));
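The two statements above form a damped multiplicative correction. The actual/target bit ratio of the last ARF group is clamped to [0.25, 4.0] and then averaged 3:1 toward neutral before being folded into bpm_factor, which is clamped to the same range. For example, if the last group overshot its target by 2x, the multiplier is (3.0 + 2.0) / 4.0 = 1.25, so bpm_factor grows by 25%, raising the estimated bits per macroblock in the Q loop below and therefore the max Q it settles on.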
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double factor =
calc_correction_factor(av_err_per_mb,
- ERR_DIVISOR - ediv_size_correction,
+ ERR_DIVISOR,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
cpi->common.bit_depth);
const int bits_per_mb =
vp9_rc_bits_per_mb(INTER_FRAME, q,
- factor * speed_term * group_weight_factor,
+ factor * speed_term * cpi->twopass.bpm_factor,
cpi->common.bit_depth);
if (bits_per_mb <= target_norm_bits_per_mb)
break;
@@ -1270,6 +1341,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const int is_two_pass_svc = (svc->number_spatial_layers > 1) ||
(svc->number_temporal_layers > 1);
+ RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = is_two_pass_svc ?
&svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
double frame_rate;
@@ -1326,26 +1398,33 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
}
// Reset the vbr bits off target counters
- cpi->rc.vbr_bits_off_target = 0;
- cpi->rc.vbr_bits_off_target_fast = 0;
-
- cpi->rc.rate_error_estimate = 0;
+ rc->vbr_bits_off_target = 0;
+ rc->vbr_bits_off_target_fast = 0;
+ rc->rate_error_estimate = 0;
// Static sequence monitor variables.
twopass->kf_zeromotion_pct = 100;
twopass->last_kfgroup_zeromotion_pct = 100;
+ // Initialize the bits per macroblock estimate correction factor.
+ twopass->bpm_factor = 1.0;
+ // Initialize actual and target bits counters for ARF groups so that
+ // at the start we have a neutral bpm adjustment.
+ twopass->rolling_arf_group_target_bits = 1;
+ twopass->rolling_arf_group_actual_bits = 1;
+
if (oxcf->resize_mode != RESIZE_NONE) {
init_subsampling(cpi);
}
}
#define SR_DIFF_PART 0.0015
-#define MOTION_AMP_PART 0.003
#define INTRA_PART 0.005
#define DEFAULT_DECAY_LIMIT 0.75
#define LOW_SR_DIFF_TRHESH 0.1
#define SR_DIFF_MAX 128.0
+#define LOW_CODED_ERR_PER_MB 10.0
+#define NCOUNT_FRAME_II_THRESH 6.0
static double get_sr_decay_rate(const VP9_COMP *cpi,
const FIRSTPASS_STATS *frame) {
@@ -1356,12 +1435,15 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
double sr_decay = 1.0;
double modified_pct_inter;
double modified_pcnt_intra;
- const double motion_amplitude_factor =
- frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
+ const double motion_amplitude_part =
+ frame->pcnt_motion *
+ ((frame->mvc_abs + frame->mvr_abs) /
+ (cpi->initial_height + cpi->initial_width));
modified_pct_inter = frame->pcnt_inter;
- if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
- (double)NCOUNT_FRAME_II_THRESH) {
+ if (((frame->coded_error / num_mbs) > LOW_CODED_ERR_PER_MB) &&
+ ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
+ (double)NCOUNT_FRAME_II_THRESH)) {
modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral;
}
modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
@@ -1370,7 +1452,7 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);
sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) -
- (MOTION_AMP_PART * motion_amplitude_factor) -
+ motion_amplitude_part -
(INTRA_PART * modified_pcnt_intra);
}
return VPXMAX(sr_decay, VPXMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
@@ -1722,15 +1804,13 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
gf_group->update_type[0] = OVERLAY_UPDATE;
gf_group->rf_level[0] = INTER_NORMAL;
gf_group->bit_allocation[0] = 0;
- gf_group->arf_update_idx[0] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
} else {
gf_group->update_type[0] = GF_UPDATE;
gf_group->rf_level[0] = GF_ARF_STD;
gf_group->bit_allocation[0] = gf_arf_bits;
- gf_group->arf_update_idx[0] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
}
+ gf_group->arf_update_idx[0] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
// Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats))
@@ -1857,9 +1937,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double boost_score = 0.0;
double old_boost_score = 0.0;
double gf_group_err = 0.0;
-#if GROUP_ADAPTIVE_MAXQ
double gf_group_raw_error = 0.0;
-#endif
double gf_group_skip_pct = 0.0;
double gf_group_inactive_zone_rows = 0.0;
double gf_first_frame_err = 0.0;
@@ -1909,9 +1987,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// the error score / cost of this frame has already been accounted for.
if (arf_active_or_kf) {
gf_group_err -= gf_first_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error -= this_frame->coded_error;
-#endif
gf_group_skip_pct -= this_frame->intra_skip_pct;
gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
}
@@ -1929,7 +2005,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int int_lbq =
(int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
cpi->common.bit_depth));
- active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200);
+ active_min_gf_interval =
+ rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
if (active_min_gf_interval > rc->max_gf_interval)
active_min_gf_interval = rc->max_gf_interval;
@@ -1940,14 +2017,20 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// bits to spare and are better with a smaller interval and smaller boost.
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
- active_max_gf_interval = 12 + VPXMIN(4, (int_lbq / 6));
- if (active_max_gf_interval < active_min_gf_interval)
- active_max_gf_interval = active_min_gf_interval;
+ active_max_gf_interval =
+ 12 + arf_active_or_kf + VPXMIN(4, (int_lbq / 6));
- if (active_max_gf_interval > rc->max_gf_interval)
- active_max_gf_interval = rc->max_gf_interval;
+ // We have: active_min_gf_interval <= rc->max_gf_interval
if (active_max_gf_interval < active_min_gf_interval)
active_max_gf_interval = active_min_gf_interval;
+ else if (active_max_gf_interval > rc->max_gf_interval)
+ active_max_gf_interval = rc->max_gf_interval;
+
+ // Would the active max drop us out just before the next kf?
+ if ((active_max_gf_interval <= rc->frames_to_key) &&
+ (active_max_gf_interval >=
+ (rc->frames_to_key - rc->min_gf_interval)))
+ active_max_gf_interval = rc->frames_to_key / 2;
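As a worked example of the guard above: with rc->frames_to_key = 20 and rc->min_gf_interval = 4, any active_max_gf_interval from 16 through 20 would end the group within min_gf_interval frames of the keyframe, so it is cut back to 20 / 2 = 10, splitting the distance to the keyframe into two roughly equal GF groups instead.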
}
}
@@ -1958,9 +2041,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Accumulate error score of frames in this gf group.
mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
gf_group_err += mod_frame_err;
-#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
-#endif
gf_group_skip_pct += this_frame->intra_skip_pct;
gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
@@ -2005,11 +2086,13 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Break out conditions.
if (
// Break at active_max_gf_interval unless almost totally static.
- (i >= (active_max_gf_interval + arf_active_or_kf) &&
- zero_motion_accumulator < 0.995) ||
+ ((i >= active_max_gf_interval) &&
+ (zero_motion_accumulator < 0.995)) ||
(
// Don't break out with a very short interval.
- (i >= active_min_gf_interval + arf_active_or_kf) &&
+ (i >= active_min_gf_interval) &&
+ // If possible don't break very close to a kf.
+ ((rc->frames_to_key - i) >= rc->min_gf_interval) &&
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > 3.0) ||
@@ -2023,8 +2106,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
old_boost_score = boost_score;
}
- twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
-
// Was the group length constrained by the requirement for a new KF?
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
@@ -2060,9 +2141,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
if (EOF == input_stats(twopass, this_frame))
break;
gf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
-#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
-#endif
gf_group_skip_pct += this_frame->intra_skip_pct;
gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
}
@@ -2077,7 +2156,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Calculate the bits to be allocated to the gf/arf group as a whole
gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
-#if GROUP_ADAPTIVE_MAXQ
// Calculate an estimate of the maxq needed for the group.
// We are more aggressive about correcting for sections
// where there could be significant overshoot than for easier
@@ -2092,26 +2170,13 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const double group_av_inactive_zone =
((gf_group_inactive_zone_rows * 2) /
(rc->baseline_gf_interval * (double)cm->mb_rows));
-
- int tmp_q;
- // rc factor is a weight factor that corrects for local rate control drift.
- double rc_factor = 1.0;
- if (rc->rate_error_estimate > 0) {
- rc_factor = VPXMAX(RC_FACTOR_MIN,
- (double)(100 - rc->rate_error_estimate) / 100.0);
- } else {
- rc_factor = VPXMIN(RC_FACTOR_MAX,
- (double)(100 - rc->rate_error_estimate) / 100.0);
- }
- tmp_q =
- get_twopass_worst_quality(cpi, group_av_err,
- (group_av_skip_pct + group_av_inactive_zone),
- vbr_group_bits_per_frame,
- twopass->kfgroup_inter_fraction * rc_factor);
+ int tmp_q =
+ get_twopass_worst_quality(cpi, group_av_err,
+ (group_av_skip_pct + group_av_inactive_zone),
+ vbr_group_bits_per_frame);
twopass->active_worst_quality =
- VPXMAX(tmp_q, twopass->active_worst_quality >> 1);
+ (tmp_q + (twopass->active_worst_quality * 3)) >> 2;
}
-#endif
// Calculate the extra bits to be used for boosted frame(s)
gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
@@ -2151,6 +2216,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Default to starting GF groups at normal frame size.
cpi->rc.next_frame_size_selector = UNSCALED;
}
+
+ // Reset rolling actual and target bits counters for ARF groups.
+ twopass->rolling_arf_group_target_bits = 0;
+ twopass->rolling_arf_group_actual_bits = 0;
}
// Threshold for use of the lagging second reference frame. High second ref
@@ -2265,6 +2334,8 @@ static int test_candidate_kf(TWO_PASS *twopass,
return is_viable_kf;
}
+#define FRAMES_TO_CHECK_DECAY 8
+
static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int i, j;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2283,7 +2354,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double boost_score = 0.0;
double kf_mod_err = 0.0;
double kf_group_err = 0.0;
- double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+ double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
vp9_zero(next_frame);
@@ -2310,6 +2381,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
kf_mod_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
+ // Initialize the decay rates for the recent frames to check
+ for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
+ recent_loop_decay[j] = 1.0;
+
// Find the next keyframe.
i = 0;
while (twopass->stats_in < twopass->stats_in_end &&
@@ -2336,9 +2411,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// We want to know something about the recent past... rather than
// as used elsewhere where we are concerned with decay in prediction
// quality since the last GF or KF.
- recent_loop_decay[i % 8] = loop_decay_rate;
+ recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate;
decay_accumulator = 1.0;
- for (j = 0; j < 8; ++j)
+ for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
decay_accumulator *= recent_loop_decay[j];
// Special check for transition or high motion followed by a
@@ -2482,16 +2557,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
rc->kf_boost, twopass->kf_group_bits);
- // Work out the fraction of the kf group bits reserved for the inter frames
- // within the group after discounting the bits for the kf itself.
- if (twopass->kf_group_bits) {
- twopass->kfgroup_inter_fraction =
- (double)(twopass->kf_group_bits - kf_bits) /
- (double)twopass->kf_group_bits;
- } else {
- twopass->kfgroup_inter_fraction = 1.0;
- }
-
twopass->kf_group_bits -= kf_bits;
// Save the bits to spend on the key frame.
@@ -2585,21 +2650,12 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
- int frames_left;
FIRSTPASS_STATS this_frame;
int target_rate;
LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ?
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0;
- if (lc != NULL) {
- frames_left = (int)(twopass->total_stats.count -
- lc->current_video_frame_in_layer);
- } else {
- frames_left = (int)(twopass->total_stats.count -
- cm->current_video_frame);
- }
-
if (!twopass->stats_in)
return;
@@ -2641,6 +2697,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
} else if (cm->current_video_frame == 0 ||
(lc != NULL && lc->current_video_frame_in_layer == 0)) {
+ const int frames_left = (int)(twopass->total_stats.count -
+ ((lc != NULL) ? lc->current_video_frame_in_layer
+ : cm->current_video_frame));
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
@@ -2652,10 +2711,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
const double section_inactive_zone =
(twopass->total_left_stats.inactive_zone_rows * 2) /
((double)cm->mb_rows * section_length);
- const int tmp_q =
- get_twopass_worst_quality(cpi, section_error,
- section_intra_skip + section_inactive_zone,
- section_target_bandwidth, DEFAULT_GRP_WEIGHT);
+ int tmp_q;
+
+ tmp_q = get_twopass_worst_quality(cpi, section_error,
+ section_intra_skip + section_inactive_zone, section_target_bandwidth);
twopass->active_worst_quality = tmp_q;
twopass->baseline_active_worst_quality = tmp_q;
@@ -2749,6 +2808,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// applied when combining MB error values for the frame.
twopass->mb_av_energy =
log(((this_frame.intra_error * 256.0) / num_mbs) + 1.0);
+ twopass->mb_smooth_pct = this_frame.intra_smooth_pct;
}
// Update the total stats remaining structure.
@@ -2761,6 +2821,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
RATE_CONTROL *const rc = &cpi->rc;
+ VP9_COMMON *const cm = &cpi->common;
const int bits_used = rc->base_frame_target;
// VBR correction is done through rc->vbr_bits_off_target. Based on the
@@ -2771,6 +2832,10 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0);
+ // Target vs actual bits for this arf group.
+ twopass->rolling_arf_group_target_bits += rc->this_frame_target;
+ twopass->rolling_arf_group_actual_bits += rc->projected_frame_size;
+
// Calculate the pct rc error.
if (rc->total_actual_bits) {
rc->rate_error_estimate =
@@ -2792,12 +2857,27 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
// If the rate control is drifting consider adjustment to min or maxq.
if ((cpi->oxcf.rc_mode != VPX_Q) &&
- (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD) &&
!cpi->rc.is_src_frame_alt_ref) {
const int maxq_adj_limit =
rc->worst_quality - twopass->active_worst_quality;
const int minq_adj_limit =
(cpi->oxcf.rc_mode == VPX_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT);
+ int aq_extend_min = 0;
+ int aq_extend_max = 0;
+
+ // Extend min or Max Q range to account for imbalance from the base
+ // value when using AQ.
+ if (cpi->oxcf.aq_mode != NO_AQ) {
+ if (cm->seg.aq_av_offset < 0) {
+ // The balance of the AQ map tends towards lowering the average Q.
+ aq_extend_min = 0;
+ aq_extend_max = VPXMIN(maxq_adj_limit, -cm->seg.aq_av_offset);
+ } else {
+ // The balance of the AQ map tends towards raising the average Q.
+ aq_extend_min = VPXMIN(minq_adj_limit, cm->seg.aq_av_offset);
+ aq_extend_max = 0;
+ }
+ }
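As a worked example: if the AQ map has been raising the average Q, say cm->seg.aq_av_offset = 4, then aq_extend_min = 4 (assuming minq_adj_limit >= 4) becomes the lower clamp bound for extend_minq below, so the min-Q adjustment at least offsets the segment deltas. A negative aq_av_offset symmetrically puts a floor under extend_maxq instead.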
// Undershoot.
if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
@@ -2822,8 +2902,10 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
--twopass->extend_maxq;
}
- twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit);
- twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit);
+ twopass->extend_minq =
+ clamp(twopass->extend_minq, aq_extend_min, minq_adj_limit);
+ twopass->extend_maxq =
+ clamp(twopass->extend_maxq, aq_extend_max, maxq_adj_limit);
// If there is a big and unexpected undershoot then feed the extra
// bits back in quickly. One situation where this may happen is if a
diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h
index 5875a7b9b..76072884d 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/libvpx/vp9/encoder/vp9_firstpass.h
@@ -39,8 +39,6 @@ typedef struct {
} FIRSTPASS_MB_STATS;
#endif
-#define VLOW_MOTION_THRESHOLD 950
-
typedef struct {
double frame;
double weight;
@@ -52,6 +50,7 @@ typedef struct {
double pcnt_second_ref;
double pcnt_neutral;
double intra_skip_pct;
+ double intra_smooth_pct; // % of blocks that are smooth
double inactive_zone_rows; // Image mask rows top and bottom.
double inactive_zone_cols; // Image mask columns at left and right edges.
double MVr;
@@ -107,6 +106,7 @@ typedef struct {
double modified_error_max;
double modified_error_left;
double mb_av_energy;
+ double mb_smooth_pct;
#if CONFIG_FP_MB_STATS
uint8_t *frame_mb_stats_buf;
@@ -122,14 +122,13 @@ typedef struct {
// Error score of frames still to be coded in kf group
int64_t kf_group_error_left;
- // The fraction for a kf groups total bits allocated to the inter frames
- double kfgroup_inter_fraction;
+ double bpm_factor;
+ int rolling_arf_group_target_bits;
+ int rolling_arf_group_actual_bits;
int sr_update_lag;
-
int kf_zeromotion_pct;
int last_kfgroup_zeromotion_pct;
- int gf_zeromotion_pct;
int active_worst_quality;
int baseline_active_worst_quality;
int extend_minq;
diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c
index 8787be8ee..787bcf47e 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.c
+++ b/libvpx/vp9/encoder/vp9_lookahead.c
@@ -20,8 +20,8 @@
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
- unsigned int *idx) {
- unsigned int index = *idx;
+ int *idx) {
+ int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
@@ -35,7 +35,7 @@ static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
void vp9_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
- unsigned int i;
+ int i;
for (i = 0; i < ctx->max_sz; i++)
vpx_free_frame_buffer(&ctx->buf[i].img);
@@ -89,7 +89,7 @@ struct lookahead_ctx *vp9_lookahead_init(unsigned int width,
#define USE_PARTIAL_COPY 0
-int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
+int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
@@ -207,7 +207,7 @@ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx,
int drain) {
struct lookahead_entry *buf = NULL;
- if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
+ if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
buf = pop(ctx, &ctx->read_idx);
ctx->sz--;
}
@@ -221,9 +221,9 @@ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx,
if (index >= 0) {
// Forward peek
- if (index < (int)ctx->sz) {
+ if (index < ctx->sz) {
index += ctx->read_idx;
- if (index >= (int)ctx->max_sz)
+ if (index >= ctx->max_sz)
index -= ctx->max_sz;
buf = ctx->buf + index;
}
diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h
index 13820380f..db0fd1cde 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.h
+++ b/libvpx/vp9/encoder/vp9_lookahead.h
@@ -36,10 +36,10 @@ struct lookahead_entry {
#define MAX_PRE_FRAMES 1
struct lookahead_ctx {
- unsigned int max_sz; /* Absolute size of the queue */
- unsigned int sz; /* Number of buffers currently in the queue */
- unsigned int read_idx; /* Read index */
- unsigned int write_idx; /* Write index */
+ int max_sz; /* Absolute size of the queue */
+ int sz; /* Number of buffers currently in the queue */
+ int read_idx; /* Read index */
+ int write_idx; /* Write index */
struct lookahead_entry *buf; /* Buffer list */
};
diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c
index 41b6d1954..14a0b162b 100644
--- a/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -59,8 +59,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
{
- int distortion;
- unsigned int sse;
+ uint32_t distortion;
+ uint32_t sse;
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
@@ -69,8 +69,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
&distortion, &sse, NULL, 0, 0);
}
- xd->mi[0]->mbmi.mode = NEWMV;
- xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv;
+ xd->mi[0]->mode = NEWMV;
+ xd->mi[0]->mv[0].as_mv = *dst_mv;
vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
@@ -147,7 +147,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
unsigned int err;
- xd->mi[0]->mbmi.mode = mode;
+ xd->mi[0]->mode = mode;
vp9_predict_intra_block(xd, 2, TX_16X16, mode,
x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
@@ -243,20 +243,23 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
MV gld_top_mv = {0, 0};
MODE_INFO mi_local;
+ MODE_INFO mi_above, mi_left;
vp9_zero(mi_local);
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
x->mv_row_min = -BORDER_MV_PIXELS_B16;
x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
- xd->up_available = 0;
+ // Signal to vp9_predict_intra_block() that above is not available
+ xd->above_mi = NULL;
+
xd->plane[0].dst.stride = buf->y_stride;
xd->plane[0].pre[0].stride = buf->y_stride;
xd->plane[1].dst.stride = buf->uv_stride;
xd->mi[0] = &mi_local;
- mi_local.mbmi.sb_type = BLOCK_16X16;
- mi_local.mbmi.ref_frame[0] = LAST_FRAME;
- mi_local.mbmi.ref_frame[1] = NONE;
+ mi_local.sb_type = BLOCK_16X16;
+ mi_local.ref_frame[0] = LAST_FRAME;
+ mi_local.ref_frame[1] = NONE;
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
MV gld_left_mv = gld_top_mv;
@@ -268,7 +271,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
// the UMV borders.
x->mv_col_min = -BORDER_MV_PIXELS_B16;
x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
- xd->left_available = 0;
+ // Signal to vp9_predict_intra_block() that left is not available
+ xd->left_mi = NULL;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
@@ -280,14 +284,19 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
if (mb_col == 0) {
gld_top_mv = gld_left_mv;
}
- xd->left_available = 1;
+ // Signal to vp9_predict_intra_block() that left is available
+ xd->left_mi = &mi_left;
+
mb_y_in_offset += 16;
gld_y_in_offset += 16;
arf_y_in_offset += 16;
x->mv_col_min -= 16;
x->mv_col_max -= 16;
}
- xd->up_available = 1;
+
+ // Signal to vp9_predict_intra_block() that above is available
+ xd->above_mi = &mi_above;
+
mb_y_offset += buf->y_stride * 16;
gld_y_offset += golden_ref->y_stride * 16;
if (alt_ref)
diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c
index be8f57f7d..2ebacc0b8 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/libvpx/vp9/encoder/vp9_mcomp.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
@@ -69,6 +70,8 @@ int vp9_init_search_range(int size) {
static INLINE int mv_cost(const MV *mv,
const int *joint_cost, int *const comp_cost[2]) {
+ assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
+ assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
return joint_cost[vp9_get_mv_joint(mv)] +
comp_cost[0][mv->row] + comp_cost[1][mv->col];
}
@@ -80,52 +83,52 @@ int vp9_mv_bit_cost(const MV *mv, const MV *ref,
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}
-static int mv_err_cost(const MV *mv, const MV *ref,
- const int *mvjcost, int *mvcost[2],
- int error_per_bit) {
+#define PIXEL_TRANSFORM_ERROR_SCALE 4
+static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
+ int *mvcost[2], int error_per_bit) {
if (mvcost) {
- const MV diff = { mv->row - ref->row,
- mv->col - ref->col };
- return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
- error_per_bit, 13);
+ const MV diff = {mv->row - ref->row, mv->col - ref->col};
+ // This product sits at a 32-bit ceiling right now and any additional
+ // accuracy in either bit cost or error cost will cause it to overflow.
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
+ RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
+ PIXEL_TRANSFORM_ERROR_SCALE);
}
return 0;
}
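For reference, with the values these constants carry elsewhere in this tree (RDDIV_BITS = 7, VP9_PROB_COST_SHIFT = 9, RD_EPB_SHIFT = 6), the shift evaluates to 7 + 9 - 6 + 4 = 14, and ROUND_POWER_OF_TWO(v, 14) is (v + 8192) >> 14. That is the same rounding the reworked MVC macro below writes out literally as + 8192 and >> 14.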
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
- int error_per_bit) {
+ int sad_per_bit) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
- return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
- x->nmvsadcost) * error_per_bit, 8);
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
+ sad_per_bit,
+ VP9_PROB_COST_SHIFT);
}
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
- int len, ss_count = 1;
-
- cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
- cfg->ss[0].offset = 0;
+ int len;
+ int ss_count = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
int i;
- for (i = 0; i < 4; ++i) {
- search_site *const ss = &cfg->ss[ss_count++];
- ss->mv = ss_mvs[i];
- ss->offset = ss->mv.row * stride + ss->mv.col;
+ for (i = 0; i < 4; ++i, ++ss_count) {
+ cfg->ss_mv[ss_count] = ss_mvs[i];
+ cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
}
}
- cfg->ss_count = ss_count;
cfg->searches_per_step = 4;
+ cfg->total_steps = ss_count / cfg->searches_per_step;
}
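Assuming MAX_FIRST_STEP is 1 << (MAX_MVSEARCH_STEPS - 1) = 1024 with MAX_MVSEARCH_STEPS = 11, as declared in vp9_mcomp.h, the loop above runs len through 1024, 512, ..., 1: eleven steps of four sites each, leaving ss_count = 44 and total_steps = 11. The first step's entries are the MVs {-1024, 0}, {1024, 0}, {0, -1024} and {0, 1024}, with buffer offsets -1024 * stride, 1024 * stride, -1024 and 1024.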
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
- int len, ss_count = 1;
-
- cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
- cfg->ss[0].offset = 0;
+ int len;
+ int ss_count = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
@@ -134,33 +137,23 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
{-len, -len}, {-len, len}, {len, -len}, {len, len}
};
int i;
- for (i = 0; i < 8; ++i) {
- search_site *const ss = &cfg->ss[ss_count++];
- ss->mv = ss_mvs[i];
- ss->offset = ss->mv.row * stride + ss->mv.col;
+ for (i = 0; i < 8; ++i, ++ss_count) {
+ cfg->ss_mv[ss_count] = ss_mvs[i];
+ cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
}
}
- cfg->ss_count = ss_count;
cfg->searches_per_step = 8;
+ cfg->total_steps = ss_count / cfg->searches_per_step;
}
-/*
- * To avoid the penalty for crossing cache-line read, preload the reference
- * area in a small buffer, which is aligned to make sure there won't be crossing
- * cache-line read while reading from this buffer. This reduced the cpu
- * cycles spent on reading ref data in sub-pixel filter functions.
- * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
- * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
- * could reduce the area.
- */
-
-/* estimated cost of a motion vector (r,c) */
-#define MVC(r, c) \
- (mvcost ? \
- ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
- mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
- error_per_bit + 4096) >> 13 : 0)
+/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
+ * from the same math as in mv_err_cost(). */
+#define MVC(r, c) \
+ (mvcost ? \
+ ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
+ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
+ error_per_bit + 8192) >> 14 : 0)
// convert motion vector component to offset for sv[a]f calc
@@ -172,6 +165,33 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
return &buf[(r >> 3) * stride + (c >> 3)];
}
+#if CONFIG_VP9_HIGHBITDEPTH
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ int64_t tmpmse; \
+ if (second_pred == NULL) { \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), \
+ sp(r), z, src_stride, &sse); \
+ } else { \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), \
+ sp(r), z, src_stride, &sse, second_pred); \
+ } \
+ tmpmse = thismse; \
+ tmpmse += MVC(r, c); \
+ if (tmpmse >= INT_MAX) { \
+ v = INT_MAX; \
+ } else if ((v = (uint32_t)tmpmse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
@@ -192,6 +212,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
v = INT_MAX; \
}
+#endif
#define FIRST_LEVEL_CHECKS \
{ \
unsigned int left, right, up, down, diag; \
@@ -320,10 +341,10 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
const uint8_t *second_pred,
int w, int h, int offset,
int *mvjcost, int *mvcost[2],
- unsigned int *sse1,
- int *distortion) {
- unsigned int besterr;
+ uint32_t *sse1,
+ uint32_t *distortion) {
#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t besterr;
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
@@ -339,9 +360,13 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
} else {
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
}
- *distortion = besterr;
+ *distortion = (uint32_t)besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ if (besterr >= UINT32_MAX)
+ return UINT32_MAX;
+ return (uint32_t)besterr;
#else
+ uint32_t besterr;
(void) xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
@@ -352,8 +377,8 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd,
}
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
-#endif // CONFIG_VP9_HIGHBITDEPTH
return besterr;
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
static INLINE int divide_and_round(const int n, const int d) {
@@ -383,7 +408,7 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic,
(cost_list[4] - 2 * cost_list[0] + cost_list[2]));
}
-int vp9_find_best_sub_pixel_tree_pruned_evenmore(
+uint32_t vp9_skip_sub_pixel_tree(
const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -393,8 +418,53 @@ int vp9_find_best_sub_pixel_tree_pruned_evenmore(
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
+ SETUP_SUBPEL_SEARCH;
+ besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
+ z, src_stride, y, y_stride, second_pred,
+ w, h, offset, mvjcost, mvcost,
+ sse1, distortion);
+ (void) halfiters;
+ (void) quarteriters;
+ (void) eighthiters;
+ (void) whichdir;
+ (void) allow_hp;
+ (void) forced_stop;
+ (void) hstep;
+ (void) rr;
+ (void) rc;
+ (void) minr;
+ (void) minc;
+ (void) maxr;
+ (void) maxc;
+ (void) tr;
+ (void) tc;
+ (void) sse;
+ (void) thismse;
+ (void) cost_list;
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return UINT32_MAX;
+
+ return besterr;
+}
+
+uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
+ const MACROBLOCK *x,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *cost_list,
+ int *mvjcost, int *mvcost[2],
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -445,7 +515,7 @@ int vp9_find_best_sub_pixel_tree_pruned_evenmore(
tr = br;
tc = bc;
- if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
+ if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
if (eighthiters > 1) {
@@ -463,7 +533,7 @@ int vp9_find_best_sub_pixel_tree_pruned_evenmore(
return besterr;
}
-int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
+uint32_t vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
@@ -472,8 +542,8 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -513,7 +583,7 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
}
}
- if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
+ if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
tr = br;
tc = bc;
hstep >>= 1;
@@ -532,12 +602,12 @@ int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
(abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return INT_MAX;
+ return UINT32_MAX;
return besterr;
}
-int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
+uint32_t vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
int error_per_bit,
@@ -546,8 +616,8 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
+ uint32_t *distortion,
+ uint32_t *sse1,
const uint8_t *second_pred,
int w, int h) {
SETUP_SUBPEL_SEARCH;
@@ -608,7 +678,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
tc = bc;
}
- if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
+ if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
if (eighthiters > 1) {
@@ -639,19 +709,19 @@ static const MV search_step_table[12] = {
{0, -1}, {0, 1}, {-1, 0}, {1, 0}
};
-int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
- MV *bestmv, const MV *ref_mv,
- int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop,
- int iters_per_step,
- int *cost_list,
- int *mvjcost, int *mvcost[2],
- int *distortion,
- unsigned int *sse1,
- const uint8_t *second_pred,
- int w, int h) {
+uint32_t vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *cost_list,
+ int *mvjcost, int *mvcost[2],
+ uint32_t *distortion,
+ uint32_t *sse1,
+ const uint8_t *second_pred,
+ int w, int h) {
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
@@ -680,7 +750,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
unsigned int cost_array[5];
int kr, kc;
- if (!(allow_hp && vp9_use_mv_hp(ref_mv)))
+ if (!(allow_hp && use_mv_hp(ref_mv)))
if (round == 3)
round = 2;
@@ -790,7 +860,6 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
}
#undef MVC
-#undef PRE
#undef CHECK_BETTER
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
@@ -852,9 +921,9 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x,
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
- // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
- mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit);
+ mv_err_cost(&this_mv, &fcenter_mv,
+ x->nmvjointcost, x->mvcost,
+ x->errorperbit);
}
} else {
for (i = 0; i < 4; i++) {
@@ -866,9 +935,9 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x,
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
- // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
- mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit);
+ mv_err_cost(&this_mv, &fcenter_mv,
+ x->nmvjointcost, x->mvcost,
+ x->errorperbit);
}
}
}
@@ -1347,12 +1416,22 @@ int vp9_get_mvpred_var(const MACROBLOCK *x,
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV mv = {best_mv->row * 8, best_mv->col * 8};
- unsigned int unused;
-
+ uint32_t unused;
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t err = vfp->vf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv),
+ in_what->stride, &unused);
+ err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
+ x->mvcost, x->errorperbit) : 0);
+ if (err >= INT_MAX)
+ return INT_MAX;
+ return (int)err;
+#else
return vfp->vf(what->buf, what->stride,
get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
(use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
x->mvcost, x->errorperbit) : 0);
+#endif
}
int vp9_get_mvpred_av_var(const MACROBLOCK *x,
@@ -1523,69 +1602,83 @@ static int fast_dia_search(const MACROBLOCK *x,
#undef CHECK_BETTER
-int vp9_full_range_search_c(const MACROBLOCK *x,
- const search_site_config *cfg,
- MV *ref_mv, MV *best_mv,
- int search_param, int sad_per_bit, int *num00,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const MV *center_mv) {
+// Exhaustive motion search around a given centre position with a given
+// step size.
+static int exhuastive_mesh_search(const MACROBLOCK *x,
+ MV *ref_mv, MV *best_mv,
+ int range, int step, int sad_per_bit,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- const int range = 64;
- const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ MV fcenter_mv = {center_mv->row, center_mv->col};
unsigned int best_sad = INT_MAX;
int r, c, i;
int start_col, end_col, start_row, end_row;
+ int col_step = (step > 1) ? step : 4;
- // The cfg and search_param parameters are not used in this search variant
- (void)cfg;
- (void)search_param;
+ assert(step >= 1);
- clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
- *best_mv = *ref_mv;
- *num00 = 11;
+ clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
+ *best_mv = fcenter_mv;
best_sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, ref_mv), in_what->stride) +
- mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
- start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
- start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
- end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
- end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
-
- for (r = start_row; r <= end_row; ++r) {
- for (c = start_col; c <= end_col; c += 4) {
- if (c + 3 <= end_col) {
- unsigned int sads[4];
- const uint8_t *addrs[4];
- for (i = 0; i < 4; ++i) {
- const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
- addrs[i] = get_buf_from_mv(in_what, &mv);
- }
-
- fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
-
- for (i = 0; i < 4; ++i) {
- if (sads[i] < best_sad) {
- const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
- const unsigned int sad = sads[i] +
- mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
- if (sad < best_sad) {
- best_sad = sad;
- *best_mv = mv;
- }
+ get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
+ mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
+ start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row);
+ start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col);
+ end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row);
+ end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col);
+
+ for (r = start_row; r <= end_row; r += step) {
+ for (c = start_col; c <= end_col; c += col_step) {
+ // Step > 1 means we are not checking every location in this pass.
+ if (step > 1) {
+ const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c};
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
}
}
} else {
- for (i = 0; i < end_col - c; ++i) {
- const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
- unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride);
- if (sad < best_sad) {
- sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+        // Compute 4 sads in a single call if we are checking every location.
+ if (c + 3 <= end_col) {
+ unsigned int sads[4];
+ const uint8_t *addrs[4];
+ for (i = 0; i < 4; ++i) {
+ const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
+ addrs[i] = get_buf_from_mv(in_what, &mv);
+ }
+ fn_ptr->sdx4df(what->buf, what->stride, addrs,
+ in_what->stride, sads);
+
+ for (i = 0; i < 4; ++i) {
+ if (sads[i] < best_sad) {
+ const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
+ const unsigned int sad = sads[i] +
+ mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
+ }
+ }
+ } else {
+ for (i = 0; i < end_col - c; ++i) {
+ const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i};
+ unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
- best_sad = sad;
- *best_mv = mv;
+ sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ *best_mv = mv;
+ }
}
}
}
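Note the double comparison in each candidate test above: the raw SAD is compared against the current best first, and the mv rate cost is added only for candidates that already win, so most positions skip the cost computation entirely. A self-contained toy of the idiom, with synthetic stand-ins for fn_ptr->sdf() and mvsad_err_cost():

#include <stdio.h>
#include <stdlib.h>

/* Synthetic stand-ins; libvpx uses fn_ptr->sdf() and mvsad_err_cost(). */
static unsigned int sad_at(int r, int c) { return (unsigned int)(r * r + c * c); }
static unsigned int mv_cost_at(int r, int c) { return (unsigned int)(abs(r) + abs(c)); }

int main(void) {
  unsigned int best = ~0u;
  int r, c, best_r = 0, best_c = 0;
  for (r = -4; r <= 4; ++r) {
    for (c = -4; c <= 4; ++c) {
      unsigned int s = sad_at(r, c);  /* cheap SAD check first */
      if (s < best) {
        s += mv_cost_at(r, c);        /* pay for the rate cost only now */
        if (s < best) {
          best = s;
          best_r = r;
          best_c = c;
        }
      }
    }
  }
  printf("best=%u at (%d,%d)\n", best, best_r, best_c);
  return 0;
}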
@@ -1612,8 +1705,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
const uint8_t *best_address;
unsigned int bestsad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
+ int best_site = -1;
+ int last_site = -1;
int ref_row;
int ref_col;
@@ -1623,8 +1716,10 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// 0 = initial step (MAX_FIRST_STEP) pel
// 1 = (MAX_FIRST_STEP/2) pel,
// 2 = (MAX_FIRST_STEP/4) pel...
- const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
- const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
+ const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
+ const int tot_steps = cfg->total_steps - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
@@ -1642,17 +1737,17 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
- i = 1;
+ i = 0;
for (step = 0; step < tot_steps; step++) {
int all_in = 1, t;
    // All_in is true if every one of the points we are checking is within
// the bounds of the image.
- all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
- all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
- all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
- all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
+ all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_row_min);
+ all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_row_max);
+ all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_col_min);
+ all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_col_max);
// If all the pixels are within the bounds we don't check whether the
// search point is valid in this loop, otherwise we check each point
@@ -1664,15 +1759,15 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
unsigned char const *block_offset[4];
for (t = 0; t < 4; t++)
- block_offset[t] = ss[i + t].offset + best_address;
+ block_offset[t] = ss_os[i + t] + best_address;
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
sad_array);
for (t = 0; t < 4; t++, i++) {
if (sad_array[t] < bestsad) {
- const MV this_mv = {best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col};
+ const MV this_mv = {best_mv->row + ss_mv[i].row,
+ best_mv->col + ss_mv[i].col};
sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
sad_per_bit);
if (sad_array[t] < bestsad) {
@@ -1685,11 +1780,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
} else {
for (j = 0; j < cfg->searches_per_step; j++) {
// Trap illegal vectors
- const MV this_mv = {best_mv->row + ss[i].mv.row,
- best_mv->col + ss[i].mv.col};
+ const MV this_mv = {best_mv->row + ss_mv[i].row,
+ best_mv->col + ss_mv[i].col};
if (is_mv_in(x, &this_mv)) {
- const uint8_t *const check_here = ss[i].offset + best_address;
+ const uint8_t *const check_here = ss_os[i] + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
in_what_stride);
@@ -1705,25 +1800,25 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
}
}
if (best_site != last_site) {
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
+ best_mv->row += ss_mv[best_site].row;
+ best_mv->col += ss_mv[best_site].col;
+ best_address += ss_os[best_site];
last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
while (1) {
- const MV this_mv = {best_mv->row + ss[best_site].mv.row,
- best_mv->col + ss[best_site].mv.col};
+ const MV this_mv = {best_mv->row + ss_mv[best_site].row,
+ best_mv->col + ss_mv[best_site].col};
if (is_mv_in(x, &this_mv)) {
- const uint8_t *const check_here = ss[best_site].offset + best_address;
+ const uint8_t *const check_here = ss_os[best_site] + best_address;
unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
in_what_stride);
if (thissad < bestsad) {
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
- best_mv->row += ss[best_site].mv.row;
- best_mv->col += ss[best_site].mv.col;
- best_address += ss[best_site].offset;
+ best_mv->row += ss_mv[best_site].row;
+ best_mv->col += ss_mv[best_site].col;
+ best_address += ss_os[best_site];
continue;
}
}
@@ -1745,7 +1840,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
int center, offset = 0;
int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
for (d = 0; d <= bw; d += 16) {
- this_sad = vp9_vector_var(&ref[d], src, bwl);
+ this_sad = vpx_vector_var(&ref[d], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
offset = d;
@@ -1758,7 +1853,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1771,7 +1866,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1784,7 +1879,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1797,7 +1892,7 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
// check limit
if (this_pos < 0 || this_pos > bw)
continue;
- this_sad = vp9_vector_var(&ref[this_pos], src, bwl);
+ this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
@@ -1815,7 +1910,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
int mi_row, int mi_col) {
MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
DECLARE_ALIGNED(16, int16_t, hbuf[128]);
DECLARE_ALIGNED(16, int16_t, vbuf[128]);
@@ -1829,12 +1924,12 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
const int src_stride = x->plane[0].src.stride;
const int ref_stride = xd->plane[0].pre[0].stride;
uint8_t const *ref_buf, *src_buf;
- MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
+ MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
unsigned int best_sad, tmp_sad, this_sad[4];
MV this_mv;
const int norm_factor = 3 + (bw >> 5);
const YV12_BUFFER_CONFIG *scaled_ref_frame =
- vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
+ vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
if (scaled_ref_frame) {
int i;
@@ -1866,25 +1961,25 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
// Set up prediction 1-D reference set
ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
for (idx = 0; idx < search_width; idx += 16) {
- vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
+ vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
ref_buf += 16;
}
ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
for (idx = 0; idx < search_height; ++idx) {
- vbuf[idx] = vp9_int_pro_col(ref_buf, bw) >> norm_factor;
+ vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
ref_buf += ref_stride;
}
// Set up src 1-D reference set
for (idx = 0; idx < bw; idx += 16) {
src_buf = x->plane[0].src.buf + idx;
- vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
+ vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
}
src_buf = x->plane[0].src.buf;
for (idx = 0; idx < bh; ++idx) {
- src_vbuf[idx] = vp9_int_pro_col(src_buf, bw) >> norm_factor;
+ src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
src_buf += src_stride;
}
@@ -2015,6 +2110,70 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
return bestsme;
}
+#define MIN_RANGE 7
+#define MAX_RANGE 256
+#define MIN_INTERVAL 1
+// Runs a limited range exhaustive mesh search using a pattern set
+// according to the encode speed profile.
+static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
+ MV *centre_mv_full, int sadpb, int *cost_list,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv) {
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ MV temp_mv = {centre_mv_full->row, centre_mv_full->col};
+ MV f_ref_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
+ int bestsme;
+ int i;
+ int interval = sf->mesh_patterns[0].interval;
+ int range = sf->mesh_patterns[0].range;
+ int baseline_interval_divisor;
+
+ // Keep track of number of exhaustive calls (this frame in this thread).
+ ++(*x->ex_search_count_ptr);
+
+ // Trap illegal values for interval and range for this function.
+ if ((range < MIN_RANGE) || (range > MAX_RANGE) ||
+ (interval < MIN_INTERVAL) || (interval > range))
+ return INT_MAX;
+
+ baseline_interval_divisor = range / interval;
+
+ // Check size of proposed first range against magnitude of the centre
+ // value used as a starting point.
+ range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
+ range = VPXMIN(range, MAX_RANGE);
+ interval = VPXMAX(interval, range / baseline_interval_divisor);
+
+ // initial search
+ bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range,
+ interval, sadpb, fn_ptr, &temp_mv);
+
+ if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
+ // Progressive searches with range and step size decreasing each time
+ // till we reach a step size of 1. Then break out.
+ for (i = 1; i < MAX_MESH_STEP; ++i) {
+ // First pass with coarser step and longer range
+ bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv,
+ sf->mesh_patterns[i].range,
+ sf->mesh_patterns[i].interval,
+ sadpb, fn_ptr, &temp_mv);
+
+ if (sf->mesh_patterns[i].interval == 1)
+ break;
+ }
+ }
+
+ if (bestsme < INT_MAX)
+ bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
+ *dst_mv = temp_mv;
+
+ // Return cost list.
+ if (cost_list) {
+ calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
+ }
+ return bestsme;
+}
+
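full_pixel_exhaustive() above first widens the initial mesh so it covers a large starting MV, then rescales the interval so the number of probed points stays close to the speed-profile baseline. A sketch of just that range/interval adjustment, assuming interval >= 1 and range >= interval as the function's own guards enforce:

#include <stdlib.h>

#define MAX_RANGE 256

static int imax(int a, int b) { return a > b ? a : b; }
static int imin(int a, int b) { return a < b ? a : b; }

/* divisor preserves the baseline range/interval ratio when the range is
 * forced to grow to cover a large centre MV. */
static void adjust_mesh(int *range, int *interval, int mv_row, int mv_col) {
  const int divisor = *range / *interval;
  *range = imax(*range, (5 * imax(abs(mv_row), abs(mv_col))) / 4);
  *range = imin(*range, MAX_RANGE);
  *interval = imax(*interval, *range / divisor);
}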
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -2328,6 +2487,18 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
return best_sad;
}
+#define MIN_EX_SEARCH_LIMIT 128
+static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ const int max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
+ (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
+
+ return sf->allow_exhaustive_searches &&
+ (sf->exhaustive_searches_thresh < INT_MAX) &&
+ (*x->ex_search_count_ptr <= max_ex) &&
+ !cpi->rc.is_src_frame_alt_ref;
+}
+
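is_exhaustive_allowed() budgets mesh searches per frame: at most max_exaustive_pct percent of the normal full-pixel search count, but never fewer than MIN_EX_SEARCH_LIMIT. The budget arithmetic in isolation (toy helper, not libvpx API):

#define MIN_EX_SEARCH_LIMIT 128

/* Nonzero while another exhaustive search still fits in the budget. */
static int within_exhaustive_budget(int normal_searches, int mesh_searches,
                                    int max_exaustive_pct) {
  const int cap = (normal_searches * max_exaustive_pct) / 100;
  const int max_ex = cap > MIN_EX_SEARCH_LIMIT ? cap : MIN_EX_SEARCH_LIMIT;
  return mesh_searches <= max_ex;
}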
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full,
int step_param, int error_per_bit,
@@ -2346,6 +2517,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
cost_list[4] = INT_MAX;
}
+ // Keep track of number of searches (this frame in this thread).
+ ++(*x->m_search_count_ptr);
+
switch (method) {
case FAST_DIAMOND:
var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
@@ -2371,6 +2545,27 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
MAX_MVSEARCH_STEPS - 1 - step_param,
1, cost_list, fn_ptr, ref_mv, tmp_mv);
+
+      // Should we allow a follow-on exhaustive search?
+ if (is_exhaustive_allowed(cpi, x)) {
+ int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
+ exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+
+ // Threshold variance for an exhaustive full search.
+ if (var > exhuastive_thr) {
+ int var_ex;
+ MV tmp_mv_ex;
+ var_ex = full_pixel_exhaustive(cpi, x, tmp_mv,
+ error_per_bit, cost_list, fn_ptr,
+ ref_mv, &tmp_mv_ex);
+
+ if (var_ex < var) {
+ var = var_ex;
+ *tmp_mv = tmp_mv_ex;
+ }
+ }
+ }
break;
default:
assert(0 && "Invalid search method.");
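The shift exhuastive_thr >>= 8 - (bwl + bhl) above keeps the variance trigger roughly constant per pixel: b_width_log2_lookup/b_height_log2_lookup sum to 8 for a 64x64 block (no shift) and to 4 for a 16x16 block (threshold divided by 16). A sketch of the scaling, assuming the speed-feature threshold is referenced to 64x64:

#include <stdint.h>

/* Scale a 64x64-referenced variance threshold down to a smaller block.
 * bwl/bhl: log2 of block width/height in 4-pixel units, bwl + bhl <= 8. */
static int64_t scale_exhaustive_thresh(int64_t thresh_64x64, int bwl, int bhl) {
  return thresh_64x64 >> (8 - (bwl + bhl));
}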
diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h
index 5efd5435b..86cd267f3 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/libvpx/vp9/encoder/vp9_mcomp.h
@@ -31,16 +31,12 @@ extern "C" {
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
-// motion search site
-typedef struct search_site {
- MV mv;
- int offset;
-} search_site;
-
typedef struct search_site_config {
- search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
- int ss_count;
+ // motion search sites
+ MV ss_mv[8 * MAX_MVSEARCH_STEPS]; // Motion vector
+ intptr_t ss_os[8 * MAX_MVSEARCH_STEPS]; // Offset
int searches_per_step;
+ int total_steps;
} search_site_config;
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
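The header change above replaces the search_site array of structs (an MV paired with its precomputed buffer offset) with two parallel arrays, so the diamond search can read four offsets contiguously for sdx4df() without striding over interleaved MV fields. Schematically (the MAX_MVSEARCH_STEPS value here is assumed for illustration):

#include <stdint.h>

#define MAX_MVSEARCH_STEPS 11  /* assumed for illustration */

typedef struct { int16_t row, col; } MV;

/* Old layout: mv and offset interleaved per search site. */
typedef struct { MV mv; int offset; } search_site_aos;

/* New layout: struct of arrays, matching search_site_config. */
typedef struct {
  MV ss_mv[8 * MAX_MVSEARCH_STEPS];        /* candidate motion vectors */
  intptr_t ss_os[8 * MAX_MVSEARCH_STEPS];  /* matching buffer offsets  */
} search_site_soa;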
@@ -78,7 +74,7 @@ unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
BLOCK_SIZE bsize,
int mi_row, int mi_col);
-typedef int (fractional_mv_step_fp) (
+typedef uint32_t (fractional_mv_step_fp) (
const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
int allow_hp,
@@ -88,7 +84,7 @@ typedef int (fractional_mv_step_fp) (
int iters_per_step,
int *cost_list,
int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
+ uint32_t *distortion, uint32_t *sse1,
const uint8_t *second_pred,
int w, int h);
@@ -96,6 +92,7 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore;
+extern fractional_mv_step_fp vp9_skip_sub_pixel_tree;
typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,
diff --git a/libvpx/vp9/encoder/vp9_noise_estimate.c b/libvpx/vp9/encoder/vp9_noise_estimate.c
new file mode 100644
index 000000000..4b43b3879
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_noise_estimate.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/encoder/vp9_context_tree.h"
+#include "vp9/encoder/vp9_noise_estimate.h"
+#include "vp9/encoder/vp9_encoder.h"
+
+void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
+ int width,
+ int height) {
+ ne->enabled = 0;
+ ne->level = kLowLow;
+ ne->value = 0;
+ ne->count = 0;
+ ne->thresh = 90;
+ ne->last_w = 0;
+ ne->last_h = 0;
+ if (width * height >= 1920 * 1080) {
+ ne->thresh = 200;
+ } else if (width * height >= 1280 * 720) {
+ ne->thresh = 130;
+ }
+ ne->num_frames_estimate = 20;
+}
+
+static int enable_noise_estimation(VP9_COMP *const cpi) {
+ // Enable noise estimation if denoising is on.
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ return 1;
+#endif
+  // Only allow noise estimation under certain encoding modes.
+ // Enabled for 1 pass CBR, speed >=5, and if resolution is same as original.
+ // Not enabled for SVC mode and screen_content_mode.
+ // Not enabled for low resolutions.
+ if (cpi->oxcf.pass == 0 &&
+ cpi->oxcf.rc_mode == VPX_CBR &&
+ cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cpi->oxcf.speed >= 5 &&
+ cpi->resize_state == ORIG &&
+ cpi->resize_pending == 0 &&
+ !cpi->use_svc &&
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
+ cpi->common.width >= 640 &&
+ cpi->common.height >= 480)
+ return 1;
+ else
+ return 0;
+}
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+static void copy_frame(YV12_BUFFER_CONFIG * const dest,
+ const YV12_BUFFER_CONFIG * const src) {
+ int r;
+ const uint8_t *srcbuf = src->y_buffer;
+ uint8_t *destbuf = dest->y_buffer;
+
+ assert(dest->y_width == src->y_width);
+ assert(dest->y_height == src->y_height);
+
+ for (r = 0; r < dest->y_height; ++r) {
+ memcpy(destbuf, srcbuf, dest->y_width);
+ destbuf += dest->y_stride;
+ srcbuf += src->y_stride;
+ }
+}
+#endif // CONFIG_VP9_TEMPORAL_DENOISING
+
+NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) {
+ int noise_level = kLowLow;
+ if (ne->value > (ne->thresh << 1)) {
+ noise_level = kHigh;
+ } else {
+ if (ne->value > ne->thresh)
+ noise_level = kMedium;
+ else if (ne->value > ((9 * ne->thresh) >> 4))
+ noise_level = kLow;
+ else
+ noise_level = kLowLow;
+ }
+ return noise_level;
+}
+
+void vp9_update_noise_estimate(VP9_COMP *const cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ NOISE_ESTIMATE *const ne = &cpi->noise_estimate;
+ // Estimate of noise level every frame_period frames.
+ int frame_period = 8;
+ int thresh_consec_zeromv = 6;
+ unsigned int thresh_sum_diff = 100;
+ unsigned int thresh_sum_spatial = (200 * 200) << 8;
+ unsigned int thresh_spatial_var = (32 * 32) << 8;
+ int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7;
+ // Estimate is between current source and last source.
+ YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ last_source = &cpi->denoiser.last_source;
+#endif
+ ne->enabled = enable_noise_estimation(cpi);
+ if (!ne->enabled ||
+ cm->current_video_frame % frame_period != 0 ||
+ last_source == NULL ||
+ ne->last_w != cm->width ||
+ ne->last_h != cm->height) {
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ copy_frame(&cpi->denoiser.last_source, cpi->Source);
+#endif
+ if (last_source != NULL) {
+ ne->last_w = cm->width;
+ ne->last_h = cm->height;
+ }
+ return;
+ } else if (cpi->rc.avg_frame_low_motion < 50) {
+ // Force noise estimation to 0 and denoiser off if content has high motion.
+ ne->level = kLowLow;
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+#endif
+ return;
+ } else {
+ int num_samples = 0;
+ uint64_t avg_est = 0;
+ int bsize = BLOCK_16X16;
+ static const unsigned char const_source[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  // Loop over a sub-sample of the 16x16 blocks of the frame, and for blocks
+  // that have been encoded as zero/small mv for at least x consecutive
+  // frames, compute the variance to update the estimate of noise in the
+  // source.
+ const uint8_t *src_y = cpi->Source->y_buffer;
+ const int src_ystride = cpi->Source->y_stride;
+ const uint8_t *last_src_y = last_source->y_buffer;
+ const int last_src_ystride = last_source->y_stride;
+ const uint8_t *src_u = cpi->Source->u_buffer;
+ const uint8_t *src_v = cpi->Source->v_buffer;
+ const int src_uvstride = cpi->Source->uv_stride;
+ int mi_row, mi_col;
+ int num_low_motion = 0;
+ int frame_low_motion = 1;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ int bl_index = mi_row * cm->mi_cols + mi_col;
+ if (cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv)
+ num_low_motion++;
+ }
+ }
+ if (num_low_motion < ((3 * cm->mi_rows * cm->mi_cols) >> 3))
+ frame_low_motion = 0;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ // 16x16 blocks, 1/4 sample of frame.
+ if (mi_row % 4 == 0 && mi_col % 4 == 0 &&
+ mi_row < cm->mi_rows - 1 &&
+ mi_col < cm->mi_cols - 1) {
+ int bl_index = mi_row * cm->mi_cols + mi_col;
+ int bl_index1 = bl_index + 1;
+ int bl_index2 = bl_index + cm->mi_cols;
+ int bl_index3 = bl_index2 + 1;
+          // Only consider blocks that are likely steady background, i.e.,
+          // have been encoded as zero/low motion for x (= thresh_consec_zeromv)
+          // frames in a row. consec_zero_mv[] is defined for 8x8 blocks, so
+          // consider all 4 sub-blocks of the 16x16 block. Also, avoid skin
+          // blocks.
+ int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+ VPXMIN(cpi->consec_zero_mv[bl_index1],
+ VPXMIN(cpi->consec_zero_mv[bl_index2],
+ cpi->consec_zero_mv[bl_index3])));
+ int is_skin = 0;
+ if (cpi->use_skin_detection) {
+ is_skin = vp9_compute_skin_block(src_y,
+ src_u,
+ src_v,
+ src_ystride,
+ src_uvstride,
+ bsize,
+ consec_zeromv,
+ 0);
+ }
+ if (frame_low_motion &&
+ cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
+ cpi->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
+ cpi->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
+ cpi->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
+ !is_skin) {
+ // Compute variance.
+ unsigned int sse;
+ unsigned int variance = cpi->fn_ptr[bsize].vf(src_y,
+ src_ystride,
+ last_src_y,
+ last_src_ystride,
+ &sse);
+ // Only consider this block as valid for noise measurement if the
+ // average term (sse - variance = N * avg^{2}, N = 16X16) of the
+ // temporal residual is small (avoid effects from lighting change).
+ if ((sse - variance) < thresh_sum_diff) {
+ unsigned int sse2;
+ const unsigned int spatial_variance =
+ cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source,
+ 0, &sse2);
+ // Avoid blocks with high brightness and high spatial variance.
+ if ((sse2 - spatial_variance) < thresh_sum_spatial &&
+ spatial_variance < thresh_spatial_var) {
+ avg_est += variance / ((spatial_variance >> 9) + 1);
+ num_samples++;
+ }
+ }
+ }
+ }
+ src_y += 8;
+ last_src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ }
+ src_y += (src_ystride << 3) - (cm->mi_cols << 3);
+ last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3);
+ src_u += (src_uvstride << 2) - (cm->mi_cols << 2);
+ src_v += (src_uvstride << 2) - (cm->mi_cols << 2);
+ }
+ ne->last_w = cm->width;
+ ne->last_h = cm->height;
+  // Update the noise estimate if we have at least a minimum number of
+  // block samples, and avg_est > 0 (avg_est == 0 can happen if the
+  // application inputs duplicate frames).
+ if (num_samples > min_blocks_estimate && avg_est > 0) {
+ // Normalize.
+ avg_est = avg_est / num_samples;
+ // Update noise estimate.
+ ne->value = (int)((15 * ne->value + avg_est) >> 4);
+ ne->count++;
+ if (ne->count == ne->num_frames_estimate) {
+ // Reset counter and check noise level condition.
+ ne->num_frames_estimate = 30;
+ ne->count = 0;
+ ne->level = vp9_noise_estimate_extract_level(ne);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+#endif
+ }
+ }
+ }
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ copy_frame(&cpi->denoiser.last_source, cpi->Source);
+#endif
+}
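The update ne->value = (15 * ne->value + avg_est) >> 4 above is an integer exponential moving average giving each new frame-level sample a weight of 1/16, so the noise estimate tracks slowly and resists outliers. Standalone form:

/* EMA with alpha = 1/16: new = (15 * old + sample) / 16. Inputs stay
 * well below INT_MAX / 15, so the product cannot overflow. */
static int ema_update(int old_value, int sample) {
  return (15 * old_value + sample) >> 4;
}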
diff --git a/libvpx/vp9/encoder/vp9_noise_estimate.h b/libvpx/vp9/encoder/vp9_noise_estimate.h
new file mode 100644
index 000000000..826d125b5
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_noise_estimate.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_NOISE_ESTIMATE_H_
+#define VP9_ENCODER_NOISE_ESTIMATE_H_
+
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
+#include "vpx_scale/yv12config.h"
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+#include "vp9/encoder/vp9_denoiser.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum noise_level {
+ kLowLow,
+ kLow,
+ kMedium,
+ kHigh
+} NOISE_LEVEL;
+
+typedef struct noise_estimate {
+ int enabled;
+ NOISE_LEVEL level;
+ int value;
+ int thresh;
+ int count;
+ int last_w;
+ int last_h;
+ int num_frames_estimate;
+} NOISE_ESTIMATE;
+
+struct VP9_COMP;
+
+void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne,
+ int width,
+ int height);
+
+NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne);
+
+void vp9_update_noise_estimate(struct VP9_COMP *const cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_ENCODER_NOISE_ESTIMATE_H_
diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c
index 5444bc89f..f6b1dfcd5 100644
--- a/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/libvpx/vp9/encoder/vp9_picklpf.c
@@ -78,7 +78,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
// Start the search at the previous frame filter level unless it is now out of
// range.
- int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
+ int filt_mid =
+ clamp(lf->last_filt_level, min_filter_level, max_filter_level);
int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
// Sum squared error at each filter level
int64_t ss_err[MAX_LOOP_FILTER + 1];
diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c
index fc4d9ae67..ba6a0c6e1 100644
--- a/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/libvpx/vp9/encoder/vp9_pickmode.c
@@ -40,16 +40,25 @@ typedef struct {
int in_use;
} PRED_BUFFER;
-static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
+
+static const int pos_shift_16x16[4][4] = {
+ {9, 10, 13, 14},
+ {11, 12, 15, 16},
+ {17, 18, 21, 22},
+ {19, 20, 23, 24}
+};
+
+static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
+ const MACROBLOCK *x,
const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
- int_mv *mv_ref_list,
- int mi_row, int mi_col) {
+ int_mv *mv_ref_list, int_mv *base_mv,
+ int mi_row, int mi_col, int use_base_mv) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
- const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+ const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];
int different_ref_found = 0;
int context_counter = 0;
@@ -66,12 +75,11 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
xd->mi_stride];
- const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
// Keep counts for entropy encoding.
- context_counter += mode_2_counter[candidate->mode];
+ context_counter += mode_2_counter[candidate_mi->mode];
different_ref_found = 1;
- if (candidate->ref_frame[0] == ref_frame)
+ if (candidate_mi->ref_frame[0] == ref_frame)
ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
refmv_count, mv_ref_list, Done);
}
@@ -85,12 +93,12 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
const POSITION *const mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
- xd->mi_stride]->mbmi;
+ const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
+ xd->mi_stride];
different_ref_found = 1;
- if (candidate->ref_frame[0] == ref_frame)
- ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done);
+ if (candidate_mi->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
}
}
@@ -101,15 +109,29 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
const POSITION *mv_ref = &mv_ref_search[i];
if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
- * xd->mi_stride]->mbmi;
+ const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row
+ * xd->mi_stride];
// If the candidate is INTRA we don't want to consider its mv.
- IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
+ IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias,
refmv_count, mv_ref_list, Done);
}
}
}
+ if (use_base_mv &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ ref_frame == LAST_FRAME) {
+ // Get base layer mv.
+ MV_REF *candidate =
+ &cm->prev_frame->mvs[(mi_col>>1) + (mi_row>>1) * (cm->mi_cols>>1)];
+ if (candidate->mv[0].as_int != INVALID_MV) {
+ base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2);
+ base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2);
+ clamp_mv_ref(&base_mv->as_mv, xd);
+ } else {
+ base_mv->as_int = INVALID_MV;
+ }
+ }
Done:
@@ -125,16 +147,17 @@ static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCK *x,
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv,
- int64_t best_rd_sofar) {
+ int64_t best_rd_sofar, int use_base_mv) {
MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
const int step_param = cpi->sf.mv.fullpel_search_step_param;
const int sadpb = x->sadperbit16;
MV mvp_full;
- const int ref = mbmi->ref_frame[0];
+ const int ref = mi->ref_frame[0];
const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
- int dis;
+ MV center_mv;
+ uint32_t dis;
int rate_mode;
const int tmp_col_min = x->mv_col_min;
const int tmp_col_max = x->mv_col_max;
@@ -164,9 +187,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
+ if (!use_base_mv)
+ center_mv = ref_mv;
+ else
+ center_mv = tmp_mv->as_mv;
+
vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
cond_cost_list(cpi, cost_list),
- &ref_mv, &tmp_mv->as_mv, INT_MAX, 0);
+ &center_mv, &tmp_mv->as_mv, INT_MAX, 0);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
@@ -224,7 +252,7 @@ static void block_variance(const uint8_t *src, int src_stride,
&sse8x8[k], &sum8x8[k]);
*sse += sse8x8[k];
*sum += sum8x8[k];
- var8x8[k] = sse8x8[k] - (((unsigned int)sum8x8[k] * sum8x8[k]) >> 6);
+ var8x8[k] = sse8x8[k] - (uint32_t)(((int64_t)sum8x8[k] * sum8x8[k]) >> 6);
k++;
}
}
@@ -245,7 +273,7 @@ static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
- var_o[k] = sse_o[k] - (((unsigned int)sum_o[k] * sum_o[k]) >>
+ var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >>
(b_width_log2_lookup[unit_size] +
b_height_log2_lookup[unit_size] + 6));
k++;
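Both casts above fix the same latent overflow: the variance is computed as var = sse - (sum * sum >> shift), and for a 32x32 unit |sum| can reach 1024 * 255 = 261120, whose square (about 6.8e10) exceeds UINT32_MAX. Promoting one operand to int64_t makes the product exact; a minimal form:

#include <stdint.h>

/* Variance over n = 2^log2n pixels from the sum of squared differences
 * (sse) and the sum of differences: var = sse - sum^2 / n, with the
 * product formed in 64 bits to avoid 32-bit overflow. */
static uint32_t block_var(uint32_t sse, int sum, int log2n) {
  return sse - (uint32_t)(((int64_t)sum * sum) >> log2n);
}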
@@ -300,7 +328,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
tx_size = TX_8X8;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
- cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
+ cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
tx_size = TX_8X8;
else if (tx_size > TX_16X16)
tx_size = TX_16X16;
@@ -310,7 +338,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
}
assert(tx_size >= TX_8X8);
- xd->mi[0]->mbmi.tx_size = tx_size;
+ xd->mi[0]->tx_size = tx_size;
// Evaluate if the partition block is a skippable block in Y plane.
{
@@ -379,7 +407,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
for (i = 1; i <= 2; i++) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
- const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd);
+ const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd);
const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
const int uv_bw = b_width_log2_lookup[uv_bsize];
@@ -475,19 +503,19 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
if (cpi->common.tx_mode == TX_MODE_SELECT) {
if (sse > (var << 2))
- xd->mi[0]->mbmi.tx_size =
+ xd->mi[0]->tx_size =
VPXMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
else
- xd->mi[0]->mbmi.tx_size = TX_8X8;
+ xd->mi[0]->tx_size = TX_8X8;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
- cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
- xd->mi[0]->mbmi.tx_size = TX_8X8;
- else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
- xd->mi[0]->mbmi.tx_size = TX_16X16;
+ cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
+ xd->mi[0]->tx_size = TX_8X8;
+ else if (xd->mi[0]->tx_size > TX_16X16)
+ xd->mi[0]->tx_size = TX_16X16;
} else {
- xd->mi[0]->mbmi.tx_size =
+ xd->mi[0]->tx_size =
VPXMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
}
@@ -495,7 +523,7 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Evaluate if the partition block is a skippable block in Y plane.
{
const BLOCK_SIZE unit_size =
- txsize_to_bsize[xd->mi[0]->mbmi.tx_size];
+ txsize_to_bsize[xd->mi[0]->tx_size];
const unsigned int num_blk_log2 =
(b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
(b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
@@ -562,39 +590,46 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
}
#if CONFIG_VP9_HIGHBITDEPTH
-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
- int *skippable, int64_t *sse, int plane,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+ int *skippable, int64_t *sse, BLOCK_SIZE bsize,
+ TX_SIZE tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
unsigned int var_y, sse_y;
- (void)plane;
+
(void)tx_size;
- model_rd_for_sb_y(cpi, bsize, x, xd, rate, dist, &var_y, &sse_y);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,
+ &sse_y);
*sse = INT_MAX;
*skippable = 0;
return;
}
#else
-static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
- int *skippable, int64_t *sse, int plane,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
+static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+ int *skippable, int64_t *sse, BLOCK_SIZE bsize,
+ TX_SIZE tx_size) {
MACROBLOCKD *xd = &x->e_mbd;
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *pd = &xd->plane[0];
+ struct macroblock_plane *const p = &x->plane[0];
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
const int step = 1 << (tx_size << 1);
const int block_step = (1 << tx_size);
int block = 0, r, c;
- int shift = tx_size == TX_32X32 ? 0 : 2;
const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
- xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ xd->mb_to_right_edge >> 5);
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
- xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ xd->mb_to_bottom_edge >> 5);
int eob_cost = 0;
+ const int bw = 4 * num_4x4_w;
+ const int bh = 4 * num_4x4_h;
(void)cpi;
- vp9_subtract_plane(x, bsize, plane);
+
+ // The max tx_size passed in is TX_16X16.
+ assert(tx_size != TX_32X32);
+
+ vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
*skippable = 1;
// Keep track of the row and column of the blocks we use so that we know
// if we are in the unrestricted motion border.
@@ -606,27 +641,20 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
- const int diff_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const int diff_stride = bw;
const int16_t *src_diff;
src_diff = &p->src_diff[(r * diff_stride + c) << 2];
switch (tx_size) {
- case TX_32X32:
- vpx_fdct32x32_rd(src_diff, coeff, diff_stride);
- vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
- p->round_fp, p->quant_fp, p->quant_shift,
- qcoeff, dqcoeff, pd->dequant, eob,
- scan_order->scan, scan_order->iscan);
- break;
case TX_16X16:
- vp9_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
+ vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
case TX_8X8:
- vp9_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
+ vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
@@ -650,18 +678,17 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
}
}
- if (*skippable && *sse < INT64_MAX) {
- *rate = 0;
- *dist = (*sse << 6) >> shift;
- *sse = *dist;
- return;
+ this_rdc->rate = 0;
+ if (*sse < INT64_MAX) {
+ *sse = (*sse << 6) >> 2;
+ if (*skippable) {
+ this_rdc->dist = *sse;
+ return;
+ }
}
block = 0;
- *rate = 0;
- *dist = 0;
- if (*sse < INT64_MAX)
- *sse = (*sse << 6) >> shift;
+ this_rdc->dist = 0;
for (r = 0; r < max_blocks_high; r += block_step) {
for (c = 0; c < num_4x4_w; c += block_step) {
if (c < max_blocks_wide) {
@@ -671,26 +698,26 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
uint16_t *const eob = &p->eobs[block];
if (*eob == 1)
- *rate += (int)abs(qcoeff[0]);
+ this_rdc->rate += (int)abs(qcoeff[0]);
else if (*eob > 1)
- *rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
+ this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4);
- *dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
+ this_rdc->dist +=
+ vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;
}
block += step;
}
}
- if (*skippable == 0) {
- *rate <<= 10;
- *rate += (eob_cost << 8);
- }
+ // If skippable is set, rate gets clobbered later.
+ this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);
+ this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);
}
#endif
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
- int *out_rate_sum, int64_t *out_dist_sum,
+ RD_COST *this_rdc,
unsigned int *var_y, unsigned int *sse_y,
int start_plane, int stop_plane) {
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -701,8 +728,8 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
int64_t dist;
int i;
- *out_rate_sum = 0;
- *out_dist_sum = 0;
+ this_rdc->rate = 0;
+ this_rdc->dist = 0;
for (i = start_plane; i <= stop_plane; ++i) {
struct macroblock_plane *const p = &x->plane[i];
@@ -733,8 +760,8 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
dc_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
- *out_rate_sum += rate >> 1;
- *out_dist_sum += dist << 3;
+ this_rdc->rate += rate >> 1;
+ this_rdc->dist += dist << 3;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -749,8 +776,8 @@ static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
ac_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
- *out_rate_sum += rate;
- *out_dist_sum += dist << 4;
+ this_rdc->rate += rate;
+ this_rdc->dist += dist << 4;
}
}
@@ -779,14 +806,20 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
struct buf_2d yv12_mb[][MAX_MB_PLANE],
int *rate, int64_t *dist) {
MACROBLOCKD *xd = &x->e_mbd;
-
+ MODE_INFO *const mi = xd->mi[0];
const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
unsigned int var = var_y, sse = sse_y;
// Skipping threshold for ac.
unsigned int thresh_ac;
// Skipping threshold for dc.
unsigned int thresh_dc;
- if (x->encode_breakout > 0) {
+ int motion_low = 1;
+ if (mi->mv[0].as_mv.row > 64 ||
+ mi->mv[0].as_mv.row < -64 ||
+ mi->mv[0].as_mv.col > 64 ||
+ mi->mv[0].as_mv.col < -64)
+ motion_low = 0;
+ if (x->encode_breakout > 0 && motion_low == 1) {
// Set a maximum for threshold to avoid big PSNR loss in low bit rate
    // case. Use an extremely low threshold for static frames to limit
// skipping.
@@ -826,6 +859,12 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
if (var <= thresh_ac && (sse - var) <= thresh_dc) {
unsigned int sse_u, sse_v;
unsigned int var_u, var_v;
+ unsigned int thresh_ac_uv = thresh_ac;
+ unsigned int thresh_dc_uv = thresh_dc;
+ if (x->sb_is_skin) {
+ thresh_ac_uv = 0;
+ thresh_dc_uv = 0;
+ }
// Skip UV prediction unless breakout is zero (lossless) to save
// computation with low impact on the result
@@ -841,14 +880,14 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[1].dst.stride, &sse_u);
// U skipping condition checking
- if (((var_u << 2) <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
+ if (((var_u << 2) <= thresh_ac_uv) && (sse_u - var_u <= thresh_dc_uv)) {
var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
x->plane[2].src.stride,
xd->plane[2].dst.buf,
xd->plane[2].dst.stride, &sse_v);
// V skipping condition checking
- if (((var_v << 2) <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
+ if (((var_v << 2) <= thresh_ac_uv) && (sse_v - var_v <= thresh_dc_uv)) {
x->skip = 1;
// The cost of skip bit needs to be added.
@@ -874,8 +913,8 @@ struct estimate_block_intra_args {
VP9_COMP *cpi;
MACROBLOCK *x;
PREDICTION_MODE mode;
- int rate;
- int64_t dist;
+ int skippable;
+ RD_COST *rdc;
};
static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -892,8 +931,7 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
int i, j;
- int rate;
- int64_t dist;
+ RD_COST this_rdc;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
@@ -909,23 +947,20 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (plane == 0) {
int64_t this_sse = INT64_MAX;
- int is_skippable;
// TODO(jingning): This needs further refactoring.
- block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
- bsize_tx, VPXMIN(tx_size, TX_16X16));
- x->skip_txfm[0] = is_skippable;
- // TODO(jingning): Skip is signalled per prediciton block not per tx block.
- rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
+ block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx,
+ VPXMIN(tx_size, TX_16X16));
} else {
- unsigned int var, sse;
- model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
- plane, plane);
+ unsigned int var = 0;
+ unsigned int sse = 0;
+ model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane,
+ plane);
}
p->src.buf = src_buf_base;
pd->dst.buf = dst_buf_base;
- args->rate += rate;
- args->dist += dist;
+ args->rdc->rate += this_rdc.rate;
+ args->rdc->dist += this_rdc.dist;
}
static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
@@ -975,17 +1010,17 @@ static INLINE void update_thresh_freq_fact(VP9_COMP *cpi,
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
RD_COST this_rdc, best_rdc;
PREDICTION_MODE this_mode;
- struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
+ struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
const TX_SIZE intra_tx_size =
VPXMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
MODE_INFO *const mic = xd->mi[0];
int *bmode_costs;
- const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
- const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
bmode_costs = cpi->y_mode_costs[A][L];
@@ -994,29 +1029,35 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
vp9_rd_cost_reset(&best_rdc);
vp9_rd_cost_reset(&this_rdc);
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->mv[0].as_int = INVALID_MV;
- mbmi->uv_mode = DC_PRED;
+ mi->ref_frame[0] = INTRA_FRAME;
+ mi->mv[0].as_int = INVALID_MV;
+ mi->uv_mode = DC_PRED;
memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
// Change the limit of this loop to add other intra prediction
// mode tests.
for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
+ this_rdc.dist = this_rdc.rate = 0;
args.mode = this_mode;
- args.rate = 0;
- args.dist = 0;
- mbmi->tx_size = intra_tx_size;
+ args.skippable = 1;
+ args.rdc = &this_rdc;
+ mi->tx_size = intra_tx_size;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
- this_rdc.rate = args.rate;
- this_rdc.dist = args.dist;
+ if (args.skippable) {
+ x->skip_txfm[0] = SKIP_TXFM_AC_DC;
+ this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
+ } else {
+ x->skip_txfm[0] = SKIP_TXFM_NONE;
+ this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
+ }
this_rdc.rate += bmode_costs[this_mode];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
if (this_rdc.rdcost < best_rdc.rdcost) {
best_rdc = this_rdc;
- mbmi->mode = this_mode;
+ mi->mode = this_mode;
}
}
@@ -1068,17 +1109,228 @@ static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = {
{GOLDEN_FRAME, NEWMV}
};
-// TODO(jingning) placeholder for inter-frame non-RD mode decision.
-// this needs various further optimizations. to be continued..
+static int set_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize) {
+ const VP9_COMMON *const cm = &cpi->common;
+ // Reduce the intra cost penalty for small blocks (<=16x16).
+ int reduction_fac =
+ (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
+ if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
+ // Don't reduce intra cost penalty if estimated noise level is high.
+ reduction_fac = 0;
+ return vp9_get_intra_cost_penalty(
+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
+}
+
+static INLINE void find_predictors(VP9_COMP *cpi, MACROBLOCK *x,
+ MV_REFERENCE_FRAME ref_frame,
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+ int const_motion[MAX_REF_FRAMES],
+ int *ref_frame_skip_mask,
+ const int flag_list[4],
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col,
+ struct buf_2d yv12_mb[4][MAX_MB_PLANE],
+ BLOCK_SIZE bsize,
+ int force_skip_low_temp_var) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ TileInfo *const tile_info = &tile_data->tile_info;
+ // TODO(jingning) placeholder for inter-frame non-RD mode decision.
+ x->pred_mv_sad[ref_frame] = INT_MAX;
+ frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[ZEROMV][ref_frame].as_int = 0;
+ // this needs various further optimizations. to be continued..
+ if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
+ int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
+ const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
+ vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
+ sf, sf);
+ if (cm->use_prev_frame_mvs) {
+ vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
+ candidates, mi_row, mi_col,
+ x->mbmi_ext->mode_context);
+ } else {
+ const_motion[ref_frame] =
+ mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
+ candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
+ (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
+ }
+ vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
+ &frame_mv[NEARESTMV][ref_frame],
+ &frame_mv[NEARMV][ref_frame]);
+ // Early exit for golden frame if force_skip_low_temp_var is set.
+ if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 &&
+ !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) {
+ vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
+ ref_frame, bsize);
+ }
+ } else {
+ *ref_frame_skip_mask |= (1 << ref_frame);
+ }
+}
+
+static void vp9_large_block_mv_bias(const NOISE_ESTIMATE *ne, RD_COST *this_rdc,
+ BLOCK_SIZE bsize, int mv_row, int mv_col,
+ int is_last_frame) {
+ // Bias against non-zero (above some threshold) motion for large blocks.
+  // This is a temporary fix to avoid selecting a large mv for big blocks.
+ if (mv_row > 64 || mv_row < -64 || mv_col > 64 || mv_col < -64) {
+ if (bsize == BLOCK_64X64)
+ this_rdc->rdcost = this_rdc->rdcost << 1;
+ else if (bsize >= BLOCK_32X32)
+ this_rdc->rdcost = 3 * this_rdc->rdcost >> 1;
+ }
+ // If noise estimation is enabled, and estimated level is above threshold,
+ // add a bias to LAST reference with small motion, for large blocks.
+ if (ne->enabled && ne->level >= kMedium &&
+ bsize >= BLOCK_32X32 && is_last_frame &&
+ mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8) {
+ this_rdc->rdcost = 7 * this_rdc->rdcost >> 3;
+ }
+}
+
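The bias factors in vp9_large_block_mv_bias() are plain shift/multiply fractions: x2 for a 64x64 block with large motion, x1.5 for other >=32x32 blocks with large motion, and x7/8 (a discount) for near-zero motion on LAST when the estimated noise level is medium or higher. A toy mirror of that logic, with the motion tests (|mv| > 64, |mv| < 8) precomputed by the caller:

#include <stdint.h>

/* Toy version of the large-block MV bias; flags are precomputed. */
static int64_t bias_rdcost(int64_t rd, int is_64x64, int ge_32x32,
                           int big_motion, int small_motion_last_noisy) {
  if (big_motion) {
    if (is_64x64)
      rd <<= 1;            /* x2   */
    else if (ge_32x32)
      rd = 3 * rd >> 1;    /* x1.5 */
  } else if (small_motion_last_noisy && ge_32x32) {
    rd = 7 * rd >> 3;      /* x7/8 */
  }
  return rd;
}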
+#if CONFIG_VP9_TEMPORAL_DENOISING
+static void vp9_pickmode_ctx_den_update(
+ VP9_PICKMODE_CTX_DEN *ctx_den,
+ int64_t zero_last_cost_orig,
+ int ref_frame_cost[MAX_REF_FRAMES],
+ int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
+ int reuse_inter_pred,
+ TX_SIZE best_tx_size,
+ PREDICTION_MODE best_mode,
+ MV_REFERENCE_FRAME best_ref_frame,
+ INTERP_FILTER best_pred_filter,
+ uint8_t best_mode_skip_txfm) {
+ ctx_den->zero_last_cost_orig = zero_last_cost_orig;
+ ctx_den->ref_frame_cost = ref_frame_cost;
+ ctx_den->frame_mv = frame_mv;
+ ctx_den->reuse_inter_pred = reuse_inter_pred;
+ ctx_den->best_tx_size = best_tx_size;
+ ctx_den->best_mode = best_mode;
+ ctx_den->best_ref_frame = best_ref_frame;
+ ctx_den->best_pred_filter = best_pred_filter;
+ ctx_den->best_mode_skip_txfm = best_mode_skip_txfm;
+}
+
+static void recheck_zeromv_after_denoising(
+ VP9_COMP *cpi, MODE_INFO *const mi, MACROBLOCK *x, MACROBLOCKD *const xd,
+ VP9_DENOISER_DECISION decision, VP9_PICKMODE_CTX_DEN *ctx_den,
+ struct buf_2d yv12_mb[4][MAX_MB_PLANE], RD_COST *best_rdc, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {
+ // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on
+ // denoised result. Only do this under noise conditions, and if rdcost of
+  // ZEROMV on original source is not significantly higher than rdcost of best
+ // mode.
+ if (cpi->noise_estimate.enabled &&
+ cpi->noise_estimate.level > kLow &&
+ ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) &&
+ ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) ||
+ (ctx_den->best_ref_frame == GOLDEN_FRAME &&
+ decision == FILTER_ZEROMV_BLOCK))) {
+ // Check if we should pick ZEROMV on denoised signal.
+ int rate = 0;
+ int64_t dist = 0;
+ uint32_t var_y = UINT_MAX;
+ uint32_t sse_y = UINT_MAX;
+ RD_COST this_rdc;
+ mi->mode = ZEROMV;
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE;
+ mi->mv[0].as_int = 0;
+ mi->interp_filter = EIGHTTAP;
+ xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+ this_rdc.rate = rate + ctx_den->ref_frame_cost[LAST_FRAME] +
+ cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]]
+ [INTER_OFFSET(ZEROMV)];
+ this_rdc.dist = dist;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ // Switch to ZEROMV if the rdcost for ZEROMV on denoised source
+ // is lower than best_ref mode (on original source).
+ if (this_rdc.rdcost > best_rdc->rdcost) {
+ this_rdc = *best_rdc;
+ mi->mode = ctx_den->best_mode;
+ mi->ref_frame[0] = ctx_den->best_ref_frame;
+ mi->interp_filter = ctx_den->best_pred_filter;
+ if (ctx_den->best_ref_frame == INTRA_FRAME)
+ mi->mv[0].as_int = INVALID_MV;
+ else if (ctx_den->best_ref_frame == GOLDEN_FRAME) {
+ mi->mv[0].as_int = ctx_den->frame_mv[ctx_den->best_mode]
+ [ctx_den->best_ref_frame].as_int;
+ if (ctx_den->reuse_inter_pred) {
+ xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0];
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ }
+ }
+ mi->tx_size = ctx_den->best_tx_size;
+ x->skip_txfm[0] = ctx_den->best_mode_skip_txfm;
+ } else {
+ ctx_den->best_ref_frame = LAST_FRAME;
+ *best_rdc = this_rdc;
+ }
+ }
+}
+#endif // CONFIG_VP9_TEMPORAL_DENOISING
+
+static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ const int i = (mi_row & 0x7) >> 1;
+ const int j = (mi_col & 0x7) >> 1;
+ int force_skip_low_temp_var = 0;
+ // Set force_skip_low_temp_var based on the block size and block offset.
+ if (bsize == BLOCK_64X64) {
+ force_skip_low_temp_var = variance_low[0];
+ } else if (bsize == BLOCK_64X32) {
+ if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[1];
+ } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[2];
+ }
+ } else if (bsize == BLOCK_32X64) {
+ if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[3];
+ } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[4];
+ }
+ } else if (bsize == BLOCK_32X32) {
+ if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[5];
+ } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[6];
+ } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[7];
+ } else if ((mi_col & 0x7) && (mi_row & 0x7)) {
+ force_skip_low_temp_var = variance_low[8];
+ }
+ } else if (bsize == BLOCK_16X16) {
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
+ } else if (bsize == BLOCK_32X16) {
+ // The col shift index for the second 16x16 block.
+ const int j2 = ((mi_col + 2) & 0x7) >> 1;
+ // Only if each 16x16 block inside has low temporal variance.
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+ variance_low[pos_shift_16x16[i][j2]];
+ } else if (bsize == BLOCK_16X32) {
+ // The row shift index for the second 16x16 block.
+ const int i2 = ((mi_row + 2) & 0x7) >> 1;
+ force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
+ variance_low[pos_shift_16x16[i2][j]];
+ }
+ return force_skip_low_temp_var;
+}
+
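
A usage sketch for the helper above, assuming it is in scope, that variance_low[] holds at least 25 entries, and that pos_shift_16x16 (defined elsewhere in this file) maps into indices 9..24; all three are assumptions for illustration:

    #include <assert.h>
    #include <stdint.h>

    static void demo_force_skip(void) {
      uint8_t variance_low[25] = { 0 };
      /* Bottom-right 32x32 quadrant of a 64x64 superblock: at
       * (mi_row, mi_col) = (4, 4) both (mi_row & 0x7) and (mi_col & 0x7)
       * are nonzero, which selects index 8 above. */
      variance_low[8] = 1;
      assert(get_force_skip_low_temp_var(variance_low, 4, 4,
                                         BLOCK_32X32) == 1);
    }
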
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- TileInfo *const tile_info = &tile_data->tile_info;
+ const SVC *const svc = &cpi->svc;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
PREDICTION_MODE best_mode = ZEROMV;
MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
@@ -1094,14 +1346,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// var_y and sse_y are saved to be used in the skip check
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
- // Reduce the intra cost penalty for small blocks (<=16x16).
- const int reduction_fac = (bsize <= BLOCK_16X16) ?
- ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
- const int intra_cost_penalty = vp9_get_intra_cost_penalty(
- cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
- const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
+ const int intra_cost_penalty = set_intra_cost_penalty(cpi, bsize);
+ int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
intra_cost_penalty, 0);
- const int *const rd_threshes = cpi->rd.threshes[mbmi->segment_id][bsize];
+ const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
INTERP_FILTER filter_ref;
const int bsl = mi_width_log2_lookup[bsize];
@@ -1129,6 +1377,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int best_pred_sad = INT_MAX;
int best_early_term = 0;
int ref_frame_cost[MAX_REF_FRAMES];
+ int svc_force_zero_mode[3] = {0};
+ int perform_intra_pred = 1;
+ int use_golden_nonzeromv = 1;
+ int force_skip_low_temp_var = 0;
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ VP9_PICKMODE_CTX_DEN ctx_den;
+ int64_t zero_last_cost_orig = INT64_MAX;
+#endif
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1154,24 +1410,42 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
x->skip = 0;
- if (xd->up_available)
- filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
- else if (xd->left_available)
- filter_ref = xd->mi[-1]->mbmi.interp_filter;
+ // Instead of using vp9_get_pred_context_switchable_interp(xd) to assign
+ // filter_ref, we use a less strict condition on assigning filter_ref.
+ // This is to reduce the probability of entering the flow of not assigning
+ // filter_ref and then skipping the filter search.
+ if (xd->above_mi && is_inter_block(xd->above_mi))
+ filter_ref = xd->above_mi->interp_filter;
+ else if (xd->left_mi && is_inter_block(xd->left_mi))
+ filter_ref = xd->left_mi->interp_filter;
else
filter_ref = cm->interp_filter;
// initialize mode decisions
vp9_rd_cost_reset(&best_rdc);
vp9_rd_cost_reset(rd_cost);
- mbmi->sb_type = bsize;
- mbmi->ref_frame[0] = NONE;
- mbmi->ref_frame[1] = NONE;
- mbmi->tx_size = VPXMIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ mi->sb_type = bsize;
+ mi->ref_frame[0] = NONE;
+ mi->ref_frame[1] = NONE;
+ mi->tx_size = VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cm->tx_mode]);
+
+ if (sf->short_circuit_flat_blocks) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ x->source_variance = vp9_high_get_sby_perpixel_variance(
+ cpi, &x->plane[0].src, bsize, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ x->source_variance =
+ vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+ }
#if CONFIG_VP9_TEMPORAL_DENOISING
- vp9_denoiser_reset_frame_stats(ctx);
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ cpi->denoiser.denoising_level > kDenLowLow) {
+ vp9_denoiser_reset_frame_stats(ctx);
+ }
#endif
if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc) {
@@ -1179,40 +1453,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
} else {
usable_ref_frame = GOLDEN_FRAME;
}
- for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
-
- x->pred_mv_sad[ref_frame] = INT_MAX;
- frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
- frame_mv[ZEROMV][ref_frame].as_int = 0;
- if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
- int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
- const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
-
- vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
- sf, sf);
+ // For SVC mode, on spatial_layer_id > 0: if the reference frame has a
+ // different scale, constrain the inter modes to only test zero motion.
+ if (cpi->use_svc &&
+ svc->force_zero_mode_spatial_ref &&
+ cpi->svc.spatial_layer_id > 0) {
+ if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
+ struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
+ if (vp9_is_scaled(sf))
+ svc_force_zero_mode[LAST_FRAME - 1] = 1;
+ }
+ if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
+ struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
+ if (vp9_is_scaled(sf))
+ svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
+ }
+ }
- if (cm->use_prev_frame_mvs)
- vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL,
- x->mbmi_ext->mode_context);
- else
- const_motion[ref_frame] = mv_refs_rt(cm, x, xd, tile_info,
- xd->mi[0],
- ref_frame, candidates,
- mi_row, mi_col);
+ if (cpi->sf.short_circuit_low_temp_var) {
+ force_skip_low_temp_var =
+ get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
+ }
- vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
- &frame_mv[NEARESTMV][ref_frame],
- &frame_mv[NEARMV][ref_frame]);
+ if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
+ !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
+ use_golden_nonzeromv = 0;
- if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
- vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
- ref_frame, bsize);
- } else {
- ref_frame_skip_mask |= (1 << ref_frame);
- }
+ for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
+ find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
+ &ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col,
+ yv12_mb, bsize, force_skip_low_temp_var);
}
for (idx = 0; idx < RT_INTER_MODES; ++idx) {
@@ -1224,21 +1495,52 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int is_skippable;
int this_early_term = 0;
PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
+
if (cpi->use_svc)
this_mode = ref_mode_set_svc[idx].pred_mode;
+ if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
+ this_mode != NEARESTMV) {
+ continue;
+ }
+
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
ref_frame = ref_mode_set[idx].ref_frame;
- if (cpi->use_svc)
+ if (cpi->use_svc) {
ref_frame = ref_mode_set_svc[idx].ref_frame;
+ }
+
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
continue;
+
if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
- if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) {
+ // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
+ // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
+ // later.
+ if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
+ frame_mv[this_mode][ref_frame].as_int != 0) {
+ continue;
+ }
+
+ if (cpi->sf.short_circuit_low_temp_var == 2 &&
+ force_skip_low_temp_var && ref_frame == LAST_FRAME &&
+ this_mode == NEWMV) {
+ continue;
+ }
+
+ if (cpi->use_svc) {
+ if (svc_force_zero_mode[ref_frame - 1] &&
+ frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
+ }
+
+ if (!force_skip_low_temp_var &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == LAST_FRAME)) {
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
@@ -1251,7 +1553,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
- mbmi->ref_frame[0] = ref_frame;
+ mi->ref_frame[0] = ref_frame;
set_ref_ptrs(cm, xd, ref_frame, NONE);
mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
@@ -1262,9 +1564,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (this_mode == NEWMV) {
- if (ref_frame > LAST_FRAME && !cpi->use_svc) {
+ if (ref_frame > LAST_FRAME &&
+ !cpi->use_svc &&
+ cpi->oxcf.rc_mode == VPX_CBR) {
int tmp_sad;
- int dis, cost_list[5];
+ uint32_t dis;
+ int cost_list[5];
if (bsize < BLOCK_16X16)
continue;
@@ -1276,7 +1581,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
continue;
- frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
+ frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
&x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
@@ -1293,13 +1598,44 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dis,
&x->pred_sse[ref_frame], NULL, 0, 0);
+ } else if (svc->use_base_mv && svc->spatial_layer_id) {
+ if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV &&
+ frame_mv[NEWMV][ref_frame].as_int != 0) {
+ const int pre_stride = xd->plane[0].pre[0].stride;
+ int base_mv_sad = INT_MAX;
+ const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
+ (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
+ (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
+ base_mv_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
+ x->plane[0].src.stride,
+ pre_buf, pre_stride);
+
+ // TODO(wonkap): make the decision to use the base layer mv on RD,
+ // not just SAD.
+ if (base_mv_sad < x->pred_mv_sad[ref_frame]) {
+ // Base layer mv is good.
+ if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 1)) {
+ continue;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
+ continue;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
+ continue;
+ }
} else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) {
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost, 0)) {
continue;
}
}
- if (this_mode == NEWMV && ref_frame == LAST_FRAME &&
+ // If use_golden_nonzeromv is false, NEWMV is skipped for golden, so
+ // best_pred_sad (only used to skip golden NEWMV) need not be computed.
+ if (use_golden_nonzeromv && this_mode == NEWMV &&
+ ref_frame == LAST_FRAME &&
frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
const int pre_stride = xd->plane[0].pre[0].stride;
const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
@@ -1311,28 +1647,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
}
- if (cpi->use_svc) {
- if (this_mode == NEWMV && ref_frame == GOLDEN_FRAME &&
- frame_mv[NEWMV][GOLDEN_FRAME].as_int != INVALID_MV) {
- const int pre_stride = xd->plane[0].pre[0].stride;
- const uint8_t * const pre_buf = xd->plane[0].pre[0].buf +
- (frame_mv[NEWMV][GOLDEN_FRAME].as_mv.row >> 3) * pre_stride +
- (frame_mv[NEWMV][GOLDEN_FRAME].as_mv.col >> 3);
- best_pred_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
- x->plane[0].src.stride,
- pre_buf, pre_stride);
- x->pred_mv_sad[GOLDEN_FRAME] = best_pred_sad;
- }
- }
-
-
if (this_mode != NEARESTMV &&
frame_mv[this_mode][ref_frame].as_int ==
frame_mv[NEARESTMV][ref_frame].as_int)
continue;
- mbmi->mode = this_mode;
- mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+ mi->mode = this_mode;
+ mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
// Search for the best prediction filter type, when the resulting
// motion vector is at sub-pixel accuracy level for luma component, i.e.,
@@ -1349,8 +1670,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search
&& (ref_frame == LAST_FRAME ||
- (ref_frame == GOLDEN_FRAME && cpi->use_svc))
- && (((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) != 0)) {
+ (ref_frame == GOLDEN_FRAME &&
+ (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
+ (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
int pf_rate[3];
int64_t pf_dist[3];
unsigned int pf_var[3];
@@ -1362,13 +1684,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) {
int64_t cost;
- mbmi->interp_filter = filter;
+ mi->interp_filter = filter;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
&pf_var[filter], &pf_sse[filter]);
pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
- pf_tx_size[filter] = mbmi->tx_size;
+ pf_tx_size[filter] = mi->tx_size;
if (cost < best_cost) {
best_filter = filter;
best_cost = cost;
@@ -1379,12 +1701,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
free_pred_buffer(this_mode_pred);
this_mode_pred = current_pred;
}
-
- if (filter < EIGHTTAP_SHARP) {
- current_pred = &tmp[get_pred_buffer(tmp, 3)];
- pd->dst.buf = current_pred->data;
- pd->dst.stride = bw;
- }
+ current_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = current_pred->data;
+ pd->dst.stride = bw;
}
}
}
@@ -1392,8 +1711,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (reuse_inter_pred && this_mode_pred != current_pred)
free_pred_buffer(current_pred);
- mbmi->interp_filter = best_filter;
- mbmi->tx_size = pf_tx_size[best_filter];
+ mi->interp_filter = best_filter;
+ mi->tx_size = pf_tx_size[best_filter];
this_rdc.rate = pf_rate[best_filter];
this_rdc.dist = pf_dist[best_filter];
var_y = pf_var[best_filter];
@@ -1404,13 +1723,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
pd->dst.stride = this_mode_pred->stride;
}
} else {
- mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
+ // TODO(jackychen): the low-bitdepth condition causes a segfault in
+ // high-bitdepth builds.
+ // https://bugs.chromium.org/p/webm/issues/detail?id=1250
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int large_block = bsize > BLOCK_32X32;
+#else
+ const int large_block = bsize >= BLOCK_32X32;
+#endif
+ mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
// For large partition blocks, extra testing is done.
- if (bsize > BLOCK_32X32 &&
- !cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id) &&
- cm->base_qindex) {
+ if (cpi->oxcf.rc_mode == VPX_CBR && large_block &&
+ !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
+ cm->base_qindex) {
model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
&this_rdc.dist, &var_y, &sse_y, mi_row, mi_col,
&this_early_term);
@@ -1422,8 +1749,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!this_early_term) {
this_sse = (int64_t)sse_y;
- block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable,
- &this_sse, 0, bsize, VPXMIN(mbmi->tx_size, TX_16X16));
+ block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize,
+ VPXMIN(mi->tx_size, TX_16X16));
x->skip_txfm[0] = is_skippable;
if (is_skippable) {
this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
@@ -1439,7 +1766,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cm->interp_filter == SWITCHABLE) {
- if ((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07)
+ if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07)
this_rdc.rate += vp9_get_switchable_rate(cpi, xd);
}
} else {
@@ -1449,17 +1776,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
- int uv_rate = 0;
- int64_t uv_dist = 0;
+ RD_COST rdc_uv;
const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]);
if (x->color_sensitivity[0])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
if (x->color_sensitivity[1])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
- model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &uv_rate, &uv_dist,
- &var_y, &sse_y, 1, 2);
- this_rdc.rate += uv_rate;
- this_rdc.dist += uv_dist;
+ model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2);
+ this_rdc.rate += rdc_uv.rate;
+ this_rdc.dist += rdc_uv.dist;
}
this_rdc.rate += rate_mv;
@@ -1469,6 +1794,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
this_rdc.rate += ref_frame_cost[ref_frame];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
+ // Bias against non-zero motion
+ if (cpi->oxcf.rc_mode == VPX_CBR &&
+ cpi->oxcf.speed >= 5 &&
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
+ !x->sb_is_skin) {
+ vp9_large_block_mv_bias(&cpi->noise_estimate, &this_rdc, bsize,
+ frame_mv[this_mode][ref_frame].as_mv.row,
+ frame_mv[this_mode][ref_frame].as_mv.col,
+ ref_frame == LAST_FRAME);
+ }
+
// Skip check: test to see if this block can be reconstructed by
// prediction only.
if (cpi->allow_encode_breakout) {
@@ -1483,8 +1819,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0)
- vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ cpi->denoiser.denoising_level > kDenLowLow) {
+ vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx);
+ // Keep track of zero_last cost.
+ if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
+ zero_last_cost_orig = this_rdc.rdcost;
+ }
#else
(void)ctx;
#endif
@@ -1492,8 +1833,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
best_rdc = this_rdc;
best_mode = this_mode;
- best_pred_filter = mbmi->interp_filter;
- best_tx_size = mbmi->tx_size;
+ best_pred_filter = mi->interp_filter;
+ best_tx_size = mi->tx_size;
best_ref_frame = ref_frame;
best_mode_skip_txfm = x->skip_txfm[0];
best_early_term = this_early_term;
@@ -1518,20 +1859,33 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- mbmi->mode = best_mode;
- mbmi->interp_filter = best_pred_filter;
- mbmi->tx_size = best_tx_size;
- mbmi->ref_frame[0] = best_ref_frame;
- mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
- xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+ mi->mode = best_mode;
+ mi->interp_filter = best_pred_filter;
+ mi->tx_size = best_tx_size;
+ mi->ref_frame[0] = best_ref_frame;
+ mi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
x->skip_txfm[0] = best_mode_skip_txfm;
+ // For spatial enhancement layer: perform intra prediction only if base
+ // layer is chosen as the reference. Always perform intra prediction if
+ // LAST is the only reference or is_key_frame is set.
+ if (cpi->svc.spatial_layer_id) {
+ perform_intra_pred =
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+ !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
+ (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame
+ && svc_force_zero_mode[best_ref_frame - 1]);
+ inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
+ }
// Perform intra prediction search if the best SAD is above a certain
// threshold.
- if (best_rdc.rdcost == INT64_MAX ||
- (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
- bsize <= cpi->sf.max_intra_bsize)) {
- struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
+ if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&
+ perform_intra_pred &&
+ (best_rdc.rdcost == INT64_MAX ||
+ (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
+ bsize <= cpi->sf.max_intra_bsize))) {
+ struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
TX_SIZE intra_tx_size =
@@ -1566,6 +1920,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const PREDICTION_MODE this_mode = intra_mode_list[i];
THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
int mode_rd_thresh = rd_threshes[mode_index];
+ if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
+ this_mode != DC_PRED) {
+ continue;
+ }
if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
continue;
@@ -1574,14 +1932,24 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
rd_thresh_freq_fact[mode_index]))
continue;
- mbmi->mode = this_mode;
- mbmi->ref_frame[0] = INTRA_FRAME;
+ mi->mode = this_mode;
+ mi->ref_frame[0] = INTRA_FRAME;
+ this_rdc.dist = this_rdc.rate = 0;
args.mode = this_mode;
- args.rate = 0;
- args.dist = 0;
- mbmi->tx_size = intra_tx_size;
+ args.skippable = 1;
+ args.rdc = &this_rdc;
+ mi->tx_size = intra_tx_size;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
+ // Check skip cost here since skippable is not set for uv; this
+ // mirrors the behavior used by inter
+ if (args.skippable) {
+ x->skip_txfm[0] = SKIP_TXFM_AC_DC;
+ this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
+ } else {
+ x->skip_txfm[0] = SKIP_TXFM_NONE;
+ this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
+ }
// Inter and intra RD will mismatch in scale for non-screen content.
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
if (x->color_sensitivity[0])
@@ -1591,8 +1959,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
estimate_block_intra, &args);
}
- this_rdc.rate = args.rate;
- this_rdc.dist = args.dist;
this_rdc.rate += cpi->mbmode_cost[this_mode];
this_rdc.rate += ref_frame_cost[INTRA_FRAME];
this_rdc.rate += intra_cost_penalty;
@@ -1602,29 +1968,33 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rdc.rdcost < best_rdc.rdcost) {
best_rdc = this_rdc;
best_mode = this_mode;
- best_intra_tx_size = mbmi->tx_size;
+ best_intra_tx_size = mi->tx_size;
best_ref_frame = INTRA_FRAME;
- mbmi->uv_mode = this_mode;
- mbmi->mv[0].as_int = INVALID_MV;
+ mi->uv_mode = this_mode;
+ mi->mv[0].as_int = INVALID_MV;
best_mode_skip_txfm = x->skip_txfm[0];
}
}
// Reset mb_mode_info to the best inter mode.
if (best_ref_frame != INTRA_FRAME) {
- mbmi->tx_size = best_tx_size;
+ mi->tx_size = best_tx_size;
} else {
- mbmi->tx_size = best_intra_tx_size;
+ mi->tx_size = best_intra_tx_size;
}
}
pd->dst = orig_dst;
- mbmi->mode = best_mode;
- mbmi->ref_frame[0] = best_ref_frame;
+ mi->mode = best_mode;
+ mi->ref_frame[0] = best_ref_frame;
x->skip_txfm[0] = best_mode_skip_txfm;
+ if (!is_inter_block(mi)) {
+ mi->interp_filter = SWITCHABLE_FILTERS;
+ }
+
if (reuse_inter_pred && best_pred != NULL) {
- if (best_pred->data != orig_dst.buf && is_inter_mode(mbmi->mode)) {
+ if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
@@ -1642,8 +2012,25 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ cpi->resize_pending == 0 &&
+ cpi->denoiser.denoising_level > kDenLowLow &&
+ cpi->denoiser.reset == 0) {
+ VP9_DENOISER_DECISION decision = COPY_BLOCK;
+ vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost,
+ frame_mv, reuse_inter_pred, best_tx_size,
+ best_mode, best_ref_frame, best_pred_filter,
+ best_mode_skip_txfm);
+ vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
+ recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb,
+ &best_rdc, bsize, mi_row, mi_col);
+ best_ref_frame = ctx_den.best_ref_frame;
+ }
+#endif
+
if (cpi->sf.adaptive_rd_thresh) {
- THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)];
+ THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mi->mode)];
if (best_ref_frame == INTRA_FRAME) {
// Only consider the modes that are included in the intra_mode_list.
@@ -1677,12 +2064,12 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const struct segmentation *const seg = &cm->seg;
MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
MV_REFERENCE_FRAME best_ref_frame = NONE;
- unsigned char segment_id = mbmi->segment_id;
+ unsigned char segment_id = mi->segment_id;
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
@@ -1708,8 +2095,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL,
- mbmi_ext->mode_context);
+ candidates, mi_row, mi_col, mbmi_ext->mode_context);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&dummy_mv[0], &dummy_mv[1]);
@@ -1718,13 +2104,13 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- mbmi->sb_type = bsize;
- mbmi->tx_size = TX_4X4;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame[0] = LAST_FRAME;
- mbmi->ref_frame[1] = NONE;
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
- : cm->interp_filter;
+ mi->sb_type = bsize;
+ mi->tx_size = TX_4X4;
+ mi->uv_mode = DC_PRED;
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE;
+ mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ : cm->interp_filter;
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
int64_t this_rd = 0;
@@ -1733,6 +2119,13 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (ref_frame_skip_mask & (1 << ref_frame))
continue;
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ if ((bsize == BLOCK_8X4 || bsize == BLOCK_4X8) &&
+ ref_frame > INTRA_FRAME &&
+ vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
+ continue;
+#endif
+
// TODO(jingning, agrange): Scaling reference frame not supported for
// sub8x8 blocks. Is this supported now?
if (ref_frame > INTRA_FRAME &&
@@ -1745,7 +2138,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
- mbmi->ref_frame[0] = ref_frame;
+ mi->ref_frame[0] = ref_frame;
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
@@ -1799,7 +2192,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
const int tmp_col_max = x->mv_col_max;
const int tmp_row_min = x->mv_row_min;
const int tmp_row_max = x->mv_row_max;
- int dummy_dist;
+ uint32_t dummy_dist;
if (i == 0) {
mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3;
@@ -1862,7 +2255,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
&xd->block_refs[0]->sf,
4 * num_4x4_blocks_wide,
4 * num_4x4_blocks_high, 0,
- vp9_filter_kernels[mbmi->interp_filter],
+ vp9_filter_kernels[mi->interp_filter],
MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i & 0x01),
mi_row * MI_SIZE + 4 * (i >> 1), xd->bd);
@@ -1874,7 +2267,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
&xd->block_refs[0]->sf,
4 * num_4x4_blocks_wide,
4 * num_4x4_blocks_high, 0,
- vp9_filter_kernels[mbmi->interp_filter],
+ vp9_filter_kernels[mi->interp_filter],
MV_PRECISION_Q3,
mi_col * MI_SIZE + 4 * (i & 0x01),
mi_row * MI_SIZE + 4 * (i >> 1));
@@ -1916,8 +2309,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
}
} // reference frames
- mbmi->tx_size = TX_4X4;
- mbmi->ref_frame[0] = best_ref_frame;
+ mi->tx_size = TX_4X4;
+ mi->ref_frame[0] = best_ref_frame;
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
const int block = idy * 2 + idx;
@@ -1928,7 +2321,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block];
}
}
- mbmi->mode = xd->mi[0]->bmi[3].as_mode;
+ mi->mode = xd->mi[0]->bmi[3].as_mode;
ctx->mic = *(xd->mi[0]);
ctx->mbmi_ext = *x->mbmi_ext;
ctx->skip_txfm[0] = SKIP_TXFM_NONE;
diff --git a/libvpx/vp9/encoder/vp9_quantize.c b/libvpx/vp9/encoder/vp9_quantize.c
index cb3e21a56..d68b6845c 100644
--- a/libvpx/vp9/encoder/vp9_quantize.c
+++ b/libvpx/vp9/encoder/vp9_quantize.c
@@ -94,7 +94,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr,
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[rc != 0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16);
+ const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 16);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
if (abs_qcoeff)
@@ -219,12 +219,12 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
unsigned t;
- int l;
+ int l, m;
t = d;
for (l = 0; t > 1; l++)
t >>= 1;
- t = 1 + (1 << (16 + l)) / d;
- *quant = (int16_t)(t - (1 << 16));
+ m = 1 + (1 << (16 + l)) / d;
+ *quant = (int16_t)(m - (1 << 16));
*shift = 1 << (16 - l);
}
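
A small self-contained demo of the inversion above (a sketch mirroring the patched helper, not the file itself): for d = 48 the loop finds l = 5, m = 1 + (1 << 21) / 48 = 43691, so *quant = 43691 - 65536 = -21845 and *shift = 1 << 11 = 2048; keeping the intermediate in the signed int m avoids mixing the unsigned t into the final subtraction:

    #include <stdint.h>
    #include <stdio.h>

    static void invert_quant_sketch(int16_t *quant, int16_t *shift, int d) {
      unsigned t = d;
      int l, m;
      for (l = 0; t > 1; l++) t >>= 1;   /* l = floor(log2(d)) */
      m = 1 + (1 << (16 + l)) / d;       /* fixed-point reciprocal of d */
      *quant = (int16_t)(m - (1 << 16));
      *shift = 1 << (16 - l);
    }

    int main(void) {
      int16_t q, s;
      invert_quant_sketch(&q, &s, 48);
      printf("%d %d\n", q, s);           /* prints: -21845 2048 */
      return 0;
    }
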
@@ -308,7 +308,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
QUANTS *const quants = &cpi->quants;
- const int segment_id = xd->mi[0]->mbmi.segment_id;
+ const int segment_id = xd->mi[0]->segment_id;
const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
int i;
@@ -342,8 +342,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
- x->errorperbit = rdmult >> 6;
- x->errorperbit += (x->errorperbit == 0);
+ set_error_per_bit(x, rdmult);
vp9_initialize_me_consts(cpi, x, x->q_index);
}
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c
index d70068570..b45f8d0d9 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -133,7 +133,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m,
kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth);
arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30, bit_depth);
arfgf_high[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth);
- inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90, bit_depth);
+ inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth);
rtc[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth);
}
}
@@ -337,6 +337,10 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->total_actual_bits = 0;
rc->total_target_bits = 0;
rc->total_target_vs_actual = 0;
+ rc->avg_frame_low_motion = 0;
+ rc->high_source_sad = 0;
+ rc->count_last_scene_change = 0;
+ rc->avg_source_sad = 0;
rc->frames_since_key = 8; // Sensible default for first frame.
rc->this_key_frame_forced = 0;
@@ -370,8 +374,9 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
int vp9_rc_drop_frame(VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
-
- if (!oxcf->drop_frames_water_mark) {
+ if (!oxcf->drop_frames_water_mark ||
+ (is_one_pass_cbr_svc(cpi) &&
+ cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) {
return 0;
} else {
if (rc->buffer_level < 0) {
@@ -499,6 +504,12 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
else
cpi->rc.rc_1_frame = 0;
+ // Turn off oscillation detection in the case of massive overshoot.
+ if (cpi->rc.rc_1_frame == -1 && cpi->rc.rc_2_frame == 1 &&
+ correction_factor > 1000) {
+ cpi->rc.rc_2_frame = 0;
+ }
+
if (correction_factor > 102) {
// We are not already at the worst allowable quality
correction_factor = (int)(100 + ((correction_factor - 100) *
@@ -614,15 +625,16 @@ static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) {
if (cpi->common.frame_type == KEY_FRAME) {
active_worst_quality = curr_frame == 0 ? rc->worst_quality
- : rc->last_q[KEY_FRAME] * 2;
+ : rc->last_q[KEY_FRAME] << 1;
} else {
if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4
+ active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 >> 2
: rc->last_q[INTER_FRAME];
} else {
- active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 2
- : rc->last_q[INTER_FRAME] * 2;
+ active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] << 1 :
+ VPXMIN(rc->last_q[INTER_FRAME] << 1,
+ (rc->avg_frame_qindex[INTER_FRAME] * 3 >> 1));
}
}
return VPXMIN(active_worst_quality, rc->worst_quality);
@@ -655,7 +667,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
VPXMIN(rc->avg_frame_qindex[INTER_FRAME],
rc->avg_frame_qindex[KEY_FRAME]) :
rc->avg_frame_qindex[INTER_FRAME];
- active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 / 4);
+ active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
// Maximum limit for down adjustment, ~30%.
@@ -804,8 +816,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
return q;
}
-static int get_active_cq_level(const RATE_CONTROL *rc,
- const VP9EncoderConfig *const oxcf) {
+static int get_active_cq_level_one_pass(
+ const RATE_CONTROL *rc, const VP9EncoderConfig *const oxcf) {
static const double cq_adjust_threshold = 0.1;
int active_cq_level = oxcf->cq_level;
if (oxcf->rc_mode == VPX_CQ &&
@@ -818,13 +830,36 @@ static int get_active_cq_level(const RATE_CONTROL *rc,
return active_cq_level;
}
+#define SMOOTH_PCT_MIN 0.1
+#define SMOOTH_PCT_DIV 0.05
+static int get_active_cq_level_two_pass(
+ const TWO_PASS *twopass, const RATE_CONTROL *rc,
+ const VP9EncoderConfig *const oxcf) {
+ static const double cq_adjust_threshold = 0.1;
+ int active_cq_level = oxcf->cq_level;
+ if (oxcf->rc_mode == VPX_CQ) {
+ if (twopass->mb_smooth_pct > SMOOTH_PCT_MIN) {
+ active_cq_level -= (int)((twopass->mb_smooth_pct - SMOOTH_PCT_MIN) /
+ SMOOTH_PCT_DIV);
+ active_cq_level = VPXMAX(active_cq_level, 0);
+ }
+ if (rc->total_target_bits > 0) {
+ const double x = (double)rc->total_actual_bits / rc->total_target_bits;
+ if (x < cq_adjust_threshold) {
+ active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold);
+ }
+ }
+ }
+ return active_cq_level;
+}
+
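
A hedged standalone sketch of the two-pass CQ adjustment above, using the same constants as the hunk (SMOOTH_PCT_MIN = 0.1, SMOOTH_PCT_DIV = 0.05):

    static int sketch_active_cq(int cq_level, double smooth_pct,
                                int64_t actual_bits, int64_t target_bits) {
      int level = cq_level;
      if (smooth_pct > 0.1)                       /* smooth content: drop CQ */
        level -= (int)((smooth_pct - 0.1) / 0.05);
      if (level < 0) level = 0;
      if (target_bits > 0) {
        const double x = (double)actual_bits / (double)target_bits;
        if (x < 0.1)                              /* heavy undershoot */
          level = (int)(level * x / 0.1);
      }
      return level;  /* e.g. sketch_active_cq(30, 0.31, 0, 0) == 26 */
    }
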
static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
int *bottom_index,
int *top_index) {
const VP9_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const int cq_level = get_active_cq_level(rc, oxcf);
+ const int cq_level = get_active_cq_level_one_pass(rc, oxcf);
int active_best_quality;
int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
int q;
@@ -832,10 +867,16 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
if (frame_is_intra_only(cm)) {
- // Handle the special case for key frames forced when we have reached
- // the maximum key frame interval. Here force the Q to a range
- // based on the ambient Q to reduce the risk of popping.
- if (rc->this_key_frame_forced) {
+ if (oxcf->rc_mode == VPX_Q) {
+ int qindex = cq_level;
+ double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ int delta_qindex = vp9_compute_qdelta(rc, q, q * 0.25,
+ cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+ } else if (rc->this_key_frame_forced) {
+ // Handle the special case for key frames forced when we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping.
int qindex = rc->last_boosted_qindex;
double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
@@ -868,9 +909,12 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
- if (rc->frames_since_key > 1 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
- q = rc->avg_frame_qindex[INTER_FRAME];
+ if (rc->frames_since_key > 1) {
+ if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+ q = rc->avg_frame_qindex[INTER_FRAME];
+ } else {
+ q = active_worst_quality;
+ }
} else {
q = rc->avg_frame_qindex[KEY_FRAME];
}
@@ -885,23 +929,37 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
active_best_quality = active_best_quality * 15 / 16;
} else if (oxcf->rc_mode == VPX_Q) {
- if (!cpi->refresh_alt_ref_frame) {
- active_best_quality = cq_level;
- } else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
- }
+ int qindex = cq_level;
+ double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ int delta_qindex;
+ if (cpi->refresh_alt_ref_frame)
+ delta_qindex = vp9_compute_qdelta(rc, q, q * 0.40, cm->bit_depth);
+ else
+ delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
- active_best_quality = cq_level;
+ int qindex = cq_level;
+ double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ double delta_rate[FIXED_GF_INTERVAL] =
+ {0.50, 1.0, 0.85, 1.0, 0.70, 1.0, 0.85, 1.0};
+ int delta_qindex =
+ vp9_compute_qdelta(rc, q,
+ q * delta_rate[cm->current_video_frame %
+ FIXED_GF_INTERVAL], cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
- // Use the lower of active_worst_quality and recent/average Q.
- if (cm->current_video_frame > 1)
- active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
- else
+ // Use the min of the average Q and active_worst_quality as basis for
+ // active_best.
+ if (cm->current_video_frame > 1) {
+ q = VPXMIN(rc->avg_frame_qindex[INTER_FRAME], active_worst_quality);
+ active_best_quality = inter_minq[q];
+ } else {
active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+ }
// For the constrained quality mode we don't want
// q to fall below the cq level.
if ((oxcf->rc_mode == VPX_CQ) &&
@@ -993,7 +1051,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
- const int cq_level = get_active_cq_level(rc, oxcf);
+ const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf);
int active_best_quality;
int active_worst_quality = cpi->twopass.active_worst_quality;
int q;
@@ -1074,7 +1132,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.
@@ -1101,8 +1159,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
// Extension to max or min Q if undershoot or overshoot is outside
// the permitted range.
- if ((cpi->oxcf.rc_mode != VPX_Q) &&
- (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) {
+ if (cpi->oxcf.rc_mode != VPX_Q) {
if (frame_is_intra_only(cm) ||
(!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
@@ -1256,8 +1313,12 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
rc->frames_since_golden = 0;
// If we are not using alt ref in the upcoming group, clear the arf
- // active flag.
- if (!rc->source_alt_ref_pending) {
+ // active flag. In the multi-arf group case, if the index is not 0 then
+ // we are overlaying a mid-group arf, so the flag should not be reset.
+ if (cpi->oxcf.pass == 2) {
+ if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
+ rc->source_alt_ref_active = 0;
+ } else if (!rc->source_alt_ref_pending) {
rc->source_alt_ref_active = 0;
}
@@ -1274,6 +1335,26 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
}
}
+static void compute_frame_low_motion(VP9_COMP *const cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ int mi_row, mi_col;
+ MODE_INFO **mi = cm->mi_grid_visible;
+ RATE_CONTROL *const rc = &cpi->rc;
+ const int rows = cm->mi_rows, cols = cm->mi_cols;
+ int cnt_zeromv = 0;
+ for (mi_row = 0; mi_row < rows; mi_row++) {
+ for (mi_col = 0; mi_col < cols; mi_col++) {
+ if (abs(mi[0]->mv[0].as_mv.row) < 16 &&
+ abs(mi[0]->mv[0].as_mv.col) < 16)
+ cnt_zeromv++;
+ mi++;
+ }
+ mi += 8;
+ }
+ cnt_zeromv = 100 * cnt_zeromv / (rows * cols);
+ rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) >> 2;
+}
+
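
The update above is a 3/4-1/4 exponential moving average over the per-frame percentage of near-static blocks (both MV components under 16 in 1/8-pel units, i.e. within 2 pels); a worked instance with illustrative numbers:

    int avg_frame_low_motion = 60;  /* previous running average */
    int cnt_zeromv = 20;            /* 20% near-static blocks this frame */
    avg_frame_low_motion = (3 * avg_frame_low_motion + cnt_zeromv) >> 2;
    /* -> (180 + 20) / 4 = 50 */
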
void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1308,9 +1389,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
}
} else {
- if (rc->is_src_frame_alt_ref ||
- !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
- (cpi->use_svc && oxcf->rc_mode == VPX_CBR)) {
+ if ((cpi->use_svc && oxcf->rc_mode == VPX_CBR) ||
+ (!rc->is_src_frame_alt_ref &&
+ !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
rc->last_q[INTER_FRAME] = qindex;
rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
@@ -1383,6 +1464,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->next_frame_size_selector != rc->frame_size_selector;
rc->frame_size_selector = rc->next_frame_size_selector;
}
+
+ if (oxcf->pass == 0) {
+ if (cm->frame_type != KEY_FRAME)
+ compute_frame_low_motion(cpi);
+ }
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1421,6 +1507,24 @@ static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
return vp9_rc_clamp_iframe_target_size(cpi, target);
}
+static void adjust_gf_key_frame(VP9_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ rc->constrained_gf_group = 0;
+ // Reset the gf interval for more even spacing before the upcoming key frame.
+ if ((rc->frames_to_key <= 7 * rc->baseline_gf_interval >> 2) &&
+ (rc->frames_to_key > rc->baseline_gf_interval)) {
+ rc->baseline_gf_interval = rc->frames_to_key >> 1;
+ if (rc->baseline_gf_interval < 5)
+ rc->baseline_gf_interval = rc->frames_to_key;
+ rc->constrained_gf_group = 1;
+ } else {
+ // Reset since frames_till_gf_update_due must be <= frames_to_key.
+ if (rc->baseline_gf_interval > rc->frames_to_key) {
+ rc->baseline_gf_interval = rc->frames_to_key;
+ rc->constrained_gf_group = 1;
+ }
+ }
+}
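
A compact sketch of the interval rule above, with one illustrative case (assumed inputs, not from a real encode): frames_to_key = 12 and baseline = 8 gives 12 <= (7 * 8) >> 2 == 14 and 12 > 8, so the interval halves to 6, splitting the run to the key frame into two roughly equal GF groups:

    static int sketch_adjusted_gf_interval(int frames_to_key, int baseline) {
      if (frames_to_key <= (7 * baseline) >> 2 && frames_to_key > baseline) {
        const int half = frames_to_key >> 1;
        return (half < 5) ? frames_to_key : half;   /* (12, 8) -> 6 */
      }
      /* Otherwise just cap the interval at frames_to_key. */
      return (baseline > frames_to_key) ? frames_to_key : baseline;
    }
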
void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -1441,24 +1545,41 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
cm->frame_type = INTER_FRAME;
}
if (rc->frames_till_gf_update_due == 0) {
- rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- // NOTE: frames_till_gf_update_due must be <= frames_to_key.
- if (rc->frames_till_gf_update_due > rc->frames_to_key) {
- rc->frames_till_gf_update_due = rc->frames_to_key;
- rc->constrained_gf_group = 1;
+ double rate_err = 1.0;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) {
+ vp9_cyclic_refresh_set_golden_update(cpi);
} else {
- rc->constrained_gf_group = 0;
+ rc->baseline_gf_interval =
+ (rc->min_gf_interval + rc->max_gf_interval) / 2;
}
+ if (rc->rolling_target_bits > 0)
+ rate_err =
+ (double)rc->rolling_actual_bits / (double)rc->rolling_target_bits;
+ // Increase gf interval at high Q and high overshoot.
+ if (cm->current_video_frame > 30 &&
+ rc->avg_frame_qindex[INTER_FRAME] > (7 * rc->worst_quality) >> 3 &&
+ rate_err > 3.5) {
+ rc->baseline_gf_interval =
+ VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1);
+ } else if (cm->current_video_frame > 30 &&
+ rc->avg_frame_low_motion < 20) {
+ // Decrease boost and gf interval for high motion case.
+ rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
+ rc->baseline_gf_interval = VPXMAX(5, rc->baseline_gf_interval >> 1);
+ }
+ adjust_gf_key_frame(cpi);
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
cpi->refresh_golden_frame = 1;
rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
- rc->gfu_boost = DEFAULT_GF_BOOST;
}
if (cm->frame_type == KEY_FRAME)
target = calc_iframe_target_size_one_pass_vbr(cpi);
else
target = calc_pframe_target_size_one_pass_vbr(cpi);
vp9_rc_set_frame_target(cpi, target);
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0)
+ vp9_cyclic_refresh_update_parameters(cpi);
}
static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
@@ -1539,41 +1660,31 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
return vp9_rc_clamp_iframe_target_size(cpi, target);
}
-// Reset information needed to set proper reference frames and buffer updates
-// for temporal layering. This is called when a key frame is encoded.
-static void reset_temporal_layer_to_zero(VP9_COMP *cpi) {
- int sl;
- LAYER_CONTEXT *lc = NULL;
- cpi->svc.temporal_layer_id = 0;
-
- for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
- lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
- lc->current_video_frame_in_layer = 0;
- lc->frames_from_key_frame = 0;
- }
-}
-
void vp9_rc_get_svc_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target = rc->avg_frame_bandwidth;
- const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
+ int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers);
-
+ // Periodic key frames are based on the super-frame counter
+ // (svc.current_superframe); only the base spatial layer is a key frame.
if ((cm->current_video_frame == 0) ||
(cpi->frame_flags & FRAMEFLAGS_KEY) ||
- (cpi->oxcf.auto_key && (rc->frames_since_key %
- cpi->oxcf.key_freq == 0))) {
+ (cpi->oxcf.auto_key &&
+ (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) &&
+ cpi->svc.spatial_layer_id == 0)) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
-
if (is_two_pass_svc(cpi)) {
cpi->svc.layer_context[layer].is_key_frame = 1;
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
} else if (is_one_pass_cbr_svc(cpi)) {
+ if (cm->current_video_frame > 0)
+ vp9_svc_reset_key_frame(cpi);
+ layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
+ cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers);
cpi->svc.layer_context[layer].is_key_frame = 1;
- reset_temporal_layer_to_zero(cpi);
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
// Assumption here is that LAST_FRAME is being updated for a keyframe.
@@ -1715,29 +1826,36 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
RATE_CONTROL *const rc) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- // Set Maximum gf/arf interval
- rc->max_gf_interval = oxcf->max_gf_interval;
- rc->min_gf_interval = oxcf->min_gf_interval;
- if (rc->min_gf_interval == 0)
- rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
- oxcf->width, oxcf->height, cpi->framerate);
- if (rc->max_gf_interval == 0)
- rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
- cpi->framerate, rc->min_gf_interval);
+ // Special case code for 1 pass fixed Q mode tests
+ if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
+ rc->max_gf_interval = FIXED_GF_INTERVAL;
+ rc->min_gf_interval = FIXED_GF_INTERVAL;
+ rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
+ } else {
+ // Set Maximum gf/arf interval
+ rc->max_gf_interval = oxcf->max_gf_interval;
+ rc->min_gf_interval = oxcf->min_gf_interval;
+ if (rc->min_gf_interval == 0)
+ rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
+ oxcf->width, oxcf->height, cpi->framerate);
+ if (rc->max_gf_interval == 0)
+ rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
+ cpi->framerate, rc->min_gf_interval);
+
+ // Extended interval for genuinely static scenes
+ rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+
+ if (is_altref_enabled(cpi)) {
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+ if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+ rc->max_gf_interval = rc->static_scene_max_gf_interval;
- if (is_altref_enabled(cpi)) {
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ // Clamp min to max
+ rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
}
-
- if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
- rc->max_gf_interval = rc->static_scene_max_gf_interval;
-
- // Clamp min to max
- rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
}
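
A condensed sketch of the branch structure above (MAX_LAG_BUFFERS is assumed to be 25 here; it is defined elsewhere): 1-pass VPX_Q mode pins every interval to FIXED_GF_INTERVAL, while the normal path caps max by the static-scene limit and then clamps min to max:

    static void sketch_gf_range(int onepass_q_mode, int altref_enabled,
                                int lag_in_frames, int *min_gf, int *max_gf) {
      if (onepass_q_mode) {
        *min_gf = *max_gf = 8;            /* FIXED_GF_INTERVAL */
      } else {
        int static_max = 25 * 2;          /* MAX_LAG_BUFFERS * 2, assumed */
        if (altref_enabled && static_max > lag_in_frames - 1)
          static_max = lag_in_frames - 1;
        if (*max_gf > static_max) *max_gf = static_max;
        if (*min_gf > *max_gf) *min_gf = *max_gf;
      }
    }
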
void vp9_rc_update_framerate(VP9_COMP *cpi) {
@@ -1774,27 +1892,28 @@ static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) {
RATE_CONTROL *const rc = &cpi->rc;
int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
int max_delta;
- double position_factor = 1.0;
-
- // How far through the clip are we.
- // This number is used to damp the per frame rate correction.
- // Range 0 - 1.0
- if (cpi->twopass.total_stats.count) {
- position_factor = sqrt((double)cpi->common.current_video_frame /
- cpi->twopass.total_stats.count);
- }
- max_delta = (int)(position_factor *
- ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100));
-
- // vbr_bits_off_target > 0 means we have extra bits to spend
- if (vbr_bits_off_target > 0) {
- *this_frame_target +=
- (vbr_bits_off_target > max_delta) ? max_delta
- : (int)vbr_bits_off_target;
- } else {
- *this_frame_target -=
- (vbr_bits_off_target < -max_delta) ? max_delta
- : (int)-vbr_bits_off_target;
+ int frame_window = VPXMIN(16,
+ ((int)cpi->twopass.total_stats.count - cpi->common.current_video_frame));
+
+ // Calculate the adjustment to the rate for this frame.
+ if (frame_window > 0) {
+ max_delta = (vbr_bits_off_target > 0)
+ ? (int)(vbr_bits_off_target / frame_window)
+ : (int)(-vbr_bits_off_target / frame_window);
+
+ max_delta = VPXMIN(max_delta,
+ ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100));
+
+ // vbr_bits_off_target > 0 means we have extra bits to spend
+ if (vbr_bits_off_target > 0) {
+ *this_frame_target +=
+ (vbr_bits_off_target > max_delta) ? max_delta
+ : (int)vbr_bits_off_target;
+ } else {
+ *this_frame_target -=
+ (vbr_bits_off_target < -max_delta) ? max_delta
+ : (int)-vbr_bits_off_target;
+ }
}
// Fast redistribution of bits arising from massive local undershoot.
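
Illustrative arithmetic for the windowed correction above, assuming VBR_PCT_ADJUSTMENT_LIMIT is 50 (its definition is outside this hunk): with an 80000-bit surplus, 16 frames left in the window, and a 12000-bit frame target, the cap is min(80000 / 16, 12000 * 50 / 100) = min(5000, 6000) = 5000, so the target rises to 17000:

    int64_t off_target = 80000;   /* vbr_bits_off_target (assumed surplus) */
    int frame_window = 16;
    int frame_target = 12000;
    int max_delta = (int)(off_target / frame_window);        /* 5000 */
    if (max_delta > frame_target * 50 / 100)
      max_delta = frame_target * 50 / 100;                   /* pct cap */
    frame_target += (off_target > max_delta)
                        ? max_delta : (int)off_target;       /* 17000 */
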
@@ -1835,6 +1954,9 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
RESIZE_ACTION resize_action = NO_RESIZE;
int avg_qp_thr1 = 70;
int avg_qp_thr2 = 50;
+ int min_width = 180;
+ int min_height = 180;
+ int down_size_on = 1;
cpi->resize_scale_num = 1;
cpi->resize_scale_den = 1;
// Don't resize on key frame; reset the counters on key frame.
@@ -1843,6 +1965,21 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
cpi->resize_count = 0;
return 0;
}
+ // Check current frame resolution to avoid generating frames smaller than
+ // the minimum resolution.
+ if (ONEHALFONLY_RESIZE) {
+ if ((cm->width >> 1) < min_width || (cm->height >> 1) < min_height)
+ down_size_on = 0;
+ } else {
+ if (cpi->resize_state == ORIG &&
+ (cm->width * 3 / 4 < min_width ||
+ cm->height * 3 / 4 < min_height))
+ return 0;
+ else if (cpi->resize_state == THREE_QUARTER &&
+ ((cpi->oxcf.width >> 1) < min_width ||
+ (cpi->oxcf.height >> 1) < min_height))
+ down_size_on = 0;
+ }
#if CONFIG_VP9_TEMPORAL_DENOISING
// If denoiser is on, apply a smaller qp threshold.
@@ -1854,7 +1991,7 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
// Resize based on average buffer underflow and QP over some window.
// Ignore samples close to key frame, since QP is usually high after key.
- if (cpi->rc.frames_since_key > 1 * cpi->framerate) {
+ if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
const int window = (int)(4 * cpi->framerate);
cpi->resize_avg_qp += cm->base_qindex;
if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
@@ -1869,7 +2006,7 @@ int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
// down state, i.e. 1/2 or 3/4 of original resolution.
// Currently, use a flag to turn 3/4 resizing feature on/off.
if (cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
- if (cpi->resize_state == THREE_QUARTER) {
+ if (cpi->resize_state == THREE_QUARTER && down_size_on) {
resize_action = DOWN_ONEHALF;
cpi->resize_state = ONE_HALF;
} else if (cpi->resize_state == ORIG) {
@@ -1955,13 +2092,17 @@ void vp9_avg_source_sad(VP9_COMP *cpi) {
VP9_COMMON * const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
rc->high_source_sad = 0;
- if (cpi->Last_Source != NULL) {
+ if (cpi->Last_Source != NULL &&
+ cpi->Last_Source->y_width == cpi->Source->y_width &&
+ cpi->Last_Source->y_height == cpi->Source->y_height) {
const uint8_t *src_y = cpi->Source->y_buffer;
const int src_ystride = cpi->Source->y_stride;
const uint8_t *last_src_y = cpi->Last_Source->y_buffer;
const int last_src_ystride = cpi->Last_Source->y_stride;
int sbi_row, sbi_col;
const BLOCK_SIZE bsize = BLOCK_64X64;
+ uint32_t min_thresh = 4000;
+ float thresh = 8.0f;
// Loop over sub-sample of frame, and compute average sad over 64x64 blocks.
uint64_t avg_sad = 0;
int num_samples = 0;
@@ -1992,12 +2133,37 @@ void vp9_avg_source_sad(VP9_COMP *cpi) {
// between current and the previous frame value(s). Use a minimum threshold
// for cases where there is a small change from content that is completely
// static.
- if (avg_sad > VPXMAX(4000, (rc->avg_source_sad << 3)) &&
+ if (cpi->oxcf.rc_mode == VPX_VBR) {
+ min_thresh = 60000;
+ thresh = 2.1f;
+ }
+ if (avg_sad >
+ VPXMAX(min_thresh, (unsigned int)(rc->avg_source_sad * thresh)) &&
rc->frames_since_key > 1)
rc->high_source_sad = 1;
else
rc->high_source_sad = 0;
- rc->avg_source_sad = (rc->avg_source_sad + avg_sad) >> 1;
+ if (avg_sad > 0 || cpi->oxcf.rc_mode == VPX_CBR)
+ rc->avg_source_sad = (3 * rc->avg_source_sad + avg_sad) >> 2;
+ // For VBR, under scene change/high content change, force golden refresh.
+ if (cpi->oxcf.rc_mode == VPX_VBR &&
+ rc->high_source_sad &&
+ rc->frames_to_key > 3 &&
+ rc->count_last_scene_change > 4 &&
+ cpi->ext_refresh_frame_flags_pending == 0) {
+ int target;
+ cpi->refresh_golden_frame = 1;
+ rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
+ rc->baseline_gf_interval = VPXMIN(20,
+ VPXMAX(10, rc->baseline_gf_interval));
+ adjust_gf_key_frame(cpi);
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ target = calc_pframe_target_size_one_pass_vbr(cpi);
+ vp9_rc_set_frame_target(cpi, target);
+ rc->count_last_scene_change = 0;
+ } else {
+ rc->count_last_scene_change++;
+ }
}
}
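
The detection rule above reduces to one comparison; a hedged standalone sketch using the constants from this hunk (4000 / 8.0 for CBR, 60000 / 2.1 for VBR):

    static int sketch_high_source_sad(uint64_t avg_sad,
                                      uint64_t avg_source_sad,
                                      int is_vbr, int frames_since_key) {
      const uint64_t min_thresh = is_vbr ? 60000 : 4000;
      const double mult = is_vbr ? 2.1 : 8.0;
      const uint64_t scaled = (uint64_t)(avg_source_sad * mult);
      const uint64_t limit = scaled > min_thresh ? scaled : min_thresh;
      return avg_sad > limit && frames_since_key > 1;
    }
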
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h
index 136fd3e78..7024bcfa9 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -26,6 +26,7 @@ extern "C" {
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
+#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
#define ONEHALFONLY_RESIZE 0
typedef enum {
@@ -160,6 +161,8 @@ typedef struct {
uint64_t avg_source_sad;
int high_source_sad;
+ int count_last_scene_change;
+ int avg_frame_low_motion;
} RATE_CONTROL;
struct VP9_COMP;
diff --git a/libvpx/vp9/encoder/vp9_rd.c b/libvpx/vp9/encoder/vp9_rd.c
index b085c7a0c..91b291187 100644
--- a/libvpx/vp9/encoder/vp9_rd.c
+++ b/libvpx/vp9/encoder/vp9_rd.c
@@ -41,7 +41,6 @@
#include "vp9/encoder/vp9_tokenize.h"
#define RD_THRESH_POW 1.25
-#define RD_MULT_EPB_RATIO 64
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
@@ -76,10 +75,12 @@ static void fill_mode_costs(VP9_COMP *cpi) {
vp9_intra_mode_tree);
vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
- vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
- vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
- vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
- fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
+ for (i = 0; i < INTRA_MODES; ++i) {
+ vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
+ vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree);
+ vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
+ fc->uv_mode_prob[i], vp9_intra_mode_tree);
+ }
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
vp9_cost_tokens(cpi->switchable_interp_costs[i],
@@ -277,8 +278,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
- x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
- x->errorperbit += (x->errorperbit == 0);
+ set_error_per_bit(x, rd->RDMULT);
x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ? 0 : 1;
@@ -286,29 +286,37 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_block_thresholds(cm, rd);
set_partition_probs(cm, xd);
- if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
- fill_token_costs(x->token_costs, cm->fc->coef_probs);
-
- if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
- cm->frame_type == KEY_FRAME) {
- for (i = 0; i < PARTITION_CONTEXTS; ++i)
- vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
- vp9_partition_tree);
- }
+ if (cpi->oxcf.pass == 1) {
+ if (!frame_is_intra_only(cm))
+ vp9_build_nmv_cost_table(
+ x->nmvjointcost,
+ cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
+ &cm->fc->nmvc, cm->allow_high_precision_mv);
+ } else {
+ if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
+ fill_token_costs(x->token_costs, cm->fc->coef_probs);
+
+ if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
+ cm->frame_type == KEY_FRAME) {
+ for (i = 0; i < PARTITION_CONTEXTS; ++i)
+ vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
+ vp9_partition_tree);
+ }
- if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
- cm->frame_type == KEY_FRAME) {
- fill_mode_costs(cpi);
+ if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
+ cm->frame_type == KEY_FRAME) {
+ fill_mode_costs(cpi);
- if (!frame_is_intra_only(cm)) {
- vp9_build_nmv_cost_table(x->nmvjointcost,
- cm->allow_high_precision_mv ? x->nmvcost_hp
- : x->nmvcost,
- &cm->fc->nmvc, cm->allow_high_precision_mv);
+ if (!frame_is_intra_only(cm)) {
+ vp9_build_nmv_cost_table(
+ x->nmvjointcost,
+ cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
+ &cm->fc->nmvc, cm->allow_high_precision_mv);
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
- cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
+ cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
+ }
}
}
}
@@ -341,6 +349,7 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
38, 28, 21, 16, 12, 10, 8, 6,
5, 3, 2, 1, 1, 1, 0, 0,
};
+
// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
@@ -407,7 +416,7 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
(((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
model_rd_norm(xsq_q10, &r_q10, &d_q10);
- *rate = ((r_q10 << n_log2) + 2) >> 2;
+ *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
*dist = (var * (int64_t)d_q10 + 512) >> 10;
}
}
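
The rate conversion above replaces a hard-coded "(x + 2) >> 2" with a shift derived from VP9_PROB_COST_SHIFT, keeping the Q10 rate table aligned with the precision of the probability-based bit costs. A worked example, assuming VP9_PROB_COST_SHIFT == 9 for this release (the macro body matches the usual libvpx definition):

#include <assert.h>
#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

int main(void) {
  const int r_q10 = 1023;  /* rate from the Q10 lookup table */
  const int n_log2 = 2;    /* block-size scaling, as in the caller */
  /* Q10 -> Q9 costs: shift by 10 - VP9_PROB_COST_SHIFT = 1, with rounding.
   * The old "(x + 2) >> 2" was the same conversion for 8-bit costs. */
  const int rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - 9);
  printf("rate = %d\n", rate);
  assert(rate == (4092 + 1) >> 1); /* 2046 */
  return 0;
}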
@@ -555,10 +564,10 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
}
int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const MODE_INFO *const mi = xd->mi[0];
const int ctx = vp9_get_pred_context_switchable_interp(xd);
return SWITCHABLE_INTERP_RATE_FACTOR *
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ cpi->switchable_interp_costs[ctx][mi->interp_filter];
}
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
diff --git a/libvpx/vp9/encoder/vp9_rd.h b/libvpx/vp9/encoder/vp9_rd.h
index 28385c981..9b8e2732c 100644
--- a/libvpx/vp9/encoder/vp9_rd.h
+++ b/libvpx/vp9/encoder/vp9_rd.h
@@ -17,15 +17,17 @@
#include "vp9/encoder/vp9_block.h"
#include "vp9/encoder/vp9_context_tree.h"
+#include "vp9/encoder/vp9_cost.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RDDIV_BITS 7
+#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) \
- (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
+ (ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
#define QIDX_SKIP_THRESH 115
#define MV_COST_WEIGHT 108
@@ -167,6 +169,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
+static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
+ x->errorperbit = rdmult >> RD_EPB_SHIFT;
+ x->errorperbit += (x->errorperbit == 0);
+}
+
void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size);
@@ -181,6 +188,15 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd,
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
vpx_bit_depth_t bit_depth);
+unsigned int vp9_get_sby_perpixel_variance(struct VP9_COMP *cpi,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bs);
+#if CONFIG_VP9_HIGHBITDEPTH
+unsigned int vp9_high_get_sby_perpixel_variance(struct VP9_COMP *cpi,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bs, int bd);
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif
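
A compact sketch of the two additions above: the rewritten RDCOST rounds rate * rdmult by 2^VP9_PROB_COST_SHIFT instead of a fixed ">> 8", and set_error_per_bit() centralizes the errorperbit computation that vp9_initialize_rd_consts() previously did inline. VP9_PROB_COST_SHIFT == 9 is assumed; the helpers below mirror the macros rather than reusing the libvpx headers:

#include <stdint.h>
#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
#define PROB_COST_SHIFT 9 /* assumed value of VP9_PROB_COST_SHIFT */
#define EPB_SHIFT 6       /* mirrors RD_EPB_SHIFT above */

static int64_t rdcost(int rdmult, int rddiv, int rate, int64_t dist) {
  /* Same shape as the rewritten RDCOST macro. */
  return ROUND_POWER_OF_TWO((int64_t)rate * rdmult, PROB_COST_SHIFT) +
         (dist << rddiv);
}

static int error_per_bit(int rdmult) {
  /* Same shape as set_error_per_bit(): never allow zero. */
  int epb = rdmult >> EPB_SHIFT;
  return epb + (epb == 0);
}

int main(void) {
  const int rdmult = 70, rddiv = 7; /* RDDIV_BITS */
  printf("rd  = %lld\n", (long long)rdcost(rdmult, rddiv, 100, 3));
  printf("epb = %d\n", error_per_bit(rdmult)); /* 70 >> 6 == 1 */
  return 0;
}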
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index 4f3a06e99..e65e05112 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -165,7 +165,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
int i;
int64_t rate_sum = 0;
int64_t dist_sum = 0;
- const int ref = xd->mi[0]->mbmi.ref_frame[0];
+ const int ref = xd->mi[0]->ref_frame[0];
unsigned int sse;
unsigned int var = 0;
unsigned int sum_sse = 0;
@@ -248,7 +248,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
int quantizer = (pd->dequant[1] >> dequant_shift);
if (quantizer < 120)
- rate = (square_error * (280 - quantizer)) >> 8;
+ rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
else
rate = 0;
dist = (square_error * quantizer) >> 8;
@@ -361,73 +361,96 @@ static int cost_coeffs(MACROBLOCK *x,
const int16_t *scan, const int16_t *nb,
int use_fast_coef_costing) {
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
const struct macroblock_plane *p = &x->plane[plane];
const PLANE_TYPE type = get_plane_type(plane);
const int16_t *band_count = &band_counts[tx_size][1];
const int eob = p->eobs[block];
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- x->token_costs[tx_size][type][is_inter_block(mbmi)];
+ x->token_costs[tx_size][type][is_inter_block(mi)];
uint8_t token_cache[32 * 32];
int pt = combine_entropy_contexts(*A, *L);
int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
- const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
+ const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
- const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
+ const int *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
// Check for consistency of tx_size with mode info
- assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size :
- get_uv_tx_size(mbmi, &xd->plane[plane]) == tx_size);
+ assert(type == PLANE_TYPE_Y ? mi->tx_size == tx_size :
+ get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);
if (eob == 0) {
// single eob token
cost = token_costs[0][0][pt][EOB_TOKEN];
c = 0;
} else {
- int band_left = *band_count++;
-
- // dc token
- int v = qcoeff[0];
- int16_t prev_t;
- EXTRABIT e;
- vp9_get_token_extra(v, &prev_t, &e);
- cost = (*token_costs)[0][pt][prev_t] +
- vp9_get_cost(prev_t, e, cat6_high_cost);
-
- token_cache[0] = vp9_pt_energy_class[prev_t];
- ++token_costs;
-
- // ac tokens
- for (c = 1; c < eob; c++) {
- const int rc = scan[c];
- int16_t t;
-
- v = qcoeff[rc];
- vp9_get_token_extra(v, &t, &e);
- if (use_fast_coef_costing) {
- cost += (*token_costs)[!prev_t][!prev_t][t] +
- vp9_get_cost(t, e, cat6_high_cost);
- } else {
- pt = get_coef_context(nb, token_cache, c);
- cost += (*token_costs)[!prev_t][pt][t] +
- vp9_get_cost(t, e, cat6_high_cost);
- token_cache[rc] = vp9_pt_energy_class[t];
- }
- prev_t = t;
- if (!--band_left) {
- band_left = *band_count++;
- ++token_costs;
+ if (use_fast_coef_costing) {
+ int band_left = *band_count++;
+
+ // dc token
+ int v = qcoeff[0];
+ int16_t prev_t;
+ cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
+ cost += (*token_costs)[0][pt][prev_t];
+
+ token_cache[0] = vp9_pt_energy_class[prev_t];
+ ++token_costs;
+
+ // ac tokens
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+ int16_t t;
+
+ v = qcoeff[rc];
+ cost += vp9_get_token_cost(v, &t, cat6_high_cost);
+ cost += (*token_costs)[!prev_t][!prev_t][t];
+ prev_t = t;
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
}
- }
- // eob token
- if (band_left) {
- if (use_fast_coef_costing) {
+ // eob token
+ if (band_left)
cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
- } else {
+
+ } else { // !use_fast_coef_costing
+ int band_left = *band_count++;
+
+ // dc token
+ int v = qcoeff[0];
+ int16_t tok;
+ unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
+ cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
+ cost += (*token_costs)[0][pt][tok];
+
+ token_cache[0] = vp9_pt_energy_class[tok];
+ ++token_costs;
+
+ tok_cost_ptr = &((*token_costs)[!tok]);
+
+ // ac tokens
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+
+ v = qcoeff[rc];
+ cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
+ pt = get_coef_context(nb, token_cache, c);
+ cost += (*tok_cost_ptr)[pt][tok];
+ token_cache[rc] = vp9_pt_energy_class[tok];
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
+ tok_cost_ptr = &((*token_costs)[!tok]);
+ }
+
+ // eob token
+ if (band_left) {
pt = get_coef_context(nb, token_cache, c);
cost += (*token_costs)[0][pt][EOB_TOKEN];
}
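
The cost_coeffs() rewrite above duplicates the token loop so the fast path can keep using !prev_t as its context while the exact path advances a hoisted per-context cost pointer; both paths still share the band-count mechanism, in which the cost-table pointer moves forward each time a band's coefficient budget runs out. A toy of just that band walk (all values invented; only the control flow mirrors the patch):

#include <stdio.h>

int main(void) {
  static const int band_counts[] = { 1, 2, 3, 4 };      /* per-band sizes */
  static const int band_cost[]   = { 100, 80, 60, 40 }; /* cost per coeff */
  const int eob = 7;                                    /* coeffs to code */
  const int *count = band_counts, *cost_tbl = band_cost;
  int band_left = *count++;
  int c, cost = 0;
  for (c = 0; c < eob; ++c) {
    cost += *cost_tbl;
    if (!--band_left) { /* band exhausted: move to the next cost table */
      band_left = *count++;
      ++cost_tbl;
    }
  }
  printf("total cost = %d\n", cost); /* 100 + 2*80 + 3*60 + 40 = 480 */
  return 0;
}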
@@ -461,7 +484,7 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_sse = this_sse >> shift;
- if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
+ if (x->skip_encode && !is_inter_block(xd->mi[0])) {
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >>
@@ -491,7 +514,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
struct rdcost_block_args *args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
int64_t rd1, rd2, rd;
int rate;
int64_t dist;
@@ -500,8 +523,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
if (args->exit_early)
return;
- if (!is_inter_block(mbmi)) {
- struct encode_b_args arg = {x, NULL, &mbmi->skip};
+ if (!is_inter_block(mi)) {
+ struct encode_b_args arg = {x, NULL, &mi->skip};
vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
dist_block(x, plane, block, tx_size, &dist, &sse);
} else if (max_txsize_lookup[plane_bsize] == tx_size) {
@@ -588,7 +611,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
args.skippable = 1;
if (plane == 0)
- xd->mi[0]->mbmi.tx_size = tx_size;
+ xd->mi[0]->tx_size = tx_size;
vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
@@ -618,13 +641,13 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
VP9_COMMON *const cm = &cpi->common;
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
- mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
+ mi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
txfm_rd_in_plane(x, rate, distortion, skip,
sse, ref_best_rd, 0, bs,
- mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+ mi->tx_size, cpi->sf.use_fast_coef_costing);
}
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -637,7 +660,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
@@ -684,7 +707,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
rd[n][0] = rd[n][1] = INT64_MAX;
} else if (s[n]) {
- if (is_inter_block(mbmi)) {
+ if (is_inter_block(mi)) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
r[n][1] -= r_tx_size;
} else {
@@ -696,7 +719,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
- if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
+ if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
}
@@ -713,12 +736,12 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
best_rd = rd[n][1];
}
}
- mbmi->tx_size = best_tx;
+ mi->tx_size = best_tx;
- *distortion = d[mbmi->tx_size];
- *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
- *skip = s[mbmi->tx_size];
- *psse = sse[mbmi->tx_size];
+ *distortion = d[mi->tx_size];
+ *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
+ *skip = s[mi->tx_size];
+ *psse = sse[mi->tx_size];
}
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -729,7 +752,7 @@ static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t sse;
int64_t *ret_sse = psse ? psse : &sse;
- assert(bs == xd->mi[0]->mbmi.sb_type);
+ assert(bs == xd->mi[0]->sb_type);
if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
@@ -787,10 +810,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t best_dst16[8 * 8];
#endif
+ memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
+ memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
- memcpy(ta, a, sizeof(ta));
- memcpy(tl, l, sizeof(tl));
- xd->mi[0]->mbmi.tx_size = TX_4X4;
+ xd->mi[0]->tx_size = TX_4X4;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -810,8 +833,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -874,8 +897,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
@@ -914,8 +937,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -976,8 +999,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
@@ -1005,7 +1028,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
MODE_INFO *const mic = xd->mi[0];
const MODE_INFO *above_mi = xd->above_mi;
const MODE_INFO *left_mi = xd->left_mi;
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
@@ -1013,12 +1036,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[4], t_left[4];
const int *bmode_costs = cpi->mbmode_cost;
- memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
- memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
-
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1034,8 +1053,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
}
this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
- bmode_costs, t_above + idx, t_left + idy,
+ bmode_costs,
+ xd->plane[0].above_context + idx,
+ xd->plane[0].left_context + idy,
&r, &ry, &d, bsize, best_rd - total_rd);
+
if (this_rd >= best_rd - total_rd)
return INT64_MAX;
@@ -1058,7 +1080,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
*rate = cost;
*rate_y = tot_rate_y;
*distortion = total_distortion;
- mic->mbmi.mode = mic->bmi[3].as_mode;
+ mic->mode = mic->bmi[3].as_mode;
return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
@@ -1095,7 +1117,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
break;
}
- mic->mbmi.mode = mode;
+ mic->mode = mode;
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
&s, NULL, bsize, best_rd);
@@ -1109,7 +1131,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (this_rd < best_rd) {
mode_selected = mode;
best_rd = this_rd;
- best_tx = mic->mbmi.tx_size;
+ best_tx = mic->tx_size;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
@@ -1117,8 +1139,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- mic->mbmi.mode = mode_selected;
- mic->mbmi.tx_size = best_tx;
+ mic->mode = mode_selected;
+ mic->tx_size = best_tx;
return best_rd;
}
@@ -1130,8 +1152,8 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+ MODE_INFO *const mi = xd->mi[0];
+ const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
int plane;
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
@@ -1140,7 +1162,7 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
if (ref_best_rd < 0)
is_cost_valid = 0;
- if (is_inter_block(mbmi) && is_cost_valid) {
+ if (is_inter_block(mi) && is_cost_valid) {
int plane;
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
vp9_subtract_plane(x, bsize, plane);
@@ -1192,14 +1214,20 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
+#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
+ (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
+ continue;
+#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
- xd->mi[0]->mbmi.uv_mode = mode;
+ xd->mi[0]->uv_mode = mode;
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd))
continue;
this_rate = this_rate_tokenonly +
- cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
+ cpi->intra_uv_mode_cost[cpi->common.frame_type]
+ [xd->mi[0]->mode][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
@@ -1214,7 +1242,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- xd->mi[0]->mbmi.uv_mode = mode_selected;
+ xd->mi[0]->uv_mode = mode_selected;
return best_rd;
}
@@ -1225,11 +1253,13 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
const VP9_COMMON *cm = &cpi->common;
int64_t unused;
- x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
+ x->e_mbd.mi[0]->uv_mode = DC_PRED;
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
super_block_uvrd(cpi, x, rate_tokenonly, distortion,
skippable, &unused, bsize, INT64_MAX);
- *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+ *rate = *rate_tokenonly +
+ cpi->intra_uv_mode_cost[cm->frame_type]
+ [x->e_mbd.mi[0]->mode][DC_PRED];
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
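
Both call sites above now index intra_uv_mode_cost with the block's luma mode as well, matching how vp9_kf_uv_mode_prob and fc->uv_mode_prob are laid out (see the fill_mode_costs() loop in vp9_rd.c earlier in this change). A toy illustration of the new lookup shape, with invented costs and a reduced mode list:

#include <stdio.h>

/* Toy subset of the intra mode list; the real table spans all of
 * INTRA_MODES in both mode dimensions. */
enum { DC_PRED, V_PRED, H_PRED, TM_PRED, MODES };

static int uv_cost[2 /* KEY_FRAME, INTER_FRAME */][MODES][MODES];

int main(void) {
  int f, y, uv;
  /* Invented fill: pretend it is cheaper to reuse the luma direction. */
  for (f = 0; f < 2; ++f)
    for (y = 0; y < MODES; ++y)
      for (uv = 0; uv < MODES; ++uv)
        uv_cost[f][y][uv] = (y == uv) ? 50 : 120;

  /* New lookup shape: [frame_type][luma mode][chroma mode]. */
  printf("uv rate = %d\n", uv_cost[0][V_PRED][V_PRED]); /* 50 */
  return 0;
}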
@@ -1251,7 +1281,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
}
- *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
+ *mode_uv = x->e_mbd.mi[0]->uv_mode;
}
static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
@@ -1267,31 +1297,30 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
int_mv seg_mvs[MAX_REF_FRAMES],
int_mv *best_ref_mv[2], const int *mvjcost,
int *mvcost[2]) {
- MODE_INFO *const mic = xd->mi[0];
- const MB_MODE_INFO *const mbmi = &mic->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
int thismvcost = 0;
int idx, idy;
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
- const int is_compound = has_second_ref(mbmi);
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
+ const int is_compound = has_second_ref(mi);
switch (mode) {
case NEWMV:
- this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+ this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int;
thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
if (is_compound) {
- this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+ this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int;
thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
}
break;
case NEARMV:
case NEARESTMV:
- this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
+ this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int;
if (is_compound)
- this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
+ this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
break;
case ZEROMV:
this_mv[0].as_int = 0;
@@ -1302,17 +1331,17 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
break;
}
- mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
+ mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
if (is_compound)
- mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
+ mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
- mic->bmi[i].as_mode = mode;
+ mi->bmi[i].as_mode = mode;
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
- memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
+ memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i]));
- return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
+ return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) +
thismvcost;
}
@@ -1330,7 +1359,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
struct macroblockd_plane *const pd = &xd->plane[0];
struct macroblock_plane *const p = &x->plane[0];
MODE_INFO *const mi = xd->mi[0];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd);
const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
int idx, idy;
@@ -1342,15 +1371,29 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0, ref;
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
- const int is_compound = has_second_ref(&mi->mbmi);
- const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter];
+ const int is_compound = has_second_ref(mi);
+ const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
for (ref = 0; ref < 1 + is_compound; ++ref) {
- const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
- pd->pre[ref].stride)];
+ const int bw = b_width_log2_lookup[BLOCK_8X8];
+ const int h = 4 * (i >> bw);
+ const int w = 4 * (i & ((1 << bw) - 1));
+ const struct scale_factors *sf = &xd->block_refs[ref]->sf;
+ int y_stride = pd->pre[ref].stride;
+ uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
+
+ if (vp9_is_scaled(sf)) {
+ const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
+ const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+
+ y_stride = xd->block_refs[ref]->buf->y_stride;
+ pre = xd->block_refs[ref]->buf->y_buffer;
+ pre += scaled_buffer_offset(x_start + w, y_start + h,
+ y_stride, sf);
+ }
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp9_highbd_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_highbd_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height,
@@ -1358,7 +1401,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_col * MI_SIZE + 4 * (i % 2),
mi_row * MI_SIZE + 4 * (i / 2), xd->bd);
} else {
- vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
@@ -1367,7 +1410,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
mi_row * MI_SIZE + 4 * (i / 2));
}
#else
- vp9_build_inter_predictor(pre, pd->pre[ref].stride,
+ vp9_build_inter_predictor(pre, y_stride,
dst, pd->dst.stride,
&mi->bmi[i].as_mv[ref].as_mv,
&xd->block_refs[ref]->sf, width, height, ref,
@@ -1467,7 +1510,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
}
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
- MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
+ MODE_INFO *const mi = x->e_mbd.mi[0];
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
@@ -1476,17 +1519,17 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
pd->pre[0].buf = &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[0].stride)];
- if (has_second_ref(mbmi))
+ if (has_second_ref(mi))
pd->pre[1].buf = &pd->pre[1].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[1].stride)];
}
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
struct buf_2d orig_pre[2]) {
- MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ MODE_INFO *mi = x->e_mbd.mi[0];
x->plane[0].src = orig_src;
x->e_mbd.plane[0].pre[0] = orig_pre[0];
- if (has_second_ref(mbmi))
+ if (has_second_ref(mi))
x->e_mbd.plane[0].pre[1] = orig_pre[1];
}
@@ -1541,20 +1584,20 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- const int refs[2] = {mbmi->ref_frame[0],
- mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
+ MODE_INFO *mi = xd->mi[0];
+ const int refs[2] = {mi->ref_frame[0],
+ mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]};
int_mv ref_mv[2];
int ite, ref;
- const InterpKernel *kernel = vp9_filter_kernels[mbmi->interp_filter];
+ const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
struct scale_factors sf;
// Do joint motion search in compound mode to get more accurate mv.
struct buf_2d backup_yv12[2][MAX_MB_PLANE];
- int last_besterr[2] = {INT_MAX, INT_MAX};
+ uint32_t last_besterr[2] = {UINT32_MAX, UINT32_MAX};
const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
- vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
- vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
+ vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]),
+ vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1])
};
// Prediction buffer from second frame.
@@ -1597,7 +1640,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// and break out of the search loop if it couldn't find a better mv.
for (ite = 0; ite < 4; ite++) {
struct buf_2d ref_yv12[2];
- int bestsme = INT_MAX;
+ uint32_t bestsme = UINT32_MAX;
int sadpb = x->sadperbit16;
MV tmp_mv;
int search_range = 3;
@@ -1662,7 +1705,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
search_range,
&cpi->fn_ptr[bsize],
&ref_mv[id].as_mv, second_pred);
- if (bestsme < INT_MAX)
+ if (bestsme < UINT32_MAX)
bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
second_pred, &cpi->fn_ptr[bsize], 1);
@@ -1671,9 +1714,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
x->mv_row_min = tmp_row_min;
x->mv_row_max = tmp_row_max;
- if (bestsme < INT_MAX) {
- int dis; /* TODO: use dis in distortion calculation later. */
- unsigned int sse;
+ if (bestsme < UINT32_MAX) {
+ uint32_t dis; /* TODO: use dis in distortion calculation later. */
+ uint32_t sse;
bestsme = cpi->find_fractional_mv_step(
x, &tmp_mv,
&ref_mv[id].as_mv,
@@ -1730,7 +1773,6 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
- MB_MODE_INFO *mbmi = &mi->mbmi;
int mode_idx;
int k, br = 0, idx, idy;
int64_t bd = 0, block_sse = 0;
@@ -1742,13 +1784,14 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t this_segment_rd = 0;
int label_mv_thresh;
int segmentyrate = 0;
- const BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE bsize = mi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
- const int has_second_rf = has_second_ref(mbmi);
- const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
+ SPEED_FEATURES *const sf = &cpi->sf;
+ const int has_second_rf = has_second_ref(mi);
+ const int inter_mode_mask = sf->inter_mode_mask[bsize];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
vp9_zero(*bsi);
@@ -1784,7 +1827,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
int ref;
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
- const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
frame_mv[ZEROMV][frame].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
&frame_mv[NEARESTMV][frame],
@@ -1803,7 +1846,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
- this_mode, mbmi->ref_frame))
+ this_mode, mi->ref_frame))
continue;
memcpy(orig_pre, pd->pre, sizeof(orig_pre));
@@ -1814,10 +1857,10 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
// motion search for newmv (single predictor case only)
if (!has_second_rf && this_mode == NEWMV &&
- seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
+ seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) {
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
int step_param = 0;
- int thissme, bestsme = INT_MAX;
+ uint32_t bestsme = UINT32_MAX;
int sadpb = x->sadperbit4;
MV mvp_full;
int max_mv;
@@ -1837,12 +1880,12 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
if (i == 0)
- max_mv = x->max_mv_context[mbmi->ref_frame[0]];
+ max_mv = x->max_mv_context[mi->ref_frame[0]];
else
max_mv =
VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
- if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+ if (sf->mv.auto_mv_step_size && cm->show_frame) {
// Take a weighted average of the step_params based on the last frame's
// max mv magnitude and the best ref mvs of the current block for
// the given reference.
@@ -1855,9 +1898,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.row = bsi->mvp.as_mv.row >> 3;
mvp_full.col = bsi->mvp.as_mv.col >> 3;
- if (cpi->sf.adaptive_motion_search) {
- mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
- mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
+ if (sf->adaptive_motion_search) {
+ mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
+ mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
step_param = VPXMAX(step_param, 8);
}
@@ -1868,77 +1911,56 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
bestsme = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, sadpb,
- cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
+ sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
&bsi->ref_mv[0]->as_mv, new_mv,
INT_MAX, 1);
- // Should we do a full search (best quality only)
- if (cpi->oxcf.mode == BEST) {
- int_mv *const best_mv = &mi->bmi[i].as_mv[0];
- /* Check if mvp_full is within the range. */
- clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
- x->mv_row_min, x->mv_row_max);
- thissme = cpi->full_search_sad(x, &mvp_full,
- sadpb, 16, &cpi->fn_ptr[bsize],
- &bsi->ref_mv[0]->as_mv,
- &best_mv->as_mv);
- cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
- if (thissme < bestsme) {
- bestsme = thissme;
- *new_mv = best_mv->as_mv;
- } else {
- // The full search result is actually worse so re-instate the
- // previous best vector
- best_mv->as_mv = *new_mv;
- }
- }
-
- if (bestsme < INT_MAX) {
- int distortion;
+ if (bestsme < UINT32_MAX) {
+ uint32_t distortion;
cpi->find_fractional_mv_step(
x,
new_mv,
&bsi->ref_mv[0]->as_mv,
cm->allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
+ sf->mv.subpel_force_stop,
+ sf->mv.subpel_iters_per_step,
cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost,
&distortion,
- &x->pred_sse[mbmi->ref_frame[0]],
+ &x->pred_sse[mi->ref_frame[0]],
NULL, 0, 0);
// save motion search result for use in compound prediction
- seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
+ seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
}
- if (cpi->sf.adaptive_motion_search)
- x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
+ if (sf->adaptive_motion_search)
+ x->pred_mv[mi->ref_frame[0]] = *new_mv;
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
}
if (has_second_rf) {
- if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
- seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
+ if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV ||
+ seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV)
continue;
}
if (has_second_rf && this_mode == NEWMV &&
- mbmi->interp_filter == EIGHTTAP) {
+ mi->interp_filter == EIGHTTAP) {
// adjust src pointers
mi_buf_shift(x, i);
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ if (sf->comp_inter_joint_search_thresh <= bsize) {
int rate_mv;
joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
mi_row, mi_col, seg_mvs[i],
&rate_mv);
- seg_mvs[i][mbmi->ref_frame[0]].as_int =
- frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
- seg_mvs[i][mbmi->ref_frame[1]].as_int =
- frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
+ seg_mvs[i][mi->ref_frame[0]].as_int =
+ frame_mv[this_mode][mi->ref_frame[0]].as_int;
+ seg_mvs[i][mi->ref_frame[1]].as_int =
+ frame_mv[this_mode][mi->ref_frame[1]].as_int;
}
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
@@ -2080,7 +2102,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < 4; i++) {
mode_idx = INTER_OFFSET(bsi->modes[i]);
mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
- if (has_second_ref(mbmi))
+ if (has_second_ref(mi))
mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
mi->bmi[i].as_mode = bsi->modes[i];
@@ -2094,7 +2116,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
*returnyrate = bsi->segment_yrate;
*skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
*psse = bsi->sse;
- mbmi->mode = bsi->modes[3];
+ mi->mode = bsi->modes[3];
return bsi->segment_rd;
}
@@ -2205,7 +2227,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
// Gets an initial list of candidate vectors from neighbours and orders them
vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
- NULL, NULL, mbmi_ext->mode_context);
+ mbmi_ext->mode_context);
// Candidate refinement carried out at encoder and decoder
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
@@ -2226,13 +2248,13 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *tmp_mv, int *rate_mv) {
MACROBLOCKD *xd = &x->e_mbd;
const VP9_COMMON *cm = &cpi->common;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
int bestsme = INT_MAX;
int step_param;
int sadpb = x->sadperbit16;
MV mvp_full;
- int ref = mbmi->ref_frame[0];
+ int ref = mi->ref_frame[0];
MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
int tmp_col_min = x->mv_col_min;
@@ -2324,7 +2346,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
x->mv_row_max = tmp_row_max;
if (bestsme < INT_MAX) {
- int dis; /* TODO: use dis in distortion calculation later. */
+ uint32_t dis; /* TODO: use dis in distortion calculation later. */
cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
x->errorperbit,
@@ -2398,14 +2420,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t filter_cache[]) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
- const int is_comp_pred = has_second_ref(mbmi);
- const int this_mode = mbmi->mode;
+ const int is_comp_pred = has_second_ref(mi);
+ const int this_mode = mi->mode;
int_mv *frame_mv = mode_mv[this_mode];
int i;
- int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+ int refs[2] = { mi->ref_frame[0],
+ (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) };
int_mv cur_mv[2];
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
@@ -2443,10 +2465,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (pred_filter_search) {
INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
- if (xd->up_available)
- af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
- if (xd->left_available)
- lf = xd->mi[-1]->mbmi.interp_filter;
+ if (xd->above_mi)
+ af = xd->above_mi->interp_filter;
+ if (xd->left_mi)
+ lf = xd->left_mi->interp_filter;
if ((this_mode != NEWMV) || (af == lf))
best_filter = af;
@@ -2514,7 +2536,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (mv_check_bounds(x, &cur_mv[i].as_mv))
return INT64_MAX;
- mbmi->mv[i].as_int = cur_mv[i].as_int;
+ mi->mv[i].as_int = cur_mv[i].as_int;
}
// Do the first prediction into the destination buffer. Do the next
@@ -2544,14 +2566,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
- mbmi->mode != NEARESTMV)
+ mi->mode != NEARESTMV)
return INT64_MAX;
pred_exists = 0;
// Are all MVs integer pel for Y and UV
- intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+ intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv);
if (is_comp_pred)
- intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+ intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv);
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
@@ -2572,7 +2594,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int tmp_skip_sb = 0;
int64_t tmp_skip_sse = INT64_MAX;
- mbmi->interp_filter = i;
+ mi->interp_filter = i;
rs = vp9_get_switchable_rate(cpi, xd);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -2597,7 +2619,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if ((cm->interp_filter == SWITCHABLE &&
(!i || best_needs_copy)) ||
(cm->interp_filter != SWITCHABLE &&
- (cm->interp_filter == mbmi->interp_filter ||
+ (cm->interp_filter == mi->interp_filter ||
(i == 0 && intpel_mv)))) {
restore_dst_buf(xd, orig_dst, orig_dst_stride);
} else {
@@ -2634,14 +2656,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (newbest) {
best_rd = rd;
- best_filter = mbmi->interp_filter;
+ best_filter = mi->interp_filter;
if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
best_needs_copy = !best_needs_copy;
}
if ((cm->interp_filter == SWITCHABLE && newbest) ||
(cm->interp_filter != SWITCHABLE &&
- cm->interp_filter == mbmi->interp_filter)) {
+ cm->interp_filter == mi->interp_filter)) {
pred_exists = 1;
tmp_rd = best_rd;
@@ -2655,7 +2677,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
// Set the appropriate filter
- mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
+ mi->interp_filter = cm->interp_filter != SWITCHABLE ?
cm->interp_filter : best_filter;
rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;
@@ -2683,7 +2705,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
if (!is_comp_pred)
- single_filter[this_mode][refs[0]] = mbmi->interp_filter;
+ single_filter[this_mode][refs[0]] = mi->interp_filter;
if (cpi->sf.adaptive_mode_search)
if (is_comp_pred)
@@ -2770,8 +2792,8 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
TX_SIZE max_uv_tx_size;
x->skip_encode = 0;
ctx->skip = 0;
- xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
- xd->mi[0]->mbmi.ref_frame[1] = NONE;
+ xd->mi[0]->ref_frame[0] = INTRA_FRAME;
+ xd->mi[0]->ref_frame[1] = NONE;
if (bsize >= BLOCK_8X8) {
if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
@@ -2788,7 +2810,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
return;
}
}
- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
+ max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->tx_size, bsize,
pd[1].subsampling_x,
pd[1].subsampling_y);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
@@ -2848,9 +2870,9 @@ static void rd_variance_adjustment(VP9_COMP *cpi,
? (source_variance - recon_variance)
: (recon_variance - source_variance);
- var_error = (200 * source_variance * recon_variance) /
- ((source_variance * source_variance) +
- (recon_variance * recon_variance));
+ var_error = ((int64_t)200 * source_variance * recon_variance) /
+ (((int64_t)source_variance * source_variance) +
+ ((int64_t)recon_variance * recon_variance));
var_error = 100 - var_error;
}
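
The casts above are the whole fix: with plausible variance magnitudes, the 32-bit products overflow before the division ever happens. A quick demonstration (values arbitrary; only the arithmetic width matters):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const unsigned int source_variance = 60000, recon_variance = 50000;
  /* 32-bit: 200 * 60000 * 50000 = 6e11, which wraps modulo 2^32. */
  const unsigned int wrapped = 200u * source_variance * recon_variance;
  /* 64-bit, as in the patched code: */
  const int64_t num = (int64_t)200 * source_variance * recon_variance;
  const int64_t den = (int64_t)source_variance * source_variance +
                      (int64_t)recon_variance * recon_variance;
  printf("wrapped 32-bit numerator: %u\n", wrapped);
  printf("var_error = %lld\n", (long long)(100 - num / den));
  return 0;
}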
@@ -2952,12 +2974,12 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
RD_OPT *const rd_opt = &cpi->rd;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
const struct segmentation *const seg = &cm->seg;
PREDICTION_MODE this_mode;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
- unsigned char segment_id = mbmi->segment_id;
+ unsigned char segment_id = mi->segment_id;
int comp_pred, i, k;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
@@ -2971,7 +2993,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
int64_t best_pred_rd[REFERENCE_MODES];
int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
- MB_MODE_INFO best_mbmode;
+ MODE_INFO best_mbmode;
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
@@ -3038,7 +3060,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
// Skip checking missing references in both single and compound reference
- // modes. Note that a mode will be skipped iff both reference frames
+ // modes. Note that a mode will be skipped if both reference frames
// are masked out.
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
@@ -3189,7 +3211,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
const int bsl = mi_width_log2_lookup[bsize];
int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
+ get_chessboard_index(cm->current_video_frame)) & 0x1;
- MB_MODE_INFO *ref_mbmi;
+ MODE_INFO *ref_mi;
int const_motion = 1;
int skip_ref_frame = !cb_partition_search_ctrl;
MV_REFERENCE_FRAME rf = NONE;
@@ -3197,26 +3219,26 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
ref_mv.as_int = INVALID_MV;
if ((mi_row - 1) >= tile_info->mi_row_start) {
- ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0];
- rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0];
+ ref_mv = xd->mi[-xd->mi_stride]->mv[0];
+ rf = xd->mi[-xd->mi_stride]->ref_frame[0];
for (i = 0; i < mi_width; ++i) {
- ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi;
- const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
- (ref_frame == ref_mbmi->ref_frame[0]);
- skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
+ ref_mi = xd->mi[-xd->mi_stride + i];
+ const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
+ (ref_frame == ref_mi->ref_frame[0]);
+ skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
}
}
if ((mi_col - 1) >= tile_info->mi_col_start) {
if (ref_mv.as_int == INVALID_MV)
- ref_mv = xd->mi[-1]->mbmi.mv[0];
+ ref_mv = xd->mi[-1]->mv[0];
if (rf == NONE)
- rf = xd->mi[-1]->mbmi.ref_frame[0];
+ rf = xd->mi[-1]->ref_frame[0];
for (i = 0; i < mi_height; ++i) {
- ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi;
- const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
- (ref_frame == ref_mbmi->ref_frame[0]);
- skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
+ ref_mi = xd->mi[i * xd->mi_stride - 1];
+ const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
+ (ref_frame == ref_mi->ref_frame[0]);
+ skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
}
}
@@ -3287,15 +3309,15 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
continue;
}
- mbmi->mode = this_mode;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->ref_frame[1] = second_ref_frame;
+ mi->mode = this_mode;
+ mi->uv_mode = DC_PRED;
+ mi->ref_frame[0] = ref_frame;
+ mi->ref_frame[1] = second_ref_frame;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
: cm->interp_filter;
- mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
+ mi->mv[0].as_int = mi->mv[1].as_int = 0;
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
@@ -3316,7 +3338,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
if (rate_y == INT_MAX)
continue;
- uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
+ uv_tx = get_uv_tx_size_impl(mi->tx_size, bsize, pd->subsampling_x,
pd->subsampling_y);
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
@@ -3327,9 +3349,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
rate_uv = rate_uv_tokenonly[uv_tx];
distortion_uv = dist_uv[uv_tx];
skippable = skippable && skip_uv[uv_tx];
- mbmi->uv_mode = mode_uv[uv_tx];
+ mi->uv_mode = mode_uv[uv_tx];
- rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
+ rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx];
if (this_mode != DC_PRED && this_mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
@@ -3360,28 +3382,34 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
}
if (!disable_skip) {
+ const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
+ const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
+ const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
+
if (skippable) {
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// Cost the skip mb case
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+ rate2 += skip_cost1;
} else if (ref_frame != INTRA_FRAME && !xd->lossless) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
+ if (RDCOST(x->rdmult, x->rddiv,
+ rate_y + rate_uv + skip_cost0, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
} else {
// FIXME(rbultje) make this work for splitmv also
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
- distortion2 = total_sse;
assert(total_sse >= 0);
+
+ rate2 += skip_cost1;
+ distortion2 = total_sse;
rate2 -= (rate_y + rate_uv);
this_skip2 = 1;
}
} else {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
}
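
The hoisted skip costs above also change the comparison itself: the "code coefficients" arm now carries the cost of signalling skip = 0, so both arms of the RD test price the skip flag consistently. A self-contained sketch of the corrected decision (rdcost() repeats the simplified helper from the vp9_rd.h note, again assuming VP9_PROB_COST_SHIFT == 9; all numbers invented):

#include <stdint.h>
#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

static int64_t rdcost(int rdmult, int rddiv, int rate, int64_t dist) {
  return ROUND_POWER_OF_TWO((int64_t)rate * rdmult, 9) + (dist << rddiv);
}

int main(void) {
  const int rdmult = 70, rddiv = 7;
  const int rate_coeffs = 900;                 /* rate_y + rate_uv */
  const int skip_cost0 = 30, skip_cost1 = 350; /* skip flag = 0 / 1 */
  const int64_t distortion = 4000, total_sse = 9000;

  /* Arm 1: code the coefficients and signal "no skip". */
  const int64_t rd_code =
      rdcost(rdmult, rddiv, rate_coeffs + skip_cost0, distortion);
  /* Arm 2: drop the coefficients, signal "skip", eat the full SSE. */
  const int64_t rd_skip = rdcost(rdmult, rddiv, skip_cost1, total_sse);

  if (rd_code < rd_skip)
    printf("code coefficients: rd=%lld\n", (long long)rd_code);
  else
    printf("skip block: rd=%lld\n", (long long)rd_skip);
  return 0;
}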
// Calculate the final RD estimate for this mode.
@@ -3397,7 +3425,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
// Keep record of best intra rd
if (this_rd < best_intra_rd) {
best_intra_rd = this_rd;
- best_intra_mode = mbmi->mode;
+ best_intra_mode = mi->mode;
}
}
@@ -3417,7 +3445,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
- mbmi->mv[0].as_int = 0;
+ mi->mv[0].as_int = 0;
max_plane = 1;
} else {
best_pred_sse = x->pred_sse[ref_frame];
@@ -3427,13 +3455,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
best_rd = this_rd;
- best_mbmode = *mbmi;
+ best_mbmode = *mi;
best_skip2 = this_skip2;
best_mode_skippable = skippable;
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
- memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
+ memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
// TODO(debargha): enhance this test with a better distortion prediction
@@ -3549,8 +3577,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
TX_SIZE uv_tx_size;
- *mbmi = best_mbmode;
- uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+ *mi = best_mbmode;
+ uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
@@ -3569,7 +3597,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
sf->adaptive_rd_thresh, bsize, best_mode_index);
// macroblock modes
- *mbmi = best_mbmode;
+ *mi = best_mbmode;
x->skip |= best_skip2;
for (i = 0; i < REFERENCE_MODES; ++i) {
@@ -3599,7 +3627,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
if (!x->skip && !x->select_tx_size) {
int has_high_freq_coeff = 0;
int plane;
- int max_plane = is_inter_block(&xd->mi[0]->mbmi)
+ int max_plane = is_inter_block(xd->mi[0])
? MAX_MB_PLANE : 1;
for (plane = 0; plane < max_plane; ++plane) {
x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
@@ -3629,8 +3657,8 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- unsigned char segment_id = mbmi->segment_id;
+ MODE_INFO *const mi = xd->mi[0];
+ unsigned char segment_id = mi->segment_id;
const int comp_pred = 0;
int i;
int64_t best_pred_diff[REFERENCE_MODES];
@@ -3656,11 +3684,11 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
- mbmi->mode = ZEROMV;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame[0] = LAST_FRAME;
- mbmi->ref_frame[1] = NONE;
- mbmi->mv[0].as_int = 0;
+ mi->mode = ZEROMV;
+ mi->uv_mode = DC_PRED;
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE;
+ mi->mv[0].as_int = 0;
x->skip = 1;
if (cm->interp_filter != BILINEAR) {
@@ -3670,21 +3698,21 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
int rs;
int best_rs = INT_MAX;
for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
- mbmi->interp_filter = i;
+ mi->interp_filter = i;
rs = vp9_get_switchable_rate(cpi, xd);
if (rs < best_rs) {
best_rs = rs;
- best_filter = mbmi->interp_filter;
+ best_filter = mi->interp_filter;
}
}
}
}
// Set the appropriate filter
if (cm->interp_filter == SWITCHABLE) {
- mbmi->interp_filter = best_filter;
+ mi->interp_filter = best_filter;
rate2 += vp9_get_switchable_rate(cpi, xd);
} else {
- mbmi->interp_filter = cm->interp_filter;
+ mi->interp_filter = cm->interp_filter;
}
if (cm->reference_mode == REFERENCE_MODE_SELECT)
@@ -3706,7 +3734,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
}
assert((cm->interp_filter == SWITCHABLE) ||
- (cm->interp_filter == mbmi->interp_filter));
+ (cm->interp_filter == mi->interp_filter));
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
@@ -3732,10 +3760,10 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
RD_OPT *const rd_opt = &cpi->rd;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
const struct segmentation *const seg = &cm->seg;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
- unsigned char segment_id = mbmi->segment_id;
+ unsigned char segment_id = mi->segment_id;
int comp_pred, i;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
@@ -3747,7 +3775,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
int64_t best_pred_rd[REFERENCE_MODES];
int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
- MB_MODE_INFO best_mbmode;
+ MODE_INFO best_mbmode;
int ref_index, best_ref_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vpx_prob comp_mode_p;
@@ -3821,6 +3849,16 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
ref_frame = vp9_ref_order[ref_index].ref_frame[0];
second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
+#if CONFIG_BETTER_HW_COMPATIBILITY
+ // Forbid 8x4 and 4x8 partitions if any reference frame is scaled.
+ if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
+ int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
+ if (second_ref_frame > INTRA_FRAME)
+ ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
+ if (ref_scaled)
+ continue;
+ }
+#endif
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
@@ -3896,14 +3934,14 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
continue;
}
- mbmi->tx_size = TX_4X4;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->ref_frame[1] = second_ref_frame;
+ mi->tx_size = TX_4X4;
+ mi->uv_mode = DC_PRED;
+ mi->ref_frame[0] = ref_frame;
+ mi->ref_frame[1] = second_ref_frame;
// Evaluate all sub-pel filters irrespective of whether we can use
// them for this frame.
- mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
- : cm->interp_filter;
+ mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
+ : cm->interp_filter;
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
@@ -3934,7 +3972,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
rate_uv = rate_uv_tokenonly;
distortion2 += dist_uv;
distortion_uv = dist_uv;
- mbmi->uv_mode = mode_uv;
+ mi->uv_mode = mode_uv;
} else {
int rate;
int64_t distortion;
@@ -3947,7 +3985,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
int_mv *second_ref = comp_pred ?
&x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
b_mode_info tmp_best_bmodes[16];
- MB_MODE_INFO tmp_best_mbmode;
+ MODE_INFO tmp_best_mbmode;
BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
int pred_exists = 0;
int uv_skippable;
@@ -3956,8 +3994,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
int ref;
for (ref = 0; ref < 2; ++ref) {
- scaled_ref_frame[ref] = mbmi->ref_frame[ref] > INTRA_FRAME ?
- vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[ref]) : NULL;
+ scaled_ref_frame[ref] = mi->ref_frame[ref] > INTRA_FRAME ?
+ vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref]) : NULL;
if (scaled_ref_frame[ref]) {
int i;
@@ -3996,7 +4034,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
int newbest, rs;
int64_t rs_rd;
MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
- mbmi->interp_filter = switchable_filter_index;
+ mi->interp_filter = switchable_filter_index;
tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
&mbmi_ext->ref_mvs[ref_frame][0],
second_ref, best_yrd, &rate,
@@ -4020,11 +4058,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
- tmp_best_filter = mbmi->interp_filter;
+ tmp_best_filter = mi->interp_filter;
tmp_best_rd = tmp_rd;
}
if ((newbest && cm->interp_filter == SWITCHABLE) ||
- (mbmi->interp_filter == cm->interp_filter &&
+ (mi->interp_filter == cm->interp_filter &&
cm->interp_filter != SWITCHABLE)) {
tmp_best_rdu = tmp_rd;
tmp_best_rate = rate;
@@ -4032,7 +4070,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
tmp_best_distortion = distortion;
tmp_best_sse = total_sse;
tmp_best_skippable = skippable;
- tmp_best_mbmode = *mbmi;
+ tmp_best_mbmode = *mi;
for (i = 0; i < 4; i++) {
tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
@@ -4044,7 +4082,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
if (tmp_best_rdu / 2 > best_rd) {
// skip searching the other filters if the first is
// already substantially larger than the best so far
- tmp_best_filter = mbmi->interp_filter;
+ tmp_best_filter = mi->interp_filter;
tmp_best_rdu = INT64_MAX;
break;
}
@@ -4057,8 +4095,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
if (tmp_best_rdu == INT64_MAX && pred_exists)
continue;
- mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
- tmp_best_filter : cm->interp_filter);
+ mi->interp_filter = (cm->interp_filter == SWITCHABLE ?
+ tmp_best_filter : cm->interp_filter);
if (!pred_exists) {
// Handles the special case when a filter that is not in the
// switchable list (bilinear, 6-tap) is indicated at the frame level
@@ -4076,7 +4114,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
rate_y = tmp_best_ratey;
distortion = tmp_best_distortion;
skippable = tmp_best_skippable;
- *mbmi = tmp_best_mbmode;
+ *mi = tmp_best_mbmode;
for (i = 0; i < 4; i++)
xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
}
@@ -4143,17 +4181,21 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
}
if (!disable_skip) {
+ const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
+ const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
+ const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
+
// Skip is never coded at the segment level for sub8x8 blocks and instead
// always coded in the bitstream at the mode info level.
-
if (ref_frame != INTRA_FRAME && !xd->lossless) {
- if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
- RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
+ if (RDCOST(x->rdmult, x->rddiv,
+ rate_y + rate_uv + skip_cost0, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
} else {
// FIXME(rbultje) make this work for splitmv also
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+ rate2 += skip_cost1;
distortion2 = total_sse;
assert(total_sse >= 0);
rate2 -= (rate_y + rate_uv);
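
A note on the reworked skip test above: with the skip-flag costs hoisted
into skip_cost0 and skip_cost1, the flag's rate now appears on both sides
of the comparison instead of being tacked on afterwards. A condensed
sketch follows; the rd_cost helper is a hypothetical stand-in for the
library's RDCOST macro, whose exact fixed-point form is assumed here:

    #include <stdint.h>

    /* Hypothetical stand-in for RDCOST: rate weighted by the rate
     * multiplier, distortion scaled by the distortion shift. */
    static int64_t rd_cost(int rdmult, int rddiv, int rate, int64_t dist) {
      return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
    }

    /* Either code the block normally (no-skip flag plus coefficient rate)
     * or skip it (skip flag, full reconstruction error as distortion). */
    static int should_skip(int rdmult, int rddiv, int rate_y, int rate_uv,
                           int skip_cost0, int skip_cost1,
                           int64_t distortion, int64_t total_sse) {
      const int64_t no_skip =
          rd_cost(rdmult, rddiv, rate_y + rate_uv + skip_cost0, distortion);
      const int64_t skip = rd_cost(rdmult, rddiv, skip_cost1, total_sse);
      return skip <= no_skip;
    }
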
@@ -4163,7 +4205,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
}
} else {
// Add in the cost of the no skip flag.
- rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
+ rate2 += skip_cost0;
}
// Calculate the final RD estimate for this mode.
@@ -4186,7 +4228,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
- mbmi->mv[0].as_int = 0;
+ mi->mv[0].as_int = 0;
max_plane = 1;
}
@@ -4196,7 +4238,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
best_rd = this_rd;
best_yrd = best_rd -
RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
- best_mbmode = *mbmi;
+ best_mbmode = *mi;
best_skip2 = this_skip2;
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
@@ -4294,7 +4336,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
if (sf->use_uv_intra_rd_estimate) {
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
- *mbmi = best_mbmode;
+ *mi = best_mbmode;
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
&rate_uv_tokenonly,
&dist_uv,
@@ -4318,7 +4360,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
sf->adaptive_rd_thresh, bsize, best_ref_index);
// macroblock modes
- *mbmi = best_mbmode;
+ *mi = best_mbmode;
x->skip |= best_skip2;
if (!is_inter_block(&best_mbmode)) {
for (i = 0; i < 4; i++)
@@ -4327,8 +4369,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
for (i = 0; i < 4; ++i)
memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
- mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
- mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
+ mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
+ mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
}
for (i = 0; i < REFERENCE_MODES; ++i) {
diff --git a/libvpx/vp9/encoder/vp9_rdopt.h b/libvpx/vp9/encoder/vp9_rdopt.h
index 00ee55c67..253e4a02d 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.h
+++ b/libvpx/vp9/encoder/vp9_rdopt.h
@@ -29,15 +29,6 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd);
-unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs);
-#if CONFIG_VP9_HIGHBITDEPTH
-unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs, int bd);
-#endif
-
void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi,
struct TileDataEnc *tile_data,
struct macroblock *x,
diff --git a/libvpx/vp9/encoder/vp9_resize.c b/libvpx/vp9/encoder/vp9_resize.c
index 59c747852..307a1123a 100644
--- a/libvpx/vp9/encoder/vp9_resize.c
+++ b/libvpx/vp9/encoder/vp9_resize.c
@@ -15,6 +15,7 @@
#include <stdlib.h>
#include <string.h>
+#include "./vpx_config.h"
#if CONFIG_VP9_HIGHBITDEPTH
#include "vpx_dsp/vpx_dsp_common.h"
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -445,7 +446,7 @@ static void resize_multistep(const uint8_t *const input,
int length,
uint8_t *output,
int olength,
- uint8_t *buf) {
+ uint8_t *otmp) {
int steps;
if (length == olength) {
memcpy(output, input, sizeof(output[0]) * length);
@@ -456,15 +457,10 @@ static void resize_multistep(const uint8_t *const input,
if (steps > 0) {
int s;
uint8_t *out = NULL;
- uint8_t *tmpbuf = NULL;
- uint8_t *otmp, *otmp2;
+ uint8_t *otmp2;
int filteredlength = length;
- if (!tmpbuf) {
- tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
- otmp = tmpbuf;
- } else {
- otmp = buf;
- }
+
+ assert(otmp != NULL);
otmp2 = otmp + get_down2_length(length, 1);
for (s = 0; s < steps; ++s) {
const int proj_filteredlength = get_down2_length(filteredlength, 1);
@@ -482,8 +478,6 @@ static void resize_multistep(const uint8_t *const input,
if (filteredlength != olength) {
interpolate(out, filteredlength, output, olength);
}
- if (tmpbuf)
- free(tmpbuf);
} else {
interpolate(input, length, output, olength);
}
@@ -519,22 +513,29 @@ void vp9_resize_plane(const uint8_t *const input,
uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
uint8_t *tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
(width < height ? height : width));
- uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
+ uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
+ uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
+ if (intbuf == NULL || tmpbuf == NULL ||
+ arrbuf == NULL || arrbuf2 == NULL)
+ goto Error;
assert(width > 0);
assert(height > 0);
assert(width2 > 0);
assert(height2 > 0);
for (i = 0; i < height; ++i)
resize_multistep(input + in_stride * i, width,
- intbuf + width2 * i, width2, tmpbuf);
+ intbuf + width2 * i, width2, tmpbuf);
for (i = 0; i < width2; ++i) {
fill_col_to_arr(intbuf + i, width2, height, arrbuf);
- resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
- fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
+ resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf);
+ fill_arr_to_col(output + i, out_stride, height2, arrbuf2);
}
+
+ Error:
free(intbuf);
free(tmpbuf);
free(arrbuf);
+ free(arrbuf2);
}
#if CONFIG_VP9_HIGHBITDEPTH
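
The restructured vp9_resize_plane() above splits the old shared arrbuf
into two exact-size buffers and funnels every allocation failure through
a single cleanup label. A minimal sketch of that idiom (illustrative, not
part of the patch); it relies on free(NULL) being a no-op:

    #include <stdlib.h>

    static int process(size_t n) {
      int ret = -1;
      char *a = (char *)malloc(n);
      char *b = (char *)malloc(2 * n);
      if (a == NULL || b == NULL)
        goto Error;
      /* ... use a and b ... */
      ret = 0;
    Error:
      free(a);  /* free(NULL) is harmless */
      free(b);
      return ret;
    }
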
@@ -737,7 +738,7 @@ static void highbd_resize_multistep(const uint16_t *const input,
int length,
uint16_t *output,
int olength,
- uint16_t *buf,
+ uint16_t *otmp,
int bd) {
int steps;
if (length == olength) {
@@ -749,15 +750,10 @@ static void highbd_resize_multistep(const uint16_t *const input,
if (steps > 0) {
int s;
uint16_t *out = NULL;
- uint16_t *tmpbuf = NULL;
- uint16_t *otmp, *otmp2;
+ uint16_t *otmp2;
int filteredlength = length;
- if (!tmpbuf) {
- tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
- otmp = tmpbuf;
- } else {
- otmp = buf;
- }
+
+ assert(otmp != NULL);
otmp2 = otmp + get_down2_length(length, 1);
for (s = 0; s < steps; ++s) {
const int proj_filteredlength = get_down2_length(filteredlength, 1);
@@ -775,8 +771,6 @@ static void highbd_resize_multistep(const uint16_t *const input,
if (filteredlength != olength) {
highbd_interpolate(out, filteredlength, output, olength, bd);
}
- if (tmpbuf)
- free(tmpbuf);
} else {
highbd_interpolate(input, length, output, olength, bd);
}
@@ -815,21 +809,28 @@ void vp9_highbd_resize_plane(const uint8_t *const input,
uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);
uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
(width < height ? height : width));
- uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
+ uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height);
+ uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2);
+ if (intbuf == NULL || tmpbuf == NULL ||
+ arrbuf == NULL || arrbuf2 == NULL)
+ goto Error;
for (i = 0; i < height; ++i) {
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
intbuf + width2 * i, width2, tmpbuf, bd);
}
for (i = 0; i < width2; ++i) {
highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
- highbd_resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf,
+ highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf,
bd);
highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
- arrbuf + height);
+ arrbuf2);
}
+
+ Error:
free(intbuf);
free(tmpbuf);
free(arrbuf);
+ free(arrbuf2);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c
index c5c50a244..5a0a23d48 100644
--- a/libvpx/vp9/encoder/vp9_segmentation.c
+++ b/libvpx/vp9/encoder/vp9_segmentation.c
@@ -118,7 +118,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
return;
xd->mi = mi;
- segment_id = xd->mi[0]->mbmi.segment_id;
+ segment_id = xd->mi[0]->segment_id;
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
@@ -127,7 +127,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
// Temporal prediction not allowed on key frames
if (cm->frame_type != KEY_FRAME) {
- const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
// Test to see if the segment id matches the predicted value.
const int pred_segment_id = get_segment_id(cm, cm->last_frame_seg_map,
bsize, mi_row, mi_col);
@@ -136,7 +136,7 @@ static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
// Store the prediction status for this mb and update counts
// as appropriate
- xd->mi[0]->mbmi.seg_id_predicted = pred_flag;
+ xd->mi[0]->seg_id_predicted = pred_flag;
temporal_predictor_count[pred_context][pred_flag]++;
// Update the "unpredicted" segment count
@@ -159,8 +159,8 @@ static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
- bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
+ bw = num_8x8_blocks_wide_lookup[mi[0]->sb_type];
+ bh = num_8x8_blocks_high_lookup[mi[0]->sb_type];
if (bw == bs && bh == bs) {
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
diff --git a/libvpx/vp9/encoder/vp9_skin_detection.c b/libvpx/vp9/encoder/vp9_skin_detection.c
index c2763b7da..23a5fc775 100644
--- a/libvpx/vp9/encoder/vp9_skin_detection.c
+++ b/libvpx/vp9/encoder/vp9_skin_detection.c
@@ -15,22 +15,29 @@
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_skin_detection.h"
+#define MODEL_MODE 1
+
// Fixed-point skin color model parameters.
-static const int skin_mean[2] = {7463, 9614}; // q6
+static const int skin_mean[5][2] = {
+ {7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}};
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
-static const int skin_threshold = 1570636; // q18
+static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000,
+ 800000}; // q18
// Thresholds on luminance.
-static const int y_low = 20;
+static const int y_low = 40;
static const int y_high = 220;
// Evaluates the Mahalanobis distance measure for the input CbCr values.
-static int evaluate_skin_color_difference(int cb, int cr) {
+static int evaluate_skin_color_difference(int cb, int cr, int idx) {
const int cb_q6 = cb << 6;
const int cr_q6 = cr << 6;
- const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]);
- const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]);
- const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]);
+ const int cb_diff_q12 =
+ (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]);
+ const int cbcr_diff_q12 =
+ (cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]);
+ const int cr_diff_q12 =
+ (cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]);
const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
@@ -41,13 +48,65 @@ static int evaluate_skin_color_difference(int cb, int cr) {
return skin_diff;
}
-int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) {
- if (y < y_low || y > y_high)
+int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr,
+ int motion) {
+ if (y < y_low || y > y_high) {
return 0;
- else
- return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
+ } else {
+ if (MODEL_MODE == 0) {
+ return (evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]);
+ } else {
+ int i = 0;
+ // Exit on grey.
+ if (cb == 128 && cr == 128)
+ return 0;
+ // Exit on very strong cb.
+ if (cb > 150 && cr < 110)
+ return 0;
+ for (; i < 5; i++) {
+ int skin_color_diff = evaluate_skin_color_difference(cb, cr, i);
+ if (skin_color_diff < skin_threshold[i + 1]) {
+ if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
+ return 0;
+ else if (motion == 0 &&
+ skin_color_diff > (skin_threshold[i + 1] >> 1))
+ return 0;
+ else
+ return 1;
+ }
+      // Exit if difference is much larger than the threshold.
+ if (skin_color_diff > (skin_threshold[i + 1] << 3)) {
+ return 0;
+ }
+ }
+ return 0;
+ }
+ }
}
+int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
+ int stride, int strideuv, int bsize,
+ int consec_zeromv, int curr_motion_magn) {
+  // No skin if the block has had zero/small motion for many consecutive
+  // frames.
+ if (consec_zeromv > 60 && curr_motion_magn == 0) {
+ return 0;
+ } else {
+ int motion = 1;
+ // Take center pixel in block to determine is_skin.
+ const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
+ const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
+ const int uv_width_shift = y_width_shift >> 1;
+ const int uv_height_shift = y_height_shift >> 1;
+ const uint8_t ysource = y[y_height_shift * stride + y_width_shift];
+ const uint8_t usource = u[uv_height_shift * strideuv + uv_width_shift];
+ const uint8_t vsource = v[uv_height_shift * strideuv + uv_width_shift];
+ if (consec_zeromv > 25 && curr_motion_magn == 0)
+ motion = 0;
+ return vp9_skin_pixel(ysource, usource, vsource, motion);
+ }
+}
+
+
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
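
For reference, the fixed-point test in evaluate_skin_color_difference()
reduces to the standalone sketch below, with the first skin model from
the table above hard-coded (illustrative only): the q6 chroma differences
are squared into q12, rounded down to q2, and weighted by the q16 inverse
covariance, so the sum lands in q18, the scale of skin_threshold.

    static int skin_chroma_test(int cb, int cr) {
      static const int mean[2] = { 7463, 9614 };                /* q6 */
      static const int inv_cov[4] = { 4107, 1663, 1663, 2157 }; /* q16 */
      static const int threshold = 1570636;                     /* q18 */
      const int dcb = (cb << 6) - mean[0];            /* q6 */
      const int dcr = (cr << 6) - mean[1];            /* q6 */
      const int cb2 = (dcb * dcb + (1 << 9)) >> 10;   /* q12 -> q2 */
      const int cbcr = (dcb * dcr + (1 << 9)) >> 10;  /* q12 -> q2 */
      const int cr2 = (dcr * dcr + (1 << 9)) >> 10;   /* q12 -> q2 */
      const int d = inv_cov[0] * cb2 + inv_cov[1] * cbcr +
                    inv_cov[2] * cbcr + inv_cov[3] * cr2;  /* q18 */
      return d < threshold;
    }
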
@@ -67,7 +126,7 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
int shuv = shy - 1;
int fac = y_bsize / 8;
// Use center pixel or average of center 2x2 pixels.
- int mode_filter = 1;
+ int mode_filter = 0;
YV12_BUFFER_CONFIG skinmap;
memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG));
if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height,
@@ -84,27 +143,46 @@ void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
for (mi_row = 0; mi_row < cm->mi_rows - 1; mi_row += fac) {
num_bl = 0;
for (mi_col = 0; mi_col < cm->mi_cols - 1; mi_col += fac) {
- // Select pixel for each block for skin detection.
- // Use center pixel, or 2x2 average at center.
- uint8_t ysource = src_y[ypos * src_ystride + ypos];
- uint8_t usource = src_u[uvpos * src_uvstride + uvpos];
- uint8_t vsource = src_v[uvpos * src_uvstride + uvpos];
- uint8_t ysource2 = src_y[(ypos + 1) * src_ystride + ypos];
- uint8_t usource2 = src_u[(uvpos + 1) * src_uvstride + uvpos];
- uint8_t vsource2 = src_v[(uvpos + 1) * src_uvstride + uvpos];
- uint8_t ysource3 = src_y[ypos * src_ystride + (ypos + 1)];
- uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)];
- uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)];
- uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)];
- uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
- uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
int is_skin = 0;
if (mode_filter == 1) {
+ // Use 2x2 average at center.
+ uint8_t ysource = src_y[ypos * src_ystride + ypos];
+ uint8_t usource = src_u[uvpos * src_uvstride + uvpos];
+ uint8_t vsource = src_v[uvpos * src_uvstride + uvpos];
+ uint8_t ysource2 = src_y[(ypos + 1) * src_ystride + ypos];
+ uint8_t usource2 = src_u[(uvpos + 1) * src_uvstride + uvpos];
+ uint8_t vsource2 = src_v[(uvpos + 1) * src_uvstride + uvpos];
+ uint8_t ysource3 = src_y[ypos * src_ystride + (ypos + 1)];
+ uint8_t usource3 = src_u[uvpos * src_uvstride + (uvpos + 1)];
+ uint8_t vsource3 = src_v[uvpos * src_uvstride + (uvpos + 1)];
+ uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)];
+ uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
+ uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2;
usource = (usource + usource2 + usource3 + usource4) >> 2;
vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2;
+ is_skin = vp9_skin_pixel(ysource, usource, vsource, 1);
+ } else {
+ int block_size = BLOCK_8X8;
+ int consec_zeromv = 0;
+ int bl_index = mi_row * cm->mi_cols + mi_col;
+ int bl_index1 = bl_index + 1;
+ int bl_index2 = bl_index + cm->mi_cols;
+ int bl_index3 = bl_index2 + 1;
+ if (y_bsize == 8)
+ consec_zeromv = cpi->consec_zero_mv[bl_index];
+ else
+ consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+ VPXMIN(cpi->consec_zero_mv[bl_index1],
+ VPXMIN(cpi->consec_zero_mv[bl_index2],
+ cpi->consec_zero_mv[bl_index3])));
+ if (y_bsize == 16)
+ block_size = BLOCK_16X16;
+ is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
+ src_uvstride, block_size,
+ consec_zeromv,
+ 0);
}
- is_skin = vp9_skin_pixel(ysource, usource, vsource);
for (i = 0; i < y_bsize; i++) {
for (j = 0; j < y_bsize; j++) {
if (is_skin)
diff --git a/libvpx/vp9/encoder/vp9_skin_detection.h b/libvpx/vp9/encoder/vp9_skin_detection.h
index 0a87ef9f4..c77382dbd 100644
--- a/libvpx/vp9/encoder/vp9_skin_detection.h
+++ b/libvpx/vp9/encoder/vp9_skin_detection.h
@@ -21,7 +21,12 @@ struct VP9_COMP;
// #define OUTPUT_YUV_SKINMAP
-int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr);
+int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr,
+ int motion);
+
+int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
+ int stride, int strideuv, int bsize,
+ int consec_zeromv, int curr_motion_magn);
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c
index a53962984..e7f04a244 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/libvpx/vp9/encoder/vp9_speed_features.c
@@ -15,6 +15,22 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vpx_dsp/vpx_dsp_common.h"
+// Mesh search patterns for various speed settings
+static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] =
+ {{64, 4}, {28, 2}, {15, 1}, {7, 1}};
+
+#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
+static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1]
+ [MAX_MESH_STEP] =
+ {{{64, 8}, {28, 4}, {15, 1}, {7, 1}},
+ {{64, 8}, {28, 4}, {15, 1}, {7, 1}},
+ {{64, 8}, {14, 2}, {7, 1}, {7, 1}},
+ {{64, 16}, {24, 8}, {12, 4}, {7, 1}},
+ {{64, 16}, {24, 8}, {12, 4}, {7, 1}},
+ {{64, 16}, {24, 8}, {12, 4}, {7, 1}},
+ };
+static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] =
+ {50, 25, 15, 5, 1, 1};
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
@@ -259,6 +275,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
sf->use_fast_coef_costing = 1;
+ sf->allow_exhaustive_searches = 0;
+ sf->exhaustive_searches_thresh = INT_MAX;
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
@@ -285,12 +303,26 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
- // Disable reference masking if using spatial scaling since
- // pred_mv_sad will not be set (since vp9_mv_pred will not
- // be called).
- // TODO(marpan/agrange): Fix this condition.
- sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
- cpi->svc.number_spatial_layers == 1) ? 1 : 0;
+      // Reference masking is enabled only for 1 spatial layer, and only if
+      // none of the references have been scaled. The latter condition needs
+      // to be checked for external or internal dynamic resize.
+ sf->reference_masking = (cpi->svc.number_spatial_layers == 1);
+ if (sf->reference_masking == 1 &&
+ (cpi->external_resize == 1 ||
+ cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) {
+ MV_REFERENCE_FRAME ref_frame;
+ static const int flag_list[4] =
+ {0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
+ const struct scale_factors *const scale_fac =
+ &cm->frame_refs[ref_frame - 1].sf;
+ if (vp9_is_scaled(scale_fac))
+ sf->reference_masking = 0;
+ }
+ }
+ }
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -368,6 +400,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
sf->simple_model_rd_from_var = 1;
+ if (cpi->oxcf.rc_mode == VPX_VBR)
+ sf->mv.search_method = NSTEP;
if (!is_keyframe) {
int i;
@@ -376,13 +410,16 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V;
} else {
for (i = 0; i < BLOCK_SIZES; ++i)
- if (i >= BLOCK_16X16)
+ if (i > BLOCK_16X16)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
else
// Use H and V intra mode for block sizes <= 16X16.
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V;
}
}
+ if (content == VP9E_CONTENT_SCREEN) {
+ sf->short_circuit_flat_blocks = 1;
+ }
}
if (speed >= 6) {
@@ -392,6 +429,11 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
+ content != VP9E_CONTENT_SCREEN) {
+ // Enable short circuit for low temporal variance.
+ sf->short_circuit_low_temp_var = 1;
+ }
}
if (speed >= 7) {
@@ -406,8 +448,19 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
}
if (speed >= 8) {
sf->adaptive_rd_thresh = 4;
- sf->mv.subpel_force_stop = 2;
+ sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ // Only keep INTRA_DC mode for speed 8.
+ if (!is_keyframe) {
+ int i = 0;
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+ }
+ if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
+ content != VP9E_CONTENT_SCREEN) {
+ // More aggressive short circuit for speed 8.
+ sf->short_circuit_low_temp_var = 2;
+ }
}
}
@@ -460,7 +513,6 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->mv.auto_mv_step_size = 0;
sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
- sf->adaptive_rd_thresh = 0;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
@@ -516,10 +568,15 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
// Recode loop tolerance %.
sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE;
- sf->tx_size_search_breakout = 0;
- sf->partition_search_breakout_dist_thr = 0;
- sf->partition_search_breakout_rate_thr = 0;
sf->simple_model_rd_from_var = 0;
+ sf->short_circuit_flat_blocks = 0;
+ sf->short_circuit_low_temp_var = 0;
+
+  // Some speed-up features are used even for best quality, as they have
+  // minimal impact on quality.
+ sf->adaptive_rd_thresh = 1;
+ sf->tx_size_search_breakout = 1;
+ sf->partition_search_breakout_dist_thr = (1 << 19);
+ sf->partition_search_breakout_rate_thr = 80;
if (oxcf->mode == REALTIME)
set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
@@ -527,8 +584,36 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
set_good_speed_feature(cpi, cm, sf, oxcf->speed);
cpi->full_search_sad = vp9_full_search_sad;
- cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search
- : vp9_diamond_search_sad;
+ cpi->diamond_search_sad = vp9_diamond_search_sad;
+
+ sf->allow_exhaustive_searches = 1;
+ if (oxcf->mode == BEST) {
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
+ sf->exhaustive_searches_thresh = (1 << 20);
+ else
+ sf->exhaustive_searches_thresh = (1 << 21);
+ sf->max_exaustive_pct = 100;
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
+ sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
+ }
+ } else {
+ int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
+ sf->exhaustive_searches_thresh = (1 << 22);
+ else
+ sf->exhaustive_searches_thresh = (1 << 23);
+ sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
+ if (speed > 0)
+ sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
+
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ sf->mesh_patterns[i].range =
+ good_quality_mesh_patterns[speed][i].range;
+ sf->mesh_patterns[i].interval =
+ good_quality_mesh_patterns[speed][i].interval;
+ }
+ }
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
@@ -541,7 +626,10 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->optimize_coefficients = 0;
}
- if (sf->mv.subpel_search_method == SUBPEL_TREE) {
+ if (sf->mv.subpel_force_stop == 3) {
+ // Whole pel only
+ cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree;
+ } else if (sf->mv.subpel_search_method == SUBPEL_TREE) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h
index 575e98cf5..e88a7dfff 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/libvpx/vp9/encoder/vp9_speed_features.h
@@ -188,13 +188,24 @@ typedef struct MV_SPEED_FEATURES {
// Maximum number of steps in logarithmic subpel search before giving up.
int subpel_iters_per_step;
- // Control when to stop subpel search
+ // Control when to stop subpel search:
+ // 0: Full subpel search.
+ // 1: Stop at quarter pixel.
+ // 2: Stop at half pixel.
+ // 3: Stop at full pixel.
int subpel_force_stop;
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
} MV_SPEED_FEATURES;
+#define MAX_MESH_STEP 4
+
+typedef struct MESH_PATTERN {
+ int range;
+ int interval;
+} MESH_PATTERN;
+
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
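
MESH_PATTERN feeds the exhaustive mesh search wired up in
vp9_speed_features.c above: each stage scans a square of +/-range around
the current best position at a stride of interval, and later, finer
stages recentre on the winner. A rough skeleton of that staged scan (an
assumed shape for illustration, not the library's implementation; cost is
a caller-supplied SAD/RD function and interval is taken to be >= 1):

    typedef struct { int range; int interval; } mesh_step;

    static void mesh_search(const mesh_step *steps, int n_steps,
                            int *best_x, int *best_y,
                            long (*cost)(int x, int y)) {
      long best = cost(*best_x, *best_y);
      int s, dx, dy;
      for (s = 0; s < n_steps; ++s) {
        const int cx = *best_x, cy = *best_y;
        for (dy = -steps[s].range; dy <= steps[s].range;
             dy += steps[s].interval) {
          for (dx = -steps[s].range; dx <= steps[s].range;
               dx += steps[s].interval) {
            const long c = cost(cx + dx, cy + dy);
            if (c < best) {
              best = c;
              *best_x = cx + dx;
              *best_y = cy + dy;
            }
          }
        }
      }
    }
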
@@ -299,6 +310,18 @@ typedef struct SPEED_FEATURES {
// point for this motion search and limits the search range around it.
int adaptive_motion_search;
+  // Flag for allowing some use of exhaustive searches.
+ int allow_exhaustive_searches;
+
+  // Threshold for allowing exhaustive motion search.
+ int exhaustive_searches_thresh;
+
+  // Maximum percentage of exhaustive motion searches allowed for a frame.
+ int max_exaustive_pct;
+
+ // Pattern to be used for any exhaustive mesh searches.
+ MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
+
int schedule_mode_search;
// Allows sub 8x8 modes to use the prediction filter that was determined
@@ -419,6 +442,18 @@ typedef struct SPEED_FEATURES {
// Fast approximation of vp9_model_rd_from_var_lapndz
int simple_model_rd_from_var;
+
+ // Skip a number of expensive mode evaluations for blocks with zero source
+ // variance.
+ int short_circuit_flat_blocks;
+
+ // Skip a number of expensive mode evaluations for blocks with very low
+ // temporal variance.
+ // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.
+ // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL
+ // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and
+ // 32x16.
+ int short_circuit_low_temp_var;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/libvpx/vp9/encoder/vp9_subexp.c b/libvpx/vp9/encoder/vp9_subexp.c
index 799f179d9..29db01542 100644
--- a/libvpx/vp9/encoder/vp9_subexp.c
+++ b/libvpx/vp9/encoder/vp9_subexp.c
@@ -14,9 +14,7 @@
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_subexp.h"
-#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
-
-static const int update_bits[255] = {
+static const uint8_t update_bits[255] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
@@ -34,6 +32,7 @@ static const int update_bits[255] = {
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0,
};
+#define MIN_DELP_BITS 5
static int recenter_nonneg(int v, int m) {
if (v > (m << 1))
@@ -46,7 +45,7 @@ static int recenter_nonneg(int v, int m) {
static int remap_prob(int v, int m) {
int i;
- static const int map_table[MAX_PROB - 1] = {
+ static const uint8_t map_table[MAX_PROB - 1] = {
// generated by:
// map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM);
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33,
@@ -80,7 +79,7 @@ static int remap_prob(int v, int m) {
static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) {
int delp = remap_prob(newp, oldp);
- return update_bits[delp] * 256;
+ return update_bits[delp] << VP9_PROB_COST_SHIFT;
}
static void encode_uniform(vpx_writer *w, int v) {
@@ -123,14 +122,17 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct,
int bestsavings = 0;
vpx_prob newp, bestnewp = oldp;
const int step = *bestp > oldp ? -1 : 1;
+ const int upd_cost = vp9_cost_one(upd) - vp9_cost_zero(upd);
- for (newp = *bestp; newp != oldp; newp += step) {
- const int new_b = cost_branch256(ct, newp);
- const int update_b = prob_diff_update_cost(newp, oldp) + vp9_cost_upd256;
- const int savings = old_b - new_b - update_b;
- if (savings > bestsavings) {
- bestsavings = savings;
- bestnewp = newp;
+ if (old_b > upd_cost + (MIN_DELP_BITS << VP9_PROB_COST_SHIFT)) {
+ for (newp = *bestp; newp != oldp; newp += step) {
+ const int new_b = cost_branch256(ct, newp);
+ const int update_b = prob_diff_update_cost(newp, oldp) + upd_cost;
+ const int savings = old_b - new_b - update_b;
+ if (savings > bestsavings) {
+ bestsavings = savings;
+ bestnewp = newp;
+ }
}
}
*bestp = bestnewp;
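
The new guard makes the floor on update cost explicit: signalling any new
probability costs at least the update flag plus MIN_DELP_BITS coded bits,
scaled into fixed-point cost units by VP9_PROB_COST_SHIFT (its numeric
value is assumed below). When the old branch cost is already at or under
that floor, no candidate can yield positive savings, so the search loop
is skipped entirely. Distilled into a hypothetical helper:

    #define PROB_COST_SHIFT 9  /* assumed value of VP9_PROB_COST_SHIFT */
    #define MIN_DELP_BITS 5

    /* Nonzero when searching for a cheaper probability could possibly
     * pay off: the old branch cost must exceed the cheapest conceivable
     * update (update flag plus minimum delta-prob bits). */
    static int update_can_save_bits(int old_branch_cost, int upd_flag_cost) {
      return old_branch_cost >
             upd_flag_cost + (MIN_DELP_BITS << PROB_COST_SHIFT);
    }
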
@@ -138,52 +140,35 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct,
}
int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
- const vpx_prob *oldp,
+ const vpx_prob oldp,
vpx_prob *bestp,
vpx_prob upd,
int stepsize) {
- int i, old_b, new_b, update_b, savings, bestsavings, step;
+ int i, old_b, new_b, update_b, savings, bestsavings;
int newp;
- vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
- vp9_model_to_full_probs(oldp, oldplist);
- memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
- for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i)
- old_b += cost_branch256(ct + 2 * i, oldplist[i]);
- old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]);
+ const int step_sign = *bestp > oldp ? -1 : 1;
+ const int step = stepsize * step_sign;
+ const int upd_cost = vp9_cost_one(upd) - vp9_cost_zero(upd);
+ const vpx_prob *newplist, *oldplist;
+ vpx_prob bestnewp;
+ oldplist = vp9_pareto8_full[oldp - 1];
+ old_b = cost_branch256(ct + 2 * PIVOT_NODE, oldp);
+ for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
+ old_b += cost_branch256(ct + 2 * i, oldplist[i - UNCONSTRAINED_NODES]);
bestsavings = 0;
- bestnewp = oldp[PIVOT_NODE];
-
- if (*bestp > oldp[PIVOT_NODE]) {
- step = -stepsize;
- for (newp = *bestp; newp > oldp[PIVOT_NODE]; newp += step) {
- if (newp < 1 || newp > 255)
- continue;
- newplist[PIVOT_NODE] = newp;
- vp9_model_to_full_probs(newplist, newplist);
- for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
- new_b += cost_branch256(ct + 2 * i, newplist[i]);
- new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
- update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) +
- vp9_cost_upd256;
- savings = old_b - new_b - update_b;
- if (savings > bestsavings) {
- bestsavings = savings;
- bestnewp = newp;
- }
- }
- } else {
- step = stepsize;
- for (newp = *bestp; newp < oldp[PIVOT_NODE]; newp += step) {
- if (newp < 1 || newp > 255)
- continue;
- newplist[PIVOT_NODE] = newp;
- vp9_model_to_full_probs(newplist, newplist);
- for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
- new_b += cost_branch256(ct + 2 * i, newplist[i]);
- new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
- update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) +
- vp9_cost_upd256;
+ bestnewp = oldp;
+
+ assert(stepsize > 0);
+
+ if (old_b > upd_cost + (MIN_DELP_BITS << VP9_PROB_COST_SHIFT)) {
+ for (newp = *bestp; (newp - oldp) * step_sign < 0; newp += step) {
+ if (newp < 1 || newp > 255) continue;
+ newplist = vp9_pareto8_full[newp - 1];
+ new_b = cost_branch256(ct + 2 * PIVOT_NODE, newp);
+ for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i)
+ new_b += cost_branch256(ct + 2 * i, newplist[i - UNCONSTRAINED_NODES]);
+ update_b = prob_diff_update_cost(newp, oldp) + upd_cost;
savings = old_b - new_b - update_b;
if (savings > bestsavings) {
bestsavings = savings;
diff --git a/libvpx/vp9/encoder/vp9_subexp.h b/libvpx/vp9/encoder/vp9_subexp.h
index b96823232..efe62c0e7 100644
--- a/libvpx/vp9/encoder/vp9_subexp.h
+++ b/libvpx/vp9/encoder/vp9_subexp.h
@@ -32,7 +32,7 @@ int vp9_prob_diff_update_savings_search(const unsigned int *ct,
int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
- const vpx_prob *oldp,
+ const vpx_prob oldp,
vpx_prob *bestp,
vpx_prob upd,
int stepsize);
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 8a6818c86..1814a32c9 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -16,7 +16,6 @@
#include "vp9/encoder/vp9_extend.h"
#include "vpx_dsp/vpx_dsp_common.h"
-#define SMALL_FRAME_FB_IDX 7
#define SMALL_FRAME_WIDTH 32
#define SMALL_FRAME_HEIGHT 16
@@ -25,12 +24,44 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
int mi_rows = cpi->common.mi_rows;
int mi_cols = cpi->common.mi_cols;
- int sl, tl;
+ int sl, tl, i;
int alt_ref_idx = svc->number_spatial_layers;
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
svc->first_spatial_layer_to_encode = 0;
+ svc->rc_drop_superframe = 0;
+ svc->force_zero_mode_spatial_ref = 0;
+ svc->use_base_mv = 0;
+ svc->current_superframe = 0;
+ for (i = 0; i < REF_FRAMES; ++i)
+ svc->ref_frame_index[i] = -1;
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ cpi->svc.ext_frame_flags[sl] = 0;
+ cpi->svc.ext_lst_fb_idx[sl] = 0;
+ cpi->svc.ext_gld_fb_idx[sl] = 1;
+ cpi->svc.ext_alt_fb_idx[sl] = 2;
+ }
+
+  // For 1 pass CBR: allocate scaled_temp, which may be used as an
+  // intermediate buffer for 2-stage down-sampling: two stages of 1:2
+  // down-sampling for a target of 1/4x1/4.
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+ if (vpx_realloc_frame_buffer(&cpi->svc.scaled_temp,
+ cpi->common.width >> 1,
+ cpi->common.height >> 1,
+ cpi->common.subsampling_x,
+ cpi->common.subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cpi->common.use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS,
+ cpi->common.byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate scaled_frame for svc ");
+ }
+
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
@@ -107,15 +138,20 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
tl == 0) {
size_t last_coded_q_map_size;
size_t consec_zero_mv_size;
+ VP9_COMMON *const cm = &cpi->common;
lc->sb_index = 0;
- lc->map = vpx_malloc(mi_rows * mi_cols * sizeof(signed char));
+ CHECK_MEM_ERROR(cm, lc->map,
+ vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
memset(lc->map, 0, mi_rows * mi_cols);
- last_coded_q_map_size = mi_rows * mi_cols * sizeof(uint8_t);
- lc->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
+ last_coded_q_map_size = mi_rows * mi_cols *
+ sizeof(*lc->last_coded_q_map);
+ CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
+ vpx_malloc(last_coded_q_map_size));
assert(MAXQ <= 255);
memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
- consec_zero_mv_size = mi_rows * mi_cols * sizeof(uint8_t);
- lc->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
+ consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv);
+ CHECK_MEM_ERROR(cm, lc->consec_zero_mv,
+ vpx_malloc(consec_zero_mv_size));
memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
}
}
@@ -277,7 +313,8 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
cpi->alt_ref_source = lc->alt_ref_source;
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
- if (cpi->svc.number_temporal_layers > 1) {
+ if (cpi->svc.number_temporal_layers > 1 ||
+ (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) {
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
@@ -290,12 +327,12 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
signed char *temp = cr->map;
uint8_t *temp2 = cr->last_coded_q_map;
- uint8_t *temp3 = cr->consec_zero_mv;
+ uint8_t *temp3 = cpi->consec_zero_mv;
cr->map = lc->map;
lc->map = temp;
cr->last_coded_q_map = lc->last_coded_q_map;
lc->last_coded_q_map = temp2;
- cr->consec_zero_mv = lc->consec_zero_mv;
+ cpi->consec_zero_mv = lc->consec_zero_mv;
lc->consec_zero_mv = temp3;
cr->sb_index = lc->sb_index;
}
@@ -323,8 +360,8 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
cr->map = temp;
lc->last_coded_q_map = cr->last_coded_q_map;
cr->last_coded_q_map = temp2;
- lc->consec_zero_mv = cr->consec_zero_mv;
- cr->consec_zero_mv = temp3;
+ lc->consec_zero_mv = cpi->consec_zero_mv;
+ cpi->consec_zero_mv = temp3;
lc->sb_index = cr->sb_index;
}
}
@@ -351,6 +388,8 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
cpi->svc.number_temporal_layers];
++lc->current_video_frame_in_layer;
++lc->frames_from_key_frame;
+ if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
+ ++cpi->svc.current_superframe;
}
int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
@@ -402,7 +441,9 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
// base layer is a key frame.
- cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 1;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
@@ -417,7 +458,13 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
} else {
if (frame_num_within_temporal_struct == 1) {
// the first tl2 picture
- if (!spatial_id) {
+ if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer
+ cpi->ext_refresh_frame_flags_pending = 1;
+ if (!spatial_id)
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ else
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ } else if (!spatial_id) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG;
@@ -425,32 +472,38 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
cpi->ext_refresh_frame_flags_pending = 1;
cpi->ext_refresh_alt_ref_frame = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
- } else { // Top layer
- cpi->ext_refresh_frame_flags_pending = 0;
- cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
} else {
// The second tl2 picture
- if (!spatial_id) {
+ if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer
cpi->ext_refresh_frame_flags_pending = 1;
+ if (!spatial_id)
cpi->ref_frame_flags = VP9_LAST_FLAG;
- cpi->ext_refresh_last_frame = 1;
- } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+ else
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ } else if (!spatial_id) {
cpi->ext_refresh_frame_flags_pending = 1;
- cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
- cpi->ext_refresh_last_frame = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_alt_ref_frame = 1;
} else { // top layer
- cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ext_refresh_frame_flags_pending = 1;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ cpi->ext_refresh_alt_ref_frame = 1;
}
}
}
if (temporal_id == 0) {
cpi->lst_fb_idx = spatial_id;
- if (spatial_id)
+ if (spatial_id) {
+ if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ cpi->lst_fb_idx = spatial_id - 1;
+ cpi->gld_fb_idx = spatial_id;
+ } else {
cpi->gld_fb_idx = spatial_id - 1;
- else
+ }
+ } else {
cpi->gld_fb_idx = 0;
+ }
cpi->alt_fb_idx = 0;
} else if (temporal_id == 1) {
cpi->lst_fb_idx = spatial_id;
@@ -463,7 +516,7 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
} else {
cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
- cpi->alt_fb_idx = 0;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
}
@@ -485,7 +538,9 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
// base layer is a key frame.
- cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 1;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
@@ -501,10 +556,16 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
if (temporal_id == 0) {
cpi->lst_fb_idx = spatial_id;
- if (spatial_id)
+ if (spatial_id) {
+ if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ cpi->lst_fb_idx = spatial_id - 1;
+ cpi->gld_fb_idx = spatial_id;
+ } else {
cpi->gld_fb_idx = spatial_id - 1;
- else
+ }
+ } else {
cpi->gld_fb_idx = 0;
+ }
cpi->alt_fb_idx = 0;
} else if (temporal_id == 1) {
cpi->lst_fb_idx = spatial_id;
@@ -526,20 +587,31 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else if (cpi->svc.layer_context[0].is_key_frame) {
- cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 1;
} else {
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
cpi->lst_fb_idx = spatial_id;
- if (spatial_id)
+ if (spatial_id) {
+ if (cpi->svc.layer_context[0].is_key_frame) {
+ cpi->lst_fb_idx = spatial_id - 1;
+ cpi->gld_fb_idx = spatial_id;
+ } else {
cpi->gld_fb_idx = spatial_id - 1;
- else
+ }
+ } else {
cpi->gld_fb_idx = 0;
+ }
}
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc = NULL;
+ if (cpi->svc.number_spatial_layers > 1)
+ cpi->svc.use_base_mv = 1;
+ cpi->svc.force_zero_mode_spatial_ref = 1;
if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
@@ -557,6 +629,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
// Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is
// needed to support the case where the frame flags may be passed in via
// vpx_codec_encode(), which can be used for the temporal-only svc case.
+ // TODO(marpan): Consider adding an enc_config parameter to better handle
+ // this case.
if (cpi->ext_refresh_frame_flags_pending == 0) {
int sl;
cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
@@ -568,6 +642,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
}
}
+ if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode)
+ cpi->svc.rc_drop_superframe = 0;
+
lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id];
@@ -591,6 +668,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
}
#if CONFIG_SPATIAL_SVC
+#define SMALL_FRAME_FB_IDX 7
+
int vp9_svc_start_frame(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc;
@@ -701,7 +780,8 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
return 0;
}
-#endif
+#undef SMALL_FRAME_FB_IDX
+#endif // CONFIG_SPATIAL_SVC
struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi,
struct lookahead_ctx *ctx,
@@ -736,3 +816,27 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) {
}
}
}
+
+// Reset on key frame: reset counters, references and buffer updates.
+void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
+ int sl, tl;
+ SVC *const svc = &cpi->svc;
+ LAYER_CONTEXT *lc = NULL;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl];
+ lc->current_video_frame_in_layer = 0;
+ lc->frames_from_key_frame = 0;
+ }
+ }
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
+ set_flags_and_fb_idx_for_temporal_mode3(cpi);
+ } else if (svc->temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
+ set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
+ } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) {
+ set_flags_and_fb_idx_for_temporal_mode2(cpi);
+ }
+ vp9_update_temporal_layer_framerate(cpi);
+ vp9_restore_layer_context(cpi);
+}
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h
index 694b5abdc..9f386fb08 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -56,6 +56,7 @@ typedef struct {
int spatial_layer_to_encode;
int first_spatial_layer_to_encode;
+ int rc_drop_superframe;
// Workaround for multiple frame contexts
enum {
@@ -69,6 +70,8 @@ typedef struct {
// Store scaled source frames to be used for temporal filter to generate
// an alt ref frame.
YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
+ // Temp buffer used for 2-stage down-sampling, for real-time mode.
+ YV12_BUFFER_CONFIG scaled_temp;
// Layer context used for rate control in one pass temporal CBR mode or
// two pass spatial mode.
@@ -82,6 +85,10 @@ typedef struct {
int ext_lst_fb_idx[VPX_MAX_LAYERS];
int ext_gld_fb_idx[VPX_MAX_LAYERS];
int ext_alt_fb_idx[VPX_MAX_LAYERS];
+ int ref_frame_index[REF_FRAMES];
+ int force_zero_mode_spatial_ref;
+ int current_superframe;
+ int use_base_mv;
} SVC;
struct VP9_COMP;
@@ -129,6 +136,8 @@ int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
+void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c
index 16f9c8573..b6323e048 100644
--- a/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -45,8 +45,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
int x, int y) {
const int which_mv = 0;
const MV mv = { mv_row, mv_col };
- const InterpKernel *const kernel =
- vp9_filter_kernels[xd->mi[0]->mbmi.interp_filter];
+ const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];
enum mv_precision mv_precision_uv;
int uv_stride;
@@ -86,6 +85,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
return;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+ (void)xd;
vp9_build_inter_predictor(y_mb_ptr, stride,
&pred[0], 16,
&mv,
@@ -135,15 +135,38 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
-
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
- modifier *= 3;
+ int pixel_value = *frame2;
+
+ // non-local mean approach
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = (int)i + idy;
+ int col = (int)j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
+ modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
+
modifier += rounding;
modifier >>= strength;
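
The filter weight is now derived from the mean squared difference over
the valid 3x3 neighbourhood rather than from a single pixel, i.e. a
non-local-means-style weight. A condensed sketch of the computation (the
rounding term is assumed to follow the usual 1 << (strength - 1)
convention; the library precomputes it):

    static int nlm_modifier(const unsigned char *cur, int cur_stride,
                            const unsigned char *pred, int pred_stride,
                            int row, int col, int height, int width,
                            int strength) {
      const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
      int sum = 0, count = 0, dy, dx;
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          const int r = row + dy, c = col + dx;
          if (r >= 0 && r < height && c >= 0 && c < width) {
            const int diff =
                cur[r * cur_stride + c] - pred[r * pred_stride + c];
            sum += diff * diff;
            ++count;
          }
        }
      }
      /* Average 3 * SSE over the pixels actually inside the block, then
       * quantize by the filter strength. */
      return ((sum * 3) / count + rounding) >> strength;
    }
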
@@ -182,15 +205,34 @@ void vp9_highbd_temporal_filter_apply_c(uint8_t *frame1_8,
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int src_byte = frame1[byte];
- int pixel_value = *frame2++;
-
- modifier = src_byte - pixel_value;
- // This is an integer approximation of:
- // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
- // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
- modifier *= modifier;
+ int pixel_value = *frame2;
+ int diff_sse[9] = { 0 };
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = (int)i + idy;
+ int col = (int)j + idx;
+
+ if (row >= 0 && row < (int)block_height &&
+ col >= 0 && col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx)
+ modifier += diff_sse[idx];
+
modifier *= 3;
+ modifier /= index;
+
+ ++frame2;
modifier += rounding;
modifier >>= strength;
@@ -222,8 +264,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int step_param;
int sadpb = x->sadperbit16;
int bestsme = INT_MAX;
- int distortion;
- unsigned int sse;
+ uint32_t distortion;
+ uint32_t sse;
int cost_list[5];
MV best_ref_mv1 = {0, 0};
@@ -383,55 +425,57 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
- vp9_highbd_temporal_filter_apply(f->y_buffer + mb_y_offset,
- f->y_stride,
- predictor, 16, 16, adj_strength,
- filter_weight,
- accumulator, count);
- vp9_highbd_temporal_filter_apply(f->u_buffer + mb_uv_offset,
- f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height,
- adj_strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp9_highbd_temporal_filter_apply(f->v_buffer + mb_uv_offset,
- f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height,
- adj_strength, filter_weight,
- accumulator + 512, count + 512);
+ vp9_highbd_temporal_filter_apply_c(f->y_buffer + mb_y_offset,
+ f->y_stride,
+ predictor, 16, 16, adj_strength,
+ filter_weight,
+ accumulator, count);
+ vp9_highbd_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height,
+ adj_strength, filter_weight,
+ accumulator + 256, count + 256);
+ vp9_highbd_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height,
+ adj_strength, filter_weight,
+ accumulator + 512, count + 512);
} else {
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, 16,
+ strength, filter_weight,
+ accumulator, count);
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 256,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 256,
+ count + 256);
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset,
+ f->uv_stride,
+ predictor + 512,
+ mb_uv_width, mb_uv_height, strength,
+ filter_weight, accumulator + 512,
+ count + 512);
+ }
+#else
+ // Apply the filter (YUV)
+ // TODO(jingning): Need SIMD optimization for this.
+ vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
predictor, 16, 16,
strength, filter_weight,
accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
predictor + 256,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 256,
count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
+ vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
predictor + 512,
mb_uv_width, mb_uv_height, strength,
filter_weight, accumulator + 512,
count + 512);
- }
-#else
- // Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16,
- strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512,
- mb_uv_width, mb_uv_height, strength,
- filter_weight, accumulator + 512,
- count + 512);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
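
Each _apply call above adds filter_weight * pixel into accumulator and filter_weight into count for its plane; once every frame in the window has been filtered in, the output pixel is the rounded ratio of the two. A sketch of that final step, assuming the 16x16 Y-plane layout used above (helper name hypothetical):

    /* Sketch: final normalization after all frames are filtered in.
     * accumulator[k] holds sum(weight * pixel), count[k] holds sum(weight). */
    static void normalize_sketch(const unsigned int *accumulator,
                                 const unsigned short *count,
                                 unsigned char *dst, int stride) {
      int i, j;
      for (i = 0; i < 16; ++i) {
        for (j = 0; j < 16; ++j) {
          const int k = i * 16 + j;
          /* Rounded division: (acc + count/2) / count. */
          dst[i * stride + j] =
              (unsigned char)((accumulator[k] + (count[k] >> 1)) / count[k]);
        }
      }
    }
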
diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c
index 6076e2a61..edec755dd 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.c
+++ b/libvpx/vp9/encoder/vp9_tokenize.c
@@ -18,7 +18,6 @@
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_scan.h"
-#include "vp9/common/vp9_seg_common.h"
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
@@ -50,6 +49,35 @@ static const TOKENVALUE dct_cat_lt_10_value_tokens[] = {
const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
/ 2;
+// The corresponding costs of the extrabits for the tokens in the above table
+// are stored in the table below. The values are obtained by looking up the
+// entry for the specified extrabits in the cost table of the corresponding
+// token (as defined in the cost element of vp9_extra_bits),
+// e.g. {9, 63} maps to cat5_cost[63 >> 1] and {1, 1} maps to sign_cost[1 >> 1].
+static const int dct_cat_lt_10_value_cost[] = {
+ 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531,
+ 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190,
+ 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894,
+ 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553,
+ 3197, 3116, 3058, 2977, 2881, 2800,
+ 2742, 2661, 2615, 2534, 2476, 2395,
+ 2299, 2218, 2160, 2079,
+ 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652,
+ 1893, 1696, 1453, 1256, 1229, 864,
+ 512, 512, 512, 512, 0,
+ 512, 512, 512, 512,
+ 864, 1229, 1256, 1453, 1696, 1893,
+ 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566,
+ 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615,
+ 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197,
+ 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795,
+ 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
+ 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432,
+ 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773,
+};
+const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost +
+ (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost))
+ / 2;
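
vp9_dct_cat_lt_10_value_cost is exported as a pointer to the middle entry of the table, so callers can index it directly with a signed coefficient value. A self-contained illustration of the idiom:

    #include <stdio.h>

    static const int table[] = { 30, 20, 10, 0, 10, 20, 30 };
    /* Point at the center entry so the table accepts indices in [-3, 3]. */
    static const int *centered = table + sizeof(table) / sizeof(*table) / 2;

    int main(void) {
      printf("%d %d %d\n", centered[-3], centered[0], centered[3]); /* 30 0 30 */
      return 0;
    }
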
// Array indices are identical to previously-existing CONTEXT_NODE indices
const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
@@ -67,303 +95,178 @@ const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
};
static const int16_t zero_cost[] = {0};
-static const int16_t one_cost[] = {255, 257};
-static const int16_t two_cost[] = {255, 257};
-static const int16_t three_cost[] = {255, 257};
-static const int16_t four_cost[] = {255, 257};
-static const int16_t cat1_cost[] = {429, 431, 616, 618};
-static const int16_t cat2_cost[] = {624, 626, 727, 729, 848, 850, 951, 953};
-static const int16_t cat3_cost[] = {
- 820, 822, 893, 895, 940, 942, 1013, 1015, 1096, 1098, 1169, 1171, 1216, 1218,
- 1289, 1291
-};
-static const int16_t cat4_cost[] = {
- 1032, 1034, 1075, 1077, 1105, 1107, 1148, 1150, 1194, 1196, 1237, 1239,
- 1267, 1269, 1310, 1312, 1328, 1330, 1371, 1373, 1401, 1403, 1444, 1446,
- 1490, 1492, 1533, 1535, 1563, 1565, 1606, 1608
-};
-static const int16_t cat5_cost[] = {
- 1269, 1271, 1283, 1285, 1306, 1308, 1320,
- 1322, 1347, 1349, 1361, 1363, 1384, 1386, 1398, 1400, 1443, 1445, 1457,
- 1459, 1480, 1482, 1494, 1496, 1521, 1523, 1535, 1537, 1558, 1560, 1572,
- 1574, 1592, 1594, 1606, 1608, 1629, 1631, 1643, 1645, 1670, 1672, 1684,
- 1686, 1707, 1709, 1721, 1723, 1766, 1768, 1780, 1782, 1803, 1805, 1817,
- 1819, 1844, 1846, 1858, 1860, 1881, 1883, 1895, 1897
-};
+static const int16_t sign_cost[1] = {512};
+static const int16_t cat1_cost[1 << 1] = {864, 1229};
+static const int16_t cat2_cost[1 << 2] = {1256, 1453, 1696, 1893};
+static const int16_t cat3_cost[1 << 3] = {1652, 1791, 1884, 2023,
+ 2195, 2334, 2427, 2566};
+static const int16_t cat4_cost[1 << 4] = {2079, 2160, 2218, 2299, 2395, 2476,
+ 2534, 2615, 2661, 2742, 2800, 2881,
+ 2977, 3058, 3116, 3197};
+static const int16_t cat5_cost[1 << 5] = {
+ 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963,
+ 2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363,
+ 3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773};
const int16_t vp9_cat6_low_cost[256] = {
- 1638, 1640, 1646, 1648, 1652, 1654, 1660, 1662,
- 1670, 1672, 1678, 1680, 1684, 1686, 1692, 1694, 1711, 1713, 1719, 1721,
- 1725, 1727, 1733, 1735, 1743, 1745, 1751, 1753, 1757, 1759, 1765, 1767,
- 1787, 1789, 1795, 1797, 1801, 1803, 1809, 1811, 1819, 1821, 1827, 1829,
- 1833, 1835, 1841, 1843, 1860, 1862, 1868, 1870, 1874, 1876, 1882, 1884,
- 1892, 1894, 1900, 1902, 1906, 1908, 1914, 1916, 1940, 1942, 1948, 1950,
- 1954, 1956, 1962, 1964, 1972, 1974, 1980, 1982, 1986, 1988, 1994, 1996,
- 2013, 2015, 2021, 2023, 2027, 2029, 2035, 2037, 2045, 2047, 2053, 2055,
- 2059, 2061, 2067, 2069, 2089, 2091, 2097, 2099, 2103, 2105, 2111, 2113,
- 2121, 2123, 2129, 2131, 2135, 2137, 2143, 2145, 2162, 2164, 2170, 2172,
- 2176, 2178, 2184, 2186, 2194, 2196, 2202, 2204, 2208, 2210, 2216, 2218,
- 2082, 2084, 2090, 2092, 2096, 2098, 2104, 2106, 2114, 2116, 2122, 2124,
- 2128, 2130, 2136, 2138, 2155, 2157, 2163, 2165, 2169, 2171, 2177, 2179,
- 2187, 2189, 2195, 2197, 2201, 2203, 2209, 2211, 2231, 2233, 2239, 2241,
- 2245, 2247, 2253, 2255, 2263, 2265, 2271, 2273, 2277, 2279, 2285, 2287,
- 2304, 2306, 2312, 2314, 2318, 2320, 2326, 2328, 2336, 2338, 2344, 2346,
- 2350, 2352, 2358, 2360, 2384, 2386, 2392, 2394, 2398, 2400, 2406, 2408,
- 2416, 2418, 2424, 2426, 2430, 2432, 2438, 2440, 2457, 2459, 2465, 2467,
- 2471, 2473, 2479, 2481, 2489, 2491, 2497, 2499, 2503, 2505, 2511, 2513,
- 2533, 2535, 2541, 2543, 2547, 2549, 2555, 2557, 2565, 2567, 2573, 2575,
- 2579, 2581, 2587, 2589, 2606, 2608, 2614, 2616, 2620, 2622, 2628, 2630,
- 2638, 2640, 2646, 2648, 2652, 2654, 2660, 2662
-};
-const int16_t vp9_cat6_high_cost[128] = {
- 72, 892, 1183, 2003, 1448, 2268, 2559, 3379,
- 1709, 2529, 2820, 3640, 3085, 3905, 4196, 5016, 2118, 2938, 3229, 4049,
- 3494, 4314, 4605, 5425, 3755, 4575, 4866, 5686, 5131, 5951, 6242, 7062,
- 2118, 2938, 3229, 4049, 3494, 4314, 4605, 5425, 3755, 4575, 4866, 5686,
- 5131, 5951, 6242, 7062, 4164, 4984, 5275, 6095, 5540, 6360, 6651, 7471,
- 5801, 6621, 6912, 7732, 7177, 7997, 8288, 9108, 2118, 2938, 3229, 4049,
- 3494, 4314, 4605, 5425, 3755, 4575, 4866, 5686, 5131, 5951, 6242, 7062,
- 4164, 4984, 5275, 6095, 5540, 6360, 6651, 7471, 5801, 6621, 6912, 7732,
- 7177, 7997, 8288, 9108, 4164, 4984, 5275, 6095, 5540, 6360, 6651, 7471,
- 5801, 6621, 6912, 7732, 7177, 7997, 8288, 9108, 6210, 7030, 7321, 8141,
- 7586, 8406, 8697, 9517, 7847, 8667, 8958, 9778, 9223, 10043, 10334, 11154
-};
+ 3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552,
+ 3574, 3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763,
+ 3810, 3822, 3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008,
+ 4030, 4042, 4053, 4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204,
+ 4266, 4278, 4289, 4301, 4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440,
+ 4462, 4474, 4485, 4497, 4253, 4265, 4276, 4288, 4310, 4322, 4333, 4345,
+ 4392, 4404, 4415, 4427, 4449, 4461, 4472, 4484, 4546, 4558, 4569, 4581,
+ 4603, 4615, 4626, 4638, 4685, 4697, 4708, 4720, 4742, 4754, 4765, 4777,
+ 4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940, 4987, 4999, 5010, 5022,
+ 5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198, 5210, 5221, 5233,
+ 5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000, 5011, 5023,
+ 5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207, 5219,
+ 5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455,
+ 5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675,
+ 5722, 5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911,
+ 5933, 5945, 5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107,
+ 5863, 5875, 5886, 5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037,
+ 6059, 6071, 6082, 6094, 6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248,
+ 6295, 6307, 6318, 6330, 6352, 6364, 6375, 6387, 6458, 6470, 6481, 6493,
+ 6515, 6527, 6538, 6550, 6597, 6609, 6620, 6632, 6654, 6666, 6677, 6689,
+ 6751, 6763, 6774, 6786, 6808, 6820, 6831, 6843, 6890, 6902, 6913, 6925,
+ 6947, 6959, 6970, 6982};
+const int vp9_cat6_high_cost[64] = {
+ 88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305,
+ 8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889,
+ 9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666,
+ 5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046,
+ 10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943,
+ 15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684};
#if CONFIG_VP9_HIGHBITDEPTH
-const int16_t vp9_cat6_high10_high_cost[512] = {
- 74, 894, 1185, 2005, 1450, 2270, 2561,
- 3381, 1711, 2531, 2822, 3642, 3087, 3907, 4198, 5018, 2120, 2940, 3231,
- 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, 5688, 5133, 5953, 6244,
- 7064, 2120, 2940, 3231, 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868,
- 5688, 5133, 5953, 6244, 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653,
- 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 2120, 2940, 3231,
- 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, 5688, 5133, 5953, 6244,
- 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653, 7473, 5803, 6623, 6914,
- 7734, 7179, 7999, 8290, 9110, 4166, 4986, 5277, 6097, 5542, 6362, 6653,
- 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, 7032, 7323,
- 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336,
- 11156, 2120, 2940, 3231, 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868,
- 5688, 5133, 5953, 6244, 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653,
- 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 4166, 4986, 5277,
- 6097, 5542, 6362, 6653, 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290,
- 9110, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960,
- 9780, 9225, 10045, 10336, 11156, 4166, 4986, 5277, 6097, 5542, 6362, 6653,
- 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, 7032, 7323,
- 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336,
- 11156, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960,
- 9780, 9225, 10045, 10336, 11156, 8258, 9078, 9369, 10189, 9634, 10454,
- 10745, 11565, 9895, 10715, 11006, 11826, 11271, 12091, 12382, 13202, 2120,
- 2940, 3231, 4051, 3496, 4316, 4607, 5427, 3757, 4577, 4868, 5688, 5133,
- 5953, 6244, 7064, 4166, 4986, 5277, 6097, 5542, 6362, 6653, 7473, 5803,
- 6623, 6914, 7734, 7179, 7999, 8290, 9110, 4166, 4986, 5277, 6097, 5542,
- 6362, 6653, 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212,
- 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225,
- 10045, 10336, 11156, 4166, 4986, 5277, 6097, 5542, 6362, 6653, 7473, 5803,
- 6623, 6914, 7734, 7179, 7999, 8290, 9110, 6212, 7032, 7323, 8143, 7588,
- 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336, 11156, 6212,
- 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960, 9780, 9225,
- 10045, 10336, 11156, 8258, 9078, 9369, 10189, 9634, 10454, 10745, 11565,
- 9895, 10715, 11006, 11826, 11271, 12091, 12382, 13202, 4166, 4986, 5277,
- 6097, 5542, 6362, 6653, 7473, 5803, 6623, 6914, 7734, 7179, 7999, 8290,
- 9110, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669, 8960,
- 9780, 9225, 10045, 10336, 11156, 6212, 7032, 7323, 8143, 7588, 8408, 8699,
- 9519, 7849, 8669, 8960, 9780, 9225, 10045, 10336, 11156, 8258, 9078, 9369,
- 10189, 9634, 10454, 10745, 11565, 9895, 10715, 11006, 11826, 11271, 12091,
- 12382, 13202, 6212, 7032, 7323, 8143, 7588, 8408, 8699, 9519, 7849, 8669,
- 8960, 9780, 9225, 10045, 10336, 11156, 8258, 9078, 9369, 10189, 9634, 10454,
- 10745, 11565, 9895, 10715, 11006, 11826, 11271, 12091, 12382, 13202, 8258,
- 9078, 9369, 10189, 9634, 10454, 10745, 11565, 9895, 10715, 11006, 11826,
- 11271, 12091, 12382, 13202, 10304, 11124, 11415, 12235, 11680, 12500, 12791,
- 13611, 11941, 12761, 13052, 13872, 13317, 14137, 14428, 15248,
-};
-const int16_t vp9_cat6_high12_high_cost[2048] = {
- 76, 896, 1187, 2007, 1452, 2272, 2563,
- 3383, 1713, 2533, 2824, 3644, 3089, 3909, 4200, 5020, 2122, 2942, 3233,
- 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, 5955, 6246,
- 7066, 2122, 2942, 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870,
- 5690, 5135, 5955, 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655,
- 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 2122, 2942, 3233,
- 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, 5955, 6246,
- 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916,
- 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, 6099, 5544, 6364, 6655,
- 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325,
- 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338,
- 11158, 2122, 2942, 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870,
- 5690, 5135, 5955, 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655,
- 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279,
- 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292,
- 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962,
- 9782, 9227, 10047, 10338, 11158, 4168, 4988, 5279, 6099, 5544, 6364, 6655,
- 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325,
- 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338,
- 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962,
- 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456,
- 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 2122,
- 2942, 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135,
- 5955, 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805,
- 6625, 6916, 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, 6099, 5544,
- 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214,
- 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227,
- 10047, 10338, 11158, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805,
- 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590,
- 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214,
- 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227,
- 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567,
- 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 4168, 4988, 5279,
- 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292,
- 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962,
- 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701,
- 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371,
- 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093,
- 12384, 13204, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671,
- 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456,
- 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260,
- 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828,
- 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793,
- 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 2122, 2942,
- 3233, 4053, 3498, 4318, 4609, 5429, 3759, 4579, 4870, 5690, 5135, 5955,
- 6246, 7066, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625,
- 6916, 7736, 7181, 8001, 8292, 9112, 4168, 4988, 5279, 6099, 5544, 6364,
- 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034,
- 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047,
- 10338, 11158, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625,
- 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410,
- 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034,
- 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047,
- 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897,
- 10717, 11008, 11828, 11273, 12093, 12384, 13204, 4168, 4988, 5279, 6099,
- 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112,
- 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782,
- 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521,
- 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191,
- 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384,
- 13204, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962,
- 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456,
- 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260,
- 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828,
- 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793,
- 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 4168, 4988,
- 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001,
- 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671,
- 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410,
- 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080,
- 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273,
- 12093, 12384, 13204, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851,
- 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636,
- 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204,
- 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008,
- 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502,
- 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 6214,
- 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227,
- 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567,
- 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371,
- 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093,
- 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943,
- 12763, 13054, 13874, 13319, 14139, 14430, 15250, 8260, 9080, 9371, 10191,
- 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384,
- 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763,
- 13054, 13874, 13319, 14139, 14430, 15250, 10306, 11126, 11417, 12237, 11682,
- 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250,
- 12352, 13172, 13463, 14283, 13728, 14548, 14839, 15659, 13989, 14809, 15100,
- 15920, 15365, 16185, 16476, 17296, 2122, 2942, 3233, 4053, 3498, 4318, 4609,
- 5429, 3759, 4579, 4870, 5690, 5135, 5955, 6246, 7066, 4168, 4988, 5279,
- 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292,
- 9112, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916,
- 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701,
- 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 4168, 4988, 5279,
- 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916, 7736, 7181, 8001, 8292,
- 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962,
- 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325, 8145, 7590, 8410, 8701,
- 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371,
- 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093,
- 12384, 13204, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625,
- 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410,
- 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034,
- 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047,
- 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897,
- 10717, 11008, 11828, 11273, 12093, 12384, 13204, 6214, 7034, 7325, 8145,
- 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158,
- 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008,
- 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, 10456,
- 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306,
- 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874,
- 13319, 14139, 14430, 15250, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475,
- 5805, 6625, 6916, 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145,
- 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158,
- 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782,
- 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747,
- 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 6214, 7034,
- 7325, 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047,
- 10338, 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897,
- 10717, 11008, 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191,
- 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384,
- 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763,
- 13054, 13874, 13319, 14139, 14430, 15250, 6214, 7034, 7325, 8145, 7590,
- 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260,
- 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828,
- 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, 10456, 10747,
- 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126,
- 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319,
- 14139, 14430, 15250, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567,
- 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417,
- 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139,
- 14430, 15250, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943,
- 12763, 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, 13463, 14283,
- 13728, 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, 16185, 16476,
- 17296, 4168, 4988, 5279, 6099, 5544, 6364, 6655, 7475, 5805, 6625, 6916,
- 7736, 7181, 8001, 8292, 9112, 6214, 7034, 7325, 8145, 7590, 8410, 8701,
- 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 6214, 7034, 7325,
- 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338,
- 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717,
- 11008, 11828, 11273, 12093, 12384, 13204, 6214, 7034, 7325, 8145, 7590,
- 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260,
- 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828,
- 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636, 10456, 10747,
- 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126,
- 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319,
- 14139, 14430, 15250, 6214, 7034, 7325, 8145, 7590, 8410, 8701, 9521, 7851,
- 8671, 8962, 9782, 9227, 10047, 10338, 11158, 8260, 9080, 9371, 10191, 9636,
- 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204,
- 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008,
- 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502,
- 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 8260,
- 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717, 11008, 11828,
- 11273, 12093, 12384, 13204, 10306, 11126, 11417, 12237, 11682, 12502, 12793,
- 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 10306, 11126,
- 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319,
- 14139, 14430, 15250, 12352, 13172, 13463, 14283, 13728, 14548, 14839, 15659,
- 13989, 14809, 15100, 15920, 15365, 16185, 16476, 17296, 6214, 7034, 7325,
- 8145, 7590, 8410, 8701, 9521, 7851, 8671, 8962, 9782, 9227, 10047, 10338,
- 11158, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567, 9897, 10717,
- 11008, 11828, 11273, 12093, 12384, 13204, 8260, 9080, 9371, 10191, 9636,
- 10456, 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204,
- 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054,
- 13874, 13319, 14139, 14430, 15250, 8260, 9080, 9371, 10191, 9636, 10456,
- 10747, 11567, 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306,
- 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874,
- 13319, 14139, 14430, 15250, 10306, 11126, 11417, 12237, 11682, 12502, 12793,
- 13613, 11943, 12763, 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172,
- 13463, 14283, 13728, 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365,
- 16185, 16476, 17296, 8260, 9080, 9371, 10191, 9636, 10456, 10747, 11567,
- 9897, 10717, 11008, 11828, 11273, 12093, 12384, 13204, 10306, 11126, 11417,
- 12237, 11682, 12502, 12793, 13613, 11943, 12763, 13054, 13874, 13319, 14139,
- 14430, 15250, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943,
- 12763, 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, 13463, 14283,
- 13728, 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, 16185, 16476,
- 17296, 10306, 11126, 11417, 12237, 11682, 12502, 12793, 13613, 11943, 12763,
- 13054, 13874, 13319, 14139, 14430, 15250, 12352, 13172, 13463, 14283, 13728,
- 14548, 14839, 15659, 13989, 14809, 15100, 15920, 15365, 16185, 16476, 17296,
- 12352, 13172, 13463, 14283, 13728, 14548, 14839, 15659, 13989, 14809, 15100,
- 15920, 15365, 16185, 16476, 17296, 14398, 15218, 15509, 16329, 15774, 16594,
- 16885, 17705, 16035, 16855, 17146, 17966, 17411, 18231, 18522, 19342
-};
+const int vp9_cat6_high10_high_cost[256] = {
+ 94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311,
+ 8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895,
+ 9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672,
+ 5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052,
+ 10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949,
+ 15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350,
+ 6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825,
+ 12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627,
+ 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404,
+ 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566,
+ 17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921,
+ 17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989,
+ 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464,
+ 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506,
+ 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825,
+ 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205,
+ 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560,
+ 19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503,
+ 13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858,
+ 14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238,
+ 18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557,
+ 19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599,
+ 18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074,
+ 24237, 24713, 26876};
+const int vp9_cat6_high12_high_cost[1024] = {
+ 100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317,
+ 8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901,
+ 9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678,
+ 5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058,
+ 10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955,
+ 15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356,
+ 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831,
+ 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
+ 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410,
+ 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
+ 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927,
+ 17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995,
+ 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470,
+ 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512,
+ 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831,
+ 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211,
+ 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566,
+ 19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509,
+ 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864,
+ 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244,
+ 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
+ 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605,
+ 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080,
+ 24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055,
+ 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410,
+ 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
+ 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349,
+ 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151,
+ 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626,
+ 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027,
+ 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924,
+ 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
+ 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081,
+ 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665,
+ 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286,
+ 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666,
+ 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
+ 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027,
+ 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
+ 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
+ 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018,
+ 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180,
+ 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535,
+ 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759,
+ 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
+ 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276,
+ 25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253,
+ 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
+ 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988,
+ 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994,
+ 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349,
+ 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729,
+ 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985,
+ 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027,
+ 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
+ 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
+ 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081,
+ 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243,
+ 24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864,
+ 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666,
+ 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141,
+ 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605,
+ 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502,
+ 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882,
+ 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596,
+ 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180,
+ 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957,
+ 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
+ 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
+ 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449,
+ 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924,
+ 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
+ 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503,
+ 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665,
+ 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020,
+ 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181,
+ 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
+ 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698,
+ 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017,
+ 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397,
+ 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752,
+ 27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602,
+ 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957,
+ 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
+ 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
+ 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698,
+ 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173,
+ 28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334,
+ 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689,
+ 24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851,
+ 29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628,
+ 25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430,
+ 26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905,
+ 35068};
#endif
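
Two things changed in the cat6 high-cost tables: they halved in size (128 to 64, 512 to 256, 2048 to 1024) because the sign bit no longer participates in the high-table index (the new vp9_get_cost later in this diff shifts it off first), and their element type widened from int16_t to int, plausibly because the largest 12-bit-depth entry above (35068) no longer fits. A check of the latter point:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      /* The largest cost in vp9_cat6_high12_high_cost exceeds what the
       * old int16_t element type could represent. */
      assert(35068 > INT16_MAX);
      return 0;
    }
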
const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = {
{0, 0, 0, zero_cost}, // ZERO_TOKEN
- {0, 0, 1, one_cost}, // ONE_TOKEN
- {0, 0, 2, two_cost}, // TWO_TOKEN
- {0, 0, 3, three_cost}, // THREE_TOKEN
- {0, 0, 4, four_cost}, // FOUR_TOKEN
+ {0, 0, 1, sign_cost}, // ONE_TOKEN
+ {0, 0, 2, sign_cost}, // TWO_TOKEN
+ {0, 0, 3, sign_cost}, // THREE_TOKEN
+ {0, 0, 4, sign_cost}, // FOUR_TOKEN
{vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN
{vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN
{vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN
@@ -375,32 +278,32 @@ const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = {
#if CONFIG_VP9_HIGHBITDEPTH
const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = {
- {0, 0, 0, zero_cost}, // ZERO
- {0, 0, 1, one_cost}, // ONE
- {0, 0, 2, two_cost}, // TWO
- {0, 0, 3, three_cost}, // THREE
- {0, 0, 4, four_cost}, // FOUR
- {vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
- {vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
- {vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
- {vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
- {vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
- {vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6
- {0, 0, 0, zero_cost} // EOB
+ {0, 0, 0, zero_cost}, // ZERO
+ {0, 0, 1, sign_cost}, // ONE
+ {0, 0, 2, sign_cost}, // TWO
+ {0, 0, 3, sign_cost}, // THREE
+ {0, 0, 4, sign_cost}, // FOUR
+ {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
+ {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
+ {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
+ {vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
+ {vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
+ {vp9_cat6_prob_high12 + 2, 16, CAT6_MIN_VAL, 0}, // CAT6
+ {0, 0, 0, zero_cost} // EOB
};
const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = {
- {0, 0, 0, zero_cost}, // ZERO
- {0, 0, 1, one_cost}, // ONE
- {0, 0, 2, two_cost}, // TWO
- {0, 0, 3, three_cost}, // THREE
- {0, 0, 4, four_cost}, // FOUR
- {vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
- {vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
- {vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
- {vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
- {vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
- {vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
- {0, 0, 0, zero_cost} // EOB
+ {0, 0, 0, zero_cost}, // ZERO
+ {0, 0, 1, sign_cost}, // ONE
+ {0, 0, 2, sign_cost}, // TWO
+ {0, 0, 3, sign_cost}, // THREE
+ {0, 0, 4, sign_cost}, // FOUR
+ {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
+ {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
+ {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
+ {vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
+ {vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
+ {vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
+ {0, 0, 0, zero_cost} // EOB
};
#endif
@@ -431,35 +334,25 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize,
}
static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
- int32_t extra, uint8_t token,
- uint8_t skip_eob_node,
+ int16_t token, EXTRABIT extra,
unsigned int *counts) {
+ (*t)->context_tree = context_tree;
(*t)->token = token;
(*t)->extra = extra;
- (*t)->context_tree = context_tree;
- (*t)->skip_eob_node = skip_eob_node;
(*t)++;
++counts[token];
}
static INLINE void add_token_no_extra(TOKENEXTRA **t,
const vpx_prob *context_tree,
- uint8_t token,
- uint8_t skip_eob_node,
+ int16_t token,
unsigned int *counts) {
- (*t)->token = token;
(*t)->context_tree = context_tree;
- (*t)->skip_eob_node = skip_eob_node;
+ (*t)->token = token;
(*t)++;
++counts[token];
}
-static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
- TX_SIZE tx_size) {
- const int eob_max = 16 << (tx_size << 1);
- return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
-}
-
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct tokenize_b_args* const args = arg;
@@ -471,17 +364,16 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
uint8_t token_cache[32 * 32];
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi = xd->mi[0];
int pt; /* near block/prev token context index */
int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
int eob = p->eobs[block];
const PLANE_TYPE type = get_plane_type(plane);
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
const scan_order *so;
- const int ref = is_inter_block(mbmi);
+ const int ref = is_inter_block(mi);
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[tx_size][type][ref];
vpx_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
@@ -489,7 +381,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
td->counts->eob_branch[tx_size][type][ref];
const uint8_t *const band = get_band_translate(tx_size);
- const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
+ const int tx_eob = 16 << (tx_size << 1);
int16_t token;
EXTRABIT extra;
int aoff, loff;
@@ -504,15 +396,13 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
while (c < eob) {
int v = 0;
- int skip_eob = 0;
v = qcoeff[scan[c]];
+ ++eob_branch[band[c]][pt];
while (!v) {
- add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, skip_eob,
+ add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN,
counts[band[c]][pt]);
- eob_branch[band[c]][pt] += !skip_eob;
- skip_eob = 1;
token_cache[scan[c]] = 0;
++c;
pt = get_coef_context(nb, token_cache, c);
@@ -521,18 +411,17 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_get_token_extra(v, &token, &extra);
- add_token(&t, coef_probs[band[c]][pt], extra, (uint8_t)token,
- (uint8_t)skip_eob, counts[band[c]][pt]);
- eob_branch[band[c]][pt] += !skip_eob;
+ add_token(&t, coef_probs[band[c]][pt], token, extra,
+ counts[band[c]][pt]);
token_cache[scan[c]] = vp9_pt_energy_class[token];
++c;
pt = get_coef_context(nb, token_cache, c);
}
- if (c < seg_eob) {
- add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
- counts[band[c]][pt]);
+ if (c < tx_eob) {
++eob_branch[band[c]][pt];
+ add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN,
+ counts[band[c]][pt]);
}
*tp = t;
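
With skip_eob_node removed, the EOB-branch counter is incremented exactly once per coefficient run, at the run's first position, before the ZERO_TOKEN loop; that matches the old eob_branch[...] += !skip_eob gating, which also fired only on the first token of each run. The simplified loop shape, as a sketch (the emit helpers are hypothetical stand-ins for add_token / add_token_no_extra):

    /* Sketch of the new tokenize_b inner loop structure. */
    while (c < eob) {
      int v = qcoeff[scan[c]];
      ++eob_branch[band[c]][pt];     /* once per run, at its first slot */
      while (!v) {                   /* the run of zero coefficients */
        emit_zero(&t, coef_probs[band[c]][pt], counts[band[c]][pt]);
        token_cache[scan[c]] = 0;
        ++c;
        pt = get_coef_context(nb, token_cache, c);
        v = qcoeff[scan[c]];
      }
      vp9_get_token_extra(v, &token, &extra);
      emit_token(&t, coef_probs[band[c]][pt], token, extra,
                 counts[band[c]][pt]);
      token_cache[scan[c]] = vp9_pt_energy_class[token];
      ++c;
      pt = get_coef_context(nb, token_cache, c);
    }
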
@@ -584,24 +473,26 @@ int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
- int dry_run, BLOCK_SIZE bsize) {
- VP9_COMMON *const cm = &cpi->common;
+ int dry_run, int seg_skip, BLOCK_SIZE bsize) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *const mi = xd->mi[0];
const int ctx = vp9_get_skip_context(xd);
- const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
- if (mbmi->skip) {
- if (!dry_run)
- td->counts->skip[ctx][1] += skip_inc;
+
+ if (seg_skip) {
+ assert(mi->skip);
+ }
+
+ if (mi->skip) {
+ if (!dry_run && !seg_skip)
+ ++td->counts->skip[ctx][1];
reset_skip_context(xd, bsize);
return;
}
if (!dry_run) {
- td->counts->skip[ctx][0] += skip_inc;
+ ++td->counts->skip[ctx][0];
vp9_foreach_transformed_block(xd, bsize, tokenize_b, &arg);
} else {
vp9_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
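
The segment-skip decision now reaches vp9_tokenize_sb as an explicit seg_skip argument rather than being recomputed from the segmentation struct, and the skip counters are only bumped when the skip was a genuine coding decision rather than one forced by SEG_LVL_SKIP. A sketch of the expected caller side (the real call site lives in the encoder's frame-encoding loop, which is not part of this diff):

    /* Sketch: the caller derives seg_skip once and passes it down. */
    const int seg_skip =
        segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);
    vp9_tokenize_sb(cpi, td, t, dry_run, seg_skip, bsize);
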
diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h
index c0f09c7b2..1caab2ac1 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.h
+++ b/libvpx/vp9/encoder/vp9_tokenize.h
@@ -36,9 +36,8 @@ typedef struct {
typedef struct {
const vpx_prob *context_tree;
+ int16_t token;
EXTRABIT extra;
- uint8_t token;
- uint8_t skip_eob_node;
} TOKENEXTRA;
extern const vpx_tree_index vp9_coef_tree[];
@@ -52,7 +51,8 @@ struct VP9_COMP;
struct ThreadData;
void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td,
- TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+ TOKENEXTRA **t, int dry_run, int seg_skip,
+ BLOCK_SIZE bsize);
typedef struct {
const vpx_prob *prob;
@@ -75,26 +75,27 @@ extern const int16_t *vp9_dct_value_cost_ptr;
*/
extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens;
+extern const int *vp9_dct_cat_lt_10_value_cost;
extern const int16_t vp9_cat6_low_cost[256];
-extern const int16_t vp9_cat6_high_cost[128];
-extern const int16_t vp9_cat6_high10_high_cost[512];
-extern const int16_t vp9_cat6_high12_high_cost[2048];
-static INLINE int16_t vp9_get_cost(int16_t token, EXTRABIT extrabits,
- const int16_t *cat6_high_table) {
+extern const int vp9_cat6_high_cost[64];
+extern const int vp9_cat6_high10_high_cost[256];
+extern const int vp9_cat6_high12_high_cost[1024];
+static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits,
+ const int *cat6_high_table) {
if (token != CATEGORY6_TOKEN)
- return vp9_extra_bits[token].cost[extrabits];
- return vp9_cat6_low_cost[extrabits & 0xff]
- + cat6_high_table[extrabits >> 8];
+ return vp9_extra_bits[token].cost[extrabits >> 1];
+ return vp9_cat6_low_cost[(extrabits >> 1) & 0xff]
+ + cat6_high_table[extrabits >> 9];
}
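
EXTRABIT values now carry the sign in their least-significant bit, so vp9_get_cost shifts it off before every table lookup; for category 6 the remaining bits split into a low byte (the 256-entry table) and the high bits (the per-bit-depth table), and the two costs are summed. Note that extrabits >> 9 is just (extrabits >> 1) >> 8. A sketch of the split under that convention:

    /* Sketch: cat6 cost lookup with the sign folded into bit 0.
     * extrabits = (magnitude_bits << 1) | sign. */
    static int cat6_cost_sketch(EXTRABIT extrabits,
                                const int *cat6_high_table) {
      const int bits = extrabits >> 1;               /* drop the sign bit */
      return vp9_cat6_low_cost[bits & 0xff] + cat6_high_table[bits >> 8];
    }
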
#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE const int16_t* vp9_get_high_cost_table(int bit_depth) {
+static INLINE const int* vp9_get_high_cost_table(int bit_depth) {
return bit_depth == 8 ? vp9_cat6_high_cost
: (bit_depth == 10 ? vp9_cat6_high10_high_cost :
vp9_cat6_high12_high_cost);
}
#else
-static INLINE const int16_t* vp9_get_high_cost_table(int bit_depth) {
+static INLINE const int* vp9_get_high_cost_table(int bit_depth) {
(void) bit_depth;
return vp9_cat6_high_cost;
}
@@ -118,6 +119,18 @@ static INLINE int16_t vp9_get_token(int v) {
return vp9_dct_cat_lt_10_value_tokens[v].token;
}
+static INLINE int vp9_get_token_cost(int v, int16_t *token,
+ const int *cat6_high_table) {
+ if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) {
+ EXTRABIT extrabits;
+ *token = CATEGORY6_TOKEN;
+ extrabits = abs(v) - CAT6_MIN_VAL;
+ return vp9_cat6_low_cost[extrabits & 0xff] +
+ cat6_high_table[extrabits >> 8];
+ }
+ *token = vp9_dct_cat_lt_10_value_tokens[v].token;
+ return vp9_dct_cat_lt_10_value_cost[v];
+}
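
vp9_get_token_cost folds token classification and rate lookup into one call; unlike vp9_get_cost above, it indexes the cat6 tables with raw extrabits, since abs(v) - CAT6_MIN_VAL here has no sign bit appended. A usage sketch with illustrative values:

    int16_t token;
    const int *table = vp9_get_high_cost_table(8);   /* 8-bit depth path */
    const int rate = vp9_get_token_cost(-3, &token, table);
    /* token should now be the small-value token for |v| == 3 and rate its
     * cost from vp9_dct_cat_lt_10_value_cost[-3]. */
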
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
index fa37b6fed..fa37b6fed 100644
--- a/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm b/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm
deleted file mode 100644
index 7a7a6b655..000000000
--- a/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm
+++ /dev/null
@@ -1,104 +0,0 @@
-;
-; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%define private_prefix vp9
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-%macro TRANSFORM_COLS 0
- paddw m0, m1
- movq m4, m0
- psubw m3, m2
- psubw m4, m3
- psraw m4, 1
- movq m5, m4
- psubw m5, m1 ;b1
- psubw m4, m2 ;c1
- psubw m0, m4
- paddw m3, m5
- ; m0 a0
- SWAP 1, 4 ; m1 c1
- SWAP 2, 3 ; m2 d1
- SWAP 3, 5 ; m3 b1
-%endmacro
-
-%macro TRANSPOSE_4X4 0
- movq m4, m0
- movq m5, m2
- punpcklwd m4, m1
- punpckhwd m0, m1
- punpcklwd m5, m3
- punpckhwd m2, m3
- movq m1, m4
- movq m3, m0
- punpckldq m1, m5
- punpckhdq m4, m5
- punpckldq m3, m2
- punpckhdq m0, m2
- SWAP 2, 3, 0, 1, 4
-%endmacro
-
-INIT_MMX mmx
-cglobal fwht4x4, 3, 4, 8, input, output, stride
- lea r3q, [inputq + strideq*4]
- movq m0, [inputq] ;a1
- movq m1, [inputq + strideq*2] ;b1
- movq m2, [r3q] ;c1
- movq m3, [r3q + strideq*2] ;d1
-
- TRANSFORM_COLS
- TRANSPOSE_4X4
- TRANSFORM_COLS
- TRANSPOSE_4X4
-
- psllw m0, 2
- psllw m1, 2
- psllw m2, 2
- psllw m3, 2
-
-%if CONFIG_VP9_HIGHBITDEPTH
- pxor m4, m4
- pxor m5, m5
- pcmpgtw m4, m0
- pcmpgtw m5, m1
- movq m6, m0
- movq m7, m1
- punpcklwd m0, m4
- punpcklwd m1, m5
- punpckhwd m6, m4
- punpckhwd m7, m5
- movq [outputq], m0
- movq [outputq + 8], m6
- movq [outputq + 16], m1
- movq [outputq + 24], m7
- pxor m4, m4
- pxor m5, m5
- pcmpgtw m4, m2
- pcmpgtw m5, m3
- movq m6, m2
- movq m7, m3
- punpcklwd m2, m4
- punpcklwd m3, m5
- punpckhwd m6, m4
- punpckhwd m7, m5
- movq [outputq + 32], m2
- movq [outputq + 40], m6
- movq [outputq + 48], m3
- movq [outputq + 56], m7
-%else
- movq [outputq], m0
- movq [outputq + 8], m1
- movq [outputq + 16], m2
- movq [outputq + 24], m3
-%endif
-
- RET
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_sse2.asm b/libvpx/vp9/encoder/x86/vp9_dct_sse2.asm
new file mode 100644
index 000000000..ced37bd16
--- /dev/null
+++ b/libvpx/vp9/encoder/x86/vp9_dct_sse2.asm
@@ -0,0 +1,86 @@
+;
+; Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp9
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro TRANSFORM_COLS 0
+ paddw m0, m1
+ movq m4, m0
+ psubw m3, m2
+ psubw m4, m3
+ psraw m4, 1
+ movq m5, m4
+ psubw m5, m1 ;b1
+ psubw m4, m2 ;c1
+ psubw m0, m4
+ paddw m3, m5
+ ; m0 a0
+ SWAP 1, 4 ; m1 c1
+ SWAP 2, 3 ; m2 d1
+ SWAP 3, 5 ; m3 b1
+%endmacro
+
+%macro TRANSPOSE_4X4 0
+ ; 00 01 02 03
+ ; 10 11 12 13
+ ; 20 21 22 23
+ ; 30 31 32 33
+ punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13
+ punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33
+ mova m1, m0
+ punpckldq m0, m2 ; 00 10 20 30 01 11 21 31
+ punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33
+%endmacro
+
+INIT_XMM sse2
+cglobal fwht4x4, 3, 4, 8, input, output, stride
+ lea r3q, [inputq + strideq*4]
+ movq m0, [inputq] ;a1
+ movq m1, [inputq + strideq*2] ;b1
+ movq m2, [r3q] ;c1
+ movq m3, [r3q + strideq*2] ;d1
+
+ TRANSFORM_COLS
+ TRANSPOSE_4X4
+ SWAP 1, 2
+ psrldq m1, m0, 8
+ psrldq m3, m2, 8
+ TRANSFORM_COLS
+ TRANSPOSE_4X4
+
+ psllw m0, 2
+ psllw m1, 2
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; sign extension
+ mova m2, m0
+ mova m3, m1
+ punpcklwd m0, m0
+ punpcklwd m1, m1
+ punpckhwd m2, m2
+ punpckhwd m3, m3
+ psrad m0, 16
+ psrad m1, 16
+ psrad m2, 16
+ psrad m3, 16
+ mova [outputq], m0
+ mova [outputq + 16], m2
+ mova [outputq + 32], m1
+ mova [outputq + 48], m3
+%else
+ mova [outputq], m0
+ mova [outputq + 16], m1
+%endif
+
+ RET
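
The new SSE2 version keeps each 4-sample row in the low half of an XMM register and, after the first transpose, packs two rows per register. For reference, a scalar sketch of the 1-D Walsh-Hadamard butterfly that TRANSFORM_COLS implements, using the macro's a/b/c/d naming and output order:

    /* Sketch: one column pass of the 4-point WHT used by fwht4x4.
     * Mirrors TRANSFORM_COLS: a += b; d -= c; e = (a - d) >> 1;
     * b = e - b; c = e - c; a -= c; d += b. */
    static void wht4_col_sketch(int16_t v[4]) {
      int16_t a = v[0], b = v[1], c = v[2], d = v[3], e;
      a += b;
      d -= c;
      e = (a - d) >> 1;
      b = e - b;
      c = e - c;
      a -= c;
      d += b;
      v[0] = a;  /* a0 */
      v[1] = c;  /* c1 (the macro's SWAPs reorder outputs the same way) */
      v[2] = d;  /* d1 */
      v[3] = b;  /* b1 */
    }
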
diff --git a/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c b/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c
index bf7c7af77..883507af3 100644
--- a/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -125,7 +125,7 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
return acc_diff;
}
-// Denoiser for 4xM and 8xM blocks.
+// Denoise 8x8 and 8x16 blocks.
static int vp9_denoiser_NxM_sse2_small(
const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
@@ -147,9 +147,9 @@ static int vp9_denoiser_NxM_sse2_small(
const __m128i l32 = _mm_set1_epi8(2);
// Difference between level 2 and level 1 is 1.
const __m128i l21 = _mm_set1_epi8(1);
- const uint8_t shift = (width == 4) ? 2 : 1;
+ const int b_height = (4 << b_height_log2_lookup[bs]) >> 1;
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
+ for (r = 0; r < b_height; ++r) {
memcpy(sig_buffer[r], sig, width);
memcpy(sig_buffer[r] + width, sig + sig_stride, width);
memcpy(mc_running_buffer[r], mc_running_avg_y, width);
@@ -157,18 +157,6 @@ static int vp9_denoiser_NxM_sse2_small(
mc_running_avg_y + mc_avg_y_stride, width);
memcpy(running_buffer[r], running_avg_y, width);
memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width);
- if (width == 4) {
- memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width);
- memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width);
- memcpy(mc_running_buffer[r] + width * 2,
- mc_running_avg_y + mc_avg_y_stride * 2, width);
- memcpy(mc_running_buffer[r] + width * 3,
- mc_running_avg_y + mc_avg_y_stride * 3, width);
- memcpy(running_buffer[r] + width * 2,
- running_avg_y + avg_y_stride * 2, width);
- memcpy(running_buffer[r] + width * 3,
- running_avg_y + avg_y_stride * 3, width);
- }
acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
mc_running_buffer[r],
running_buffer[r],
@@ -176,16 +164,10 @@ static int vp9_denoiser_NxM_sse2_small(
&l3, &l32, &l21, acc_diff);
memcpy(running_avg_y, running_buffer[r], width);
memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
- if (width == 4) {
- memcpy(running_avg_y + avg_y_stride * 2,
- running_buffer[r] + width * 2, width);
- memcpy(running_avg_y + avg_y_stride * 3,
- running_buffer[r] + width * 3, width);
- }
// Update pointers for next iteration.
- sig += (sig_stride << shift);
- mc_running_avg_y += (mc_avg_y_stride << shift);
- running_avg_y += (avg_y_stride << shift);
+ sig += (sig_stride << 1);
+ mc_running_avg_y += (mc_avg_y_stride << 1);
+ running_avg_y += (avg_y_stride << 1);
}
{
@@ -207,22 +189,16 @@ static int vp9_denoiser_NxM_sse2_small(
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
+ running_avg_y -= avg_y_stride * (b_height << 1);
+ for (r = 0; r < b_height; ++r) {
acc_diff = vp9_denoiser_adj_16x1_sse2(
sig_buffer[r], mc_running_buffer[r], running_buffer[r],
k_0, k_delta, acc_diff);
memcpy(running_avg_y, running_buffer[r], width);
memcpy(running_avg_y + avg_y_stride,
running_buffer[r] + width, width);
- if (width == 4) {
- memcpy(running_avg_y + avg_y_stride * 2,
- running_buffer[r] + width * 2, width);
- memcpy(running_avg_y + avg_y_stride * 3,
- running_buffer[r] + width * 3, width);
- }
// Update pointers for next iteration.
- running_avg_y += (avg_y_stride << shift);
+ running_avg_y += (avg_y_stride << 1);
}
sum_diff = sum_diff_16x1(acc_diff);
if (abs(sum_diff) > sum_diff_thresh) {
@@ -236,7 +212,7 @@ static int vp9_denoiser_NxM_sse2_small(
return FILTER_BLOCK;
}
-// Denoiser for 16xM, 32xM and 64xM blocks
+// Denoise 16x16, 16x32, 32x16, 32x32, 32x64, 64x32 and 64x64 blocks.
static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
const uint8_t *mc_running_avg_y,
int mc_avg_y_stride,
@@ -260,38 +236,37 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
const __m128i l32 = _mm_set1_epi8(2);
// Difference between level 2 and level 1 is 1.
const __m128i l21 = _mm_set1_epi8(1);
+ const int b_width = (4 << b_width_log2_lookup[bs]);
+ const int b_height = (4 << b_height_log2_lookup[bs]);
+ const int b_width_shift4 = b_width >> 4;
- for (c = 0; c < 4; ++c) {
- for (r = 0; r < 4; ++r) {
+ for (r = 0; r < 4; ++r) {
+ for (c = 0; c < b_width_shift4; ++c) {
acc_diff[c][r] = _mm_setzero_si128();
}
}
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2(
+ for (r = 0; r < b_height; ++r) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ acc_diff[c][r>>4] = vp9_denoiser_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
- &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
+ &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
running_avg_y += 16;
}
- if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
+ if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
}
}
// Update pointers for next iteration.
- sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
- mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
- running_avg_y = running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- avg_y_stride;
+ sig = sig - b_width + sig_stride;
+ mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
+ running_avg_y = running_avg_y - b_width + avg_y_stride;
}
{
@@ -303,33 +278,29 @@ static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
- sig -= sig_stride * (4 << b_height_log2_lookup[bs]);
- mc_running_avg_y -= mc_avg_y_stride * (4 << b_height_log2_lookup[bs]);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
+ sig -= sig_stride * b_height;
+ mc_running_avg_y -= mc_avg_y_stride * b_height;
+ running_avg_y -= avg_y_stride * b_height;
sum_diff = 0;
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- acc_diff[c>>4][r>>4] = vp9_denoiser_adj_16x1_sse2(
+ for (r = 0; r < b_height; ++r) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ acc_diff[c][r>>4] = vp9_denoiser_adj_16x1_sse2(
sig, mc_running_avg_y, running_avg_y, k_0,
- k_delta, acc_diff[c>>4][r>>4]);
+ k_delta, acc_diff[c][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
running_avg_y += 16;
}
- if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
+ if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
+ for (c = 0; c < b_width_shift4; ++c) {
+ sum_diff += sum_diff_16x1(acc_diff[c][r>>4]);
}
}
- sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
- mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
- running_avg_y = running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- avg_y_stride;
+ sig = sig - b_width + sig_stride;
+ mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
+ running_avg_y = running_avg_y - b_width + avg_y_stride;
}
if (abs(sum_diff) > sum_diff_thresh) {
return COPY_BLOCK;
@@ -349,26 +320,21 @@ int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
int increase_denoising,
BLOCK_SIZE bs,
int motion_magnitude) {
- if (bs == BLOCK_4X4 || bs == BLOCK_4X8) {
- return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude, 4);
- } else if (bs == BLOCK_8X4 || bs == BLOCK_8X8 || bs == BLOCK_8X16) {
- return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude, 8);
- } else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 ||
- bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 ||
- bs == BLOCK_64X32 || bs == BLOCK_64X64) {
+  // Order by block-type frequency so the most common sizes terminate early.
+ if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 ||
+ bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 ||
+ bs == BLOCK_32X64 || bs == BLOCK_64X32) {
return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
mc_avg, mc_avg_stride,
avg, avg_stride,
increase_denoising,
bs, motion_magnitude);
+ } else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) {
+ return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
+ mc_avg, mc_avg_stride,
+ avg, avg_stride,
+ increase_denoising,
+ bs, motion_magnitude, 8);
} else {
return COPY_BLOCK;
}
diff --git a/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
new file mode 100644
index 000000000..cd3e87ec8
--- /dev/null
+++ b/libvpx/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#if defined(_MSC_VER)
+# include <intrin.h>
+#endif
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vpx_ports/mem.h"
+
+#ifdef __GNUC__
+# define LIKELY(v) __builtin_expect(v, 1)
+# define UNLIKELY(v) __builtin_expect(v, 0)
+#else
+# define LIKELY(v) (v)
+# define UNLIKELY(v) (v)
+#endif
+
+static INLINE int_mv pack_int_mv(int16_t row, int16_t col) {
+ int_mv result;
+ result.as_mv.row = row;
+ result.as_mv.col = col;
+ return result;
+}
+
+static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
+  // This is simplified from the C implementation by relying on the fact that
+ // x->nmvjointsadcost[1] == x->nmvjointsadcost[2] and
+ // x->nmvjointsadcost[1] == x->nmvjointsadcost[3]
+ return mv.as_int == 0 ? 0 : 1;
+}
+
+static INLINE int mv_cost(const int_mv mv,
+ const int *joint_cost, int *const comp_cost[2]) {
+ return joint_cost[get_mv_joint(mv)] +
+ comp_cost[0][mv.as_mv.row] + comp_cost[1][mv.as_mv.col];
+}
+
+static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
+ int sad_per_bit) {
+ const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,
+ mv.as_mv.col - ref->col);
+ return ROUND_POWER_OF_TWO((unsigned)mv_cost(diff, x->nmvjointsadcost,
+ x->nmvsadcost) *
+ sad_per_bit, VP9_PROB_COST_SHIFT);
+}
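
For reference, a scalar sketch of the rounding arithmetic above, assuming VP9_PROB_COST_SHIFT is 9 as in vp9/encoder/vp9_cost.h; the helper and macro names are hypothetical:

/* Minimal sketch of the MV-SAD error cost above. */
#define SKETCH_PROB_COST_SHIFT 9  /* assumed value of VP9_PROB_COST_SHIFT */
#define SKETCH_ROUND_POW2(value, n) (((value) + (1 << ((n) - 1))) >> (n))

static int sketch_mvsad_err_cost(int joint_cost, int row_cost, int col_cost,
                                 int sad_per_bit) {
  /* Joint cost plus per-component costs, rescaled into SAD units. */
  const unsigned int cost = (unsigned int)(joint_cost + row_cost + col_cost);
  return (int)SKETCH_ROUND_POW2(cost * (unsigned int)sad_per_bit,
                                SKETCH_PROB_COST_SHIFT);
}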
+
+/*****************************************************************************
+ * This function utilizes 3 properties of the cost function lookup tables, *
+ * constructed using 'cal_nmvjointsadcost' and 'cal_nmvsadcosts' in          *
+ * vp9_encoder.c. *
+ * For the joint cost: *
+ * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
+ * For the component costs: *
+ * - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
+ * (Equal costs for both components) *
+ * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
+ * (Cost function is even) *
+ * If these do not hold, then this function cannot be used without *
+ * modification, in which case you can revert to using the C implementation, *
+ * which does not rely on these properties. *
+ *****************************************************************************/
+int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ MV *ref_mv, MV *best_mv, int search_param,
+ int sad_per_bit, int *num00,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv) {
+ const int_mv maxmv = pack_int_mv(x->mv_row_max, x->mv_col_max);
+ const __m128i v_max_mv_w = _mm_set1_epi32(maxmv.as_int);
+ const int_mv minmv = pack_int_mv(x->mv_row_min, x->mv_col_min);
+ const __m128i v_min_mv_w = _mm_set1_epi32(minmv.as_int);
+
+ const __m128i v_spb_d = _mm_set1_epi32(sad_per_bit);
+
+ const __m128i v_joint_cost_0_d = _mm_set1_epi32(x->nmvjointsadcost[0]);
+ const __m128i v_joint_cost_1_d = _mm_set1_epi32(x->nmvjointsadcost[1]);
+
+ // search_param determines the length of the initial step and hence the number
+ // of iterations.
+ // 0 = initial step (MAX_FIRST_STEP) pel
+ // 1 = (MAX_FIRST_STEP/2) pel,
+ // 2 = (MAX_FIRST_STEP/4) pel...
+ const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param];
+ const intptr_t *ss_os = &cfg->ss_os[cfg->searches_per_step * search_param];
+ const int tot_steps = cfg->total_steps - search_param;
+
+ const int_mv fcenter_mv = pack_int_mv(center_mv->row >> 3,
+ center_mv->col >> 3);
+ const __m128i vfcmv = _mm_set1_epi32(fcenter_mv.as_int);
+
+ const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row);
+ const int ref_col = clamp(ref_mv->col, minmv.as_mv.col, maxmv.as_mv.col);
+
+ int_mv bmv = pack_int_mv(ref_row, ref_col);
+ int_mv new_bmv = bmv;
+ __m128i v_bmv_w = _mm_set1_epi32(bmv.as_int);
+
+ const int what_stride = x->plane[0].src.stride;
+ const int in_what_stride = x->e_mbd.plane[0].pre[0].stride;
+ const uint8_t *const what = x->plane[0].src.buf;
+ const uint8_t *const in_what = x->e_mbd.plane[0].pre[0].buf +
+ ref_row * in_what_stride + ref_col;
+
+ // Work out the start point for the search
+ const uint8_t *best_address = in_what;
+ const uint8_t *new_best_address = best_address;
+#if ARCH_X86_64
+ __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
+#else
+ __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address);
+#endif
+
+ unsigned int best_sad;
+ int i, j, step;
+
+ // Check the prerequisite cost function properties that are easy to check
+ // in an assert. See the function-level documentation for details on all
+ // prerequisites.
+ assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[2]);
+ assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[3]);
+
+ // Check the starting position
+ best_sad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
+ best_sad += mvsad_err_cost(x, bmv, &fcenter_mv.as_mv, sad_per_bit);
+
+ *num00 = 0;
+
+ for (i = 0, step = 0; step < tot_steps; step++) {
+ for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) {
+ __m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w;
+#if ARCH_X86_64
+ __m128i v_blocka[2];
+#else
+ __m128i v_blocka[1];
+#endif
+
+ // Compute the candidate motion vectors
+ const __m128i v_ss_mv_w = _mm_loadu_si128((const __m128i *)&ss_mv[i]);
+ const __m128i v_these_mv_w = _mm_add_epi16(v_bmv_w, v_ss_mv_w);
+ // Clamp them to the search bounds
+ __m128i v_these_mv_clamp_w = v_these_mv_w;
+ v_these_mv_clamp_w = _mm_min_epi16(v_these_mv_clamp_w, v_max_mv_w);
+ v_these_mv_clamp_w = _mm_max_epi16(v_these_mv_clamp_w, v_min_mv_w);
+ // The ones that did not change are inside the search area
+ v_inside_d = _mm_cmpeq_epi32(v_these_mv_clamp_w, v_these_mv_w);
+
+ // If none of them are inside, then move on
+ if (LIKELY(_mm_test_all_zeros(v_inside_d, v_inside_d))) {
+ continue;
+ }
+
+ // The inverse mask indicates which of the MVs are outside
+ v_outside_d = _mm_xor_si128(v_inside_d, _mm_set1_epi8(0xff));
+      // Shift right to keep the sign bit clear; we will use this later
+ // to set the cost to the maximum value.
+ v_outside_d = _mm_srli_epi32(v_outside_d, 1);
+
+ // Compute the difference MV
+ v_diff_mv_w = _mm_sub_epi16(v_these_mv_clamp_w, vfcmv);
+ // We utilise the fact that the cost function is even, and use the
+ // absolute difference. This allows us to use unsigned indexes later
+      // and reduces cache pressure somewhat, as only half of the table
+ // is ever referenced.
+ v_diff_mv_w = _mm_abs_epi16(v_diff_mv_w);
+
+ // Compute the SIMD pointer offsets.
+ {
+#if ARCH_X86_64 // sizeof(intptr_t) == 8
+ // Load the offsets
+ __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
+ __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
+ // Set the ones falling outside to zero
+ v_bo10_q = _mm_and_si128(v_bo10_q,
+ _mm_cvtepi32_epi64(v_inside_d));
+ v_bo32_q = _mm_and_si128(v_bo32_q,
+ _mm_unpackhi_epi32(v_inside_d, v_inside_d));
+ // Compute the candidate addresses
+ v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
+ v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
+#else // ARCH_X86 // sizeof(intptr_t) == 4
+ __m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]);
+ v_bo_d = _mm_and_si128(v_bo_d, v_inside_d);
+ v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d);
+#endif
+ }
+
+ fn_ptr->sdx4df(what, what_stride,
+ (const uint8_t **)&v_blocka[0], in_what_stride,
+ (uint32_t*)&v_sad_d);
+
+ // Look up the component cost of the residual motion vector
+ {
+ const int32_t row0 = _mm_extract_epi16(v_diff_mv_w, 0);
+ const int32_t col0 = _mm_extract_epi16(v_diff_mv_w, 1);
+ const int32_t row1 = _mm_extract_epi16(v_diff_mv_w, 2);
+ const int32_t col1 = _mm_extract_epi16(v_diff_mv_w, 3);
+ const int32_t row2 = _mm_extract_epi16(v_diff_mv_w, 4);
+ const int32_t col2 = _mm_extract_epi16(v_diff_mv_w, 5);
+ const int32_t row3 = _mm_extract_epi16(v_diff_mv_w, 6);
+ const int32_t col3 = _mm_extract_epi16(v_diff_mv_w, 7);
+
+ // Note: This is a use case for vpgather in AVX2
+ const uint32_t cost0 = x->nmvsadcost[0][row0] + x->nmvsadcost[0][col0];
+ const uint32_t cost1 = x->nmvsadcost[0][row1] + x->nmvsadcost[0][col1];
+ const uint32_t cost2 = x->nmvsadcost[0][row2] + x->nmvsadcost[0][col2];
+ const uint32_t cost3 = x->nmvsadcost[0][row3] + x->nmvsadcost[0][col3];
+
+ __m128i v_cost_10_d, v_cost_32_d;
+ v_cost_10_d = _mm_cvtsi32_si128(cost0);
+ v_cost_10_d = _mm_insert_epi32(v_cost_10_d, cost1, 1);
+ v_cost_32_d = _mm_cvtsi32_si128(cost2);
+ v_cost_32_d = _mm_insert_epi32(v_cost_32_d, cost3, 1);
+ v_cost_d = _mm_unpacklo_epi64(v_cost_10_d, v_cost_32_d);
+ }
+
+ // Now add in the joint cost
+ {
+ const __m128i v_sel_d = _mm_cmpeq_epi32(v_diff_mv_w,
+ _mm_setzero_si128());
+ const __m128i v_joint_cost_d = _mm_blendv_epi8(v_joint_cost_1_d,
+ v_joint_cost_0_d,
+ v_sel_d);
+ v_cost_d = _mm_add_epi32(v_cost_d, v_joint_cost_d);
+ }
+
+ // Multiply by sad_per_bit
+ v_cost_d = _mm_mullo_epi32(v_cost_d, v_spb_d);
+ // ROUND_POWER_OF_TWO(v_cost_d, VP9_PROB_COST_SHIFT)
+ v_cost_d = _mm_add_epi32(v_cost_d,
+ _mm_set1_epi32(1 << (VP9_PROB_COST_SHIFT - 1)));
+ v_cost_d = _mm_srai_epi32(v_cost_d, VP9_PROB_COST_SHIFT);
+ // Add the cost to the sad
+ v_sad_d = _mm_add_epi32(v_sad_d, v_cost_d);
+
+ // Make the motion vectors outside the search area have max cost
+      // by or'ing in the comparison mask; this way the minimum search won't
+ // pick them.
+ v_sad_d = _mm_or_si128(v_sad_d, v_outside_d);
+
+ // Find the minimum value and index horizontally in v_sad_d
+ {
+ // Try speculatively on 16 bits, so we can use the minpos intrinsic
+ const __m128i v_sad_w = _mm_packus_epi32(v_sad_d, v_sad_d);
+ const __m128i v_minp_w = _mm_minpos_epu16(v_sad_w);
+
+ uint32_t local_best_sad = _mm_extract_epi16(v_minp_w, 0);
+ uint32_t local_best_idx = _mm_extract_epi16(v_minp_w, 1);
+
+        // If the local best value is not saturated, just use it; otherwise
+ // find the horizontal minimum again the hard way on 32 bits.
+ // This is executed rarely.
+ if (UNLIKELY(local_best_sad == 0xffff)) {
+ __m128i v_loval_d, v_hival_d, v_loidx_d, v_hiidx_d, v_sel_d;
+
+ v_loval_d = v_sad_d;
+ v_loidx_d = _mm_set_epi32(3, 2, 1, 0);
+ v_hival_d = _mm_srli_si128(v_loval_d, 8);
+ v_hiidx_d = _mm_srli_si128(v_loidx_d, 8);
+
+ v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
+
+ v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
+ v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d);
+ v_hival_d = _mm_srli_si128(v_loval_d, 4);
+ v_hiidx_d = _mm_srli_si128(v_loidx_d, 4);
+
+ v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
+
+ v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
+ v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d);
+
+ local_best_sad = _mm_extract_epi32(v_loval_d, 0);
+ local_best_idx = _mm_extract_epi32(v_loidx_d, 0);
+ }
+
+ // Update the global minimum if the local minimum is smaller
+ if (LIKELY(local_best_sad < best_sad)) {
+ new_bmv = ((const int_mv *)&v_these_mv_w)[local_best_idx];
+ new_best_address = ((const uint8_t **)v_blocka)[local_best_idx];
+
+ best_sad = local_best_sad;
+ }
+ }
+ }
+
+ bmv = new_bmv;
+ best_address = new_best_address;
+
+ v_bmv_w = _mm_set1_epi32(bmv.as_int);
+#if ARCH_X86_64
+ v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
+#else
+ v_ba_d = _mm_set1_epi32((intptr_t)best_address);
+#endif
+
+ if (UNLIKELY(best_address == in_what)) {
+ (*num00)++;
+ }
+ }
+
+ *best_mv = bmv.as_mv;
+ return best_sad;
+}
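
The speculative 16-bit minimum search near the end of the function is worth isolating. A minimal SSE4.1 sketch with a hypothetical helper name; as in the UNLIKELY branch above, the caller must rescan on 32 bits whenever the returned minimum saturates to 0xffff:

#include <smmintrin.h>  /* SSE4.1 */
#include <stdint.h>

/* Returns the lane (0..3) of the smallest unsigned 32-bit element in v and
 * stores the (possibly saturated) 16-bit minimum in *min_out. */
static int sketch_horizontal_min_epu32(__m128i v, uint32_t *min_out) {
  const __m128i packed = _mm_packus_epi32(v, v);    /* saturate to 16 bits */
  const __m128i minpos = _mm_minpos_epu16(packed);  /* [min, index, 0, ...] */
  *min_out = (uint32_t)_mm_extract_epi16(minpos, 0);
  return _mm_extract_epi16(minpos, 1);
}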
diff --git a/libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c b/libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c
new file mode 100644
index 000000000..38af3b13a
--- /dev/null
+++ b/libvpx/vp9/encoder/x86/vp9_frame_scale_ssse3.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#if defined(_MSC_VER) && _MSC_VER <= 1500
+// Need to include math.h before including tmmintrin.h/intrin.h
+// in certain versions of MSVS.
+#include <math.h>
+#endif
+#include <tmmintrin.h> // SSSE3
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+#include "vpx_scale/yv12config.h"
+
+extern void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst);
+
+static void downsample_2_to_1_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ int w, int h) {
+ const __m128i mask = _mm_set1_epi16(0x00FF);
+ const int max_width = w & ~15;
+ int y;
+ for (y = 0; y < h; ++y) {
+ int x;
+ for (x = 0; x < max_width; x += 16) {
+ const __m128i a = _mm_loadu_si128((const __m128i *)(src + x * 2 + 0));
+ const __m128i b = _mm_loadu_si128((const __m128i *)(src + x * 2 + 16));
+ const __m128i a_and = _mm_and_si128(a, mask);
+ const __m128i b_and = _mm_and_si128(b, mask);
+ const __m128i c = _mm_packus_epi16(a_and, b_and);
+ _mm_storeu_si128((__m128i *)(dst + x), c);
+ }
+ for (; x < w; ++x)
+ dst[x] = src[x * 2];
+ src += src_stride * 2;
+ dst += dst_stride;
+ }
+}
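
The masked-pack loop above keeps the even byte of every input pair. A scalar sketch of the equivalent 2-to-1 point sampling, for reference (hypothetical name):

#include <stdint.h>

static void sketch_downsample_2_to_1_c(const uint8_t *src, int src_stride,
                                       uint8_t *dst, int dst_stride,
                                       int w, int h) {
  int x, y;
  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x)
      dst[x] = src[x * 2];  /* keep the left pixel of each pair */
    src += src_stride * 2;  /* skip every other input row */
    dst += dst_stride;
  }
}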
+
+static INLINE __m128i filter(const __m128i *const a, const __m128i *const b,
+ const __m128i *const c, const __m128i *const d,
+ const __m128i *const e, const __m128i *const f,
+ const __m128i *const g, const __m128i *const h) {
+ const __m128i coeffs_ab =
+ _mm_set_epi8(6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1, 6, -1);
+ const __m128i coeffs_cd =
+ _mm_set_epi8(78, -19, 78, -19, 78, -19, 78, -19, 78, -19, 78, -19,
+ 78, -19, 78, -19);
+ const __m128i const64_x16 = _mm_set1_epi16(64);
+ const __m128i ab = _mm_unpacklo_epi8(*a, *b);
+ const __m128i cd = _mm_unpacklo_epi8(*c, *d);
+ const __m128i fe = _mm_unpacklo_epi8(*f, *e);
+ const __m128i hg = _mm_unpacklo_epi8(*h, *g);
+ const __m128i ab_terms = _mm_maddubs_epi16(ab, coeffs_ab);
+ const __m128i cd_terms = _mm_maddubs_epi16(cd, coeffs_cd);
+ const __m128i fe_terms = _mm_maddubs_epi16(fe, coeffs_cd);
+ const __m128i hg_terms = _mm_maddubs_epi16(hg, coeffs_ab);
+  // cannot overflow
+ const __m128i abcd_terms = _mm_add_epi16(ab_terms, cd_terms);
+  // cannot overflow
+ const __m128i fehg_terms = _mm_add_epi16(fe_terms, hg_terms);
+ // can overflow, use saturating add
+ const __m128i terms = _mm_adds_epi16(abcd_terms, fehg_terms);
+ const __m128i round = _mm_adds_epi16(terms, const64_x16);
+ const __m128i shift = _mm_srai_epi16(round, 7);
+ return _mm_packus_epi16(shift, shift);
+}
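
The packed coefficients above encode the 8-tap kernel {-1, 6, -19, 78, 78, -19, 6, -1}, which sums to 128; the +64 bias and arithmetic shift by 7 therefore give unity gain at DC with rounding. A scalar sketch of one output pixel (hypothetical name):

#include <stdint.h>

static uint8_t sketch_eight_tap_pixel(const uint8_t *s) {
  static const int taps[8] = { -1, 6, -19, 78, 78, -19, 6, -1 };  /* sum 128 */
  int k, sum = 64;  /* rounding bias for the >> 7 below */
  for (k = 0; k < 8; ++k)
    sum += taps[k] * s[k];
  sum >>= 7;  /* unity DC gain since the taps sum to 128 */
  return (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));  /* packus */
}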
+
+static void eight_tap_row_ssse3(const uint8_t *src, uint8_t *dst, int w) {
+ const int max_width = w & ~7;
+ int x = 0;
+ for (; x < max_width; x += 8) {
+ const __m128i a = _mm_loadl_epi64((const __m128i *)(src + x + 0));
+ const __m128i b = _mm_loadl_epi64((const __m128i *)(src + x + 1));
+ const __m128i c = _mm_loadl_epi64((const __m128i *)(src + x + 2));
+ const __m128i d = _mm_loadl_epi64((const __m128i *)(src + x + 3));
+ const __m128i e = _mm_loadl_epi64((const __m128i *)(src + x + 4));
+ const __m128i f = _mm_loadl_epi64((const __m128i *)(src + x + 5));
+ const __m128i g = _mm_loadl_epi64((const __m128i *)(src + x + 6));
+ const __m128i h = _mm_loadl_epi64((const __m128i *)(src + x + 7));
+ const __m128i pack = filter(&a, &b, &c, &d, &e, &f, &g, &h);
+ _mm_storel_epi64((__m128i *)(dst + x), pack);
+ }
+}
+
+static void upsample_1_to_2_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ int dst_w, int dst_h) {
+ dst_w /= 2;
+ dst_h /= 2;
+ {
+ DECLARE_ALIGNED(16, uint8_t, tmp[1920 * 8]);
+ uint8_t *tmp0 = tmp + dst_w * 0;
+ uint8_t *tmp1 = tmp + dst_w * 1;
+ uint8_t *tmp2 = tmp + dst_w * 2;
+ uint8_t *tmp3 = tmp + dst_w * 3;
+ uint8_t *tmp4 = tmp + dst_w * 4;
+ uint8_t *tmp5 = tmp + dst_w * 5;
+ uint8_t *tmp6 = tmp + dst_w * 6;
+ uint8_t *tmp7 = tmp + dst_w * 7;
+ uint8_t *tmp8 = NULL;
+ const int max_width = dst_w & ~7;
+ int y;
+ eight_tap_row_ssse3(src - src_stride * 3 - 3, tmp0, dst_w);
+ eight_tap_row_ssse3(src - src_stride * 2 - 3, tmp1, dst_w);
+ eight_tap_row_ssse3(src - src_stride * 1 - 3, tmp2, dst_w);
+ eight_tap_row_ssse3(src + src_stride * 0 - 3, tmp3, dst_w);
+ eight_tap_row_ssse3(src + src_stride * 1 - 3, tmp4, dst_w);
+ eight_tap_row_ssse3(src + src_stride * 2 - 3, tmp5, dst_w);
+ eight_tap_row_ssse3(src + src_stride * 3 - 3, tmp6, dst_w);
+ for (y = 0; y < dst_h; y++) {
+ int x;
+ eight_tap_row_ssse3(src + src_stride * 4 - 3, tmp7, dst_w);
+ for (x = 0; x < max_width; x += 8) {
+ const __m128i A = _mm_loadl_epi64((const __m128i *)(src + x));
+ const __m128i B = _mm_loadl_epi64((const __m128i *)(tmp3 + x));
+ const __m128i AB = _mm_unpacklo_epi8(A, B);
+ __m128i C, D, CD;
+ _mm_storeu_si128((__m128i *)(dst + x * 2), AB);
+ {
+ const __m128i a =
+ _mm_loadl_epi64((const __m128i *)(src + x - src_stride * 3));
+ const __m128i b =
+ _mm_loadl_epi64((const __m128i *)(src + x - src_stride * 2));
+ const __m128i c =
+ _mm_loadl_epi64((const __m128i *)(src + x - src_stride * 1));
+ const __m128i d =
+ _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 0));
+ const __m128i e =
+ _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 1));
+ const __m128i f =
+ _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 2));
+ const __m128i g =
+ _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 3));
+ const __m128i h =
+ _mm_loadl_epi64((const __m128i *)(src + x + src_stride * 4));
+ C = filter(&a, &b, &c, &d, &e, &f, &g, &h);
+ }
+ {
+ const __m128i a = _mm_loadl_epi64((const __m128i *)(tmp0 + x));
+ const __m128i b = _mm_loadl_epi64((const __m128i *)(tmp1 + x));
+ const __m128i c = _mm_loadl_epi64((const __m128i *)(tmp2 + x));
+ const __m128i d = _mm_loadl_epi64((const __m128i *)(tmp3 + x));
+ const __m128i e = _mm_loadl_epi64((const __m128i *)(tmp4 + x));
+ const __m128i f = _mm_loadl_epi64((const __m128i *)(tmp5 + x));
+ const __m128i g = _mm_loadl_epi64((const __m128i *)(tmp6 + x));
+ const __m128i h = _mm_loadl_epi64((const __m128i *)(tmp7 + x));
+ D = filter(&a, &b, &c, &d, &e, &f, &g, &h);
+ }
+ CD = _mm_unpacklo_epi8(C, D);
+ _mm_storeu_si128((__m128i *)(dst + x * 2 + dst_stride), CD);
+ }
+ src += src_stride;
+ dst += dst_stride * 2;
+ tmp8 = tmp0;
+ tmp0 = tmp1;
+ tmp1 = tmp2;
+ tmp2 = tmp3;
+ tmp3 = tmp4;
+ tmp4 = tmp5;
+ tmp5 = tmp6;
+ tmp6 = tmp7;
+ tmp7 = tmp8;
+ }
+ }
+}
+
+void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ const int src_w = src->y_crop_width;
+ const int src_h = src->y_crop_height;
+ const int dst_w = dst->y_crop_width;
+ const int dst_h = dst->y_crop_height;
+ const int dst_uv_w = dst_w / 2;
+ const int dst_uv_h = dst_h / 2;
+
+ if (dst_w * 2 == src_w && dst_h * 2 == src_h) {
+ downsample_2_to_1_ssse3(src->y_buffer, src->y_stride,
+ dst->y_buffer, dst->y_stride, dst_w, dst_h);
+ downsample_2_to_1_ssse3(src->u_buffer, src->uv_stride,
+ dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
+ downsample_2_to_1_ssse3(src->v_buffer, src->uv_stride,
+ dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
+ vpx_extend_frame_borders(dst);
+ } else if (dst_w == src_w * 2 && dst_h == src_h * 2) {
+    // upsample_1_to_2_ssse3() supports destination widths up to 1920 * 2.
+    // For larger frames, fall back to vp9_scale_and_extend_frame_c().
+ if (dst_w/2 <= 1920) {
+ upsample_1_to_2_ssse3(src->y_buffer, src->y_stride,
+ dst->y_buffer, dst->y_stride, dst_w, dst_h);
+ upsample_1_to_2_ssse3(src->u_buffer, src->uv_stride,
+ dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
+ upsample_1_to_2_ssse3(src->v_buffer, src->uv_stride,
+ dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h);
+ vpx_extend_frame_borders(dst);
+ } else {
+ vp9_scale_and_extend_frame_c(src, dst);
+ }
+ } else {
+ vp9_scale_and_extend_frame_c(src, dst);
+ }
+}
diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c
index 6ccba0f8c..10d68939d 100644
--- a/libvpx/vp9/vp9_cx_iface.c
+++ b/libvpx/vp9/vp9_cx_iface.c
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_encoder.h"
#include "vpx_ports/vpx_once.h"
+#include "vpx_ports/system_state.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "./vpx_version.h"
#include "vp9/encoder/vp9_encoder.h"
@@ -39,6 +40,7 @@ struct vp9_extracfg {
unsigned int rc_max_inter_bitrate_pct;
unsigned int gf_cbr_boost_pct;
unsigned int lossless;
+ unsigned int target_level;
unsigned int frame_parallel_decoding_mode;
AQ_MODE aq_mode;
unsigned int frame_periodic_boost;
@@ -68,6 +70,7 @@ static struct vp9_extracfg default_extra_cfg = {
0, // rc_max_inter_bitrate_pct
0, // gf_cbr_boost_pct
0, // lossless
+ 255, // target_level
1, // frame_parallel_decoding_mode
NO_AQ, // aq_mode
0, // frame_periodic_delta_q
@@ -157,7 +160,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available
RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
- RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
+ RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000);
RANGE_CHECK_HI(cfg, g_profile, 3);
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
@@ -195,6 +198,17 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
+ {
+ unsigned int level = extra_cfg->target_level;
+ if (level != LEVEL_1 && level != LEVEL_1_1 && level != LEVEL_2 &&
+ level != LEVEL_2_1 && level != LEVEL_3 && level != LEVEL_3_1 &&
+ level != LEVEL_4 && level != LEVEL_4_1 && level != LEVEL_5 &&
+ level != LEVEL_5_1 && level != LEVEL_5_2 && level != LEVEL_6 &&
+ level != LEVEL_6_1 && level != LEVEL_6_2 &&
+ level != LEVEL_UNKNOWN && level != LEVEL_MAX)
+ ERROR("target_level is invalid");
+ }
+
if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS)
ERROR("ss_number_layers * ts_number_layers is out of range");
if (cfg->ts_number_layers > 1) {
@@ -485,7 +499,16 @@ static vpx_codec_err_t set_encoder_config(
oxcf->content = extra_cfg->content;
oxcf->tile_columns = extra_cfg->tile_columns;
- oxcf->tile_rows = extra_cfg->tile_rows;
+
+  // TODO(yunqing): The dependencies between row tiles cause errors in multi-
+  // threaded encoding. For now, tile_rows is forced to 0 in this case. A
+  // proper fix would add synchronization after each tile row is encoded, but
+  // that would hurt multi-threaded encoder performance. So it is recommended
+  // to use tile-rows=0 when encoding with threads > 1.
+ if (oxcf->max_threads > 1 && oxcf->tile_columns > 0)
+ oxcf->tile_rows = 0;
+ else
+ oxcf->tile_rows = extra_cfg->tile_rows;
oxcf->error_resilient_mode = cfg->g_error_resilient;
oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
@@ -499,6 +522,8 @@ static vpx_codec_err_t set_encoder_config(
oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode)
cfg->temporal_layering_mode;
+ oxcf->target_level = extra_cfg->target_level;
+
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
#if CONFIG_SPATIAL_SVC
oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl];
@@ -525,6 +550,7 @@ static vpx_codec_err_t set_encoder_config(
/*
printf("Current VP9 Settings: \n");
printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
+ printf("target_level: %d\n", oxcf->target_level);
printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
printf("sharpness: %d\n", oxcf->sharpness);
printf("cpu_used: %d\n", oxcf->cpu_used);
@@ -774,6 +800,20 @@ static vpx_codec_err_t ctrl_set_frame_periodic_boost(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_target_level(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.target_level = CAST(VP9E_SET_TARGET_LEVEL, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_get_level(vpx_codec_alg_priv_t *ctx, va_list args) {
+ int *const arg = va_arg(args, int *);
+ if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
+ *arg = (int)vp9_get_level(&ctx->cpi->level_info.level_spec);
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
vpx_codec_priv_enc_mr_cfg_t *data) {
vpx_codec_err_t res = VPX_CODEC_OK;
@@ -865,6 +905,11 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
break;
}
+ if (deadline == VPX_DL_REALTIME) {
+ ctx->oxcf.pass = 0;
+ new_mode = REALTIME;
+ }
+
if (ctx->oxcf.mode != new_mode) {
ctx->oxcf.mode = new_mode;
vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -931,9 +976,6 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
return index_sz;
}
-// vp9 uses 10,000,000 ticks/second as time stamp
-#define TICKS_PER_SEC 10000000LL
-
static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
int64_t n) {
return n * TICKS_PER_SEC * timebase->num / timebase->den;
@@ -941,7 +983,7 @@ static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
int64_t n) {
- const int64_t round = TICKS_PER_SEC * timebase->num / 2 - 1;
+ const int64_t round = (int64_t)TICKS_PER_SEC * timebase->num / 2 - 1;
return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC;
}
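
The cast added above matters because g_timebase.num may now be as large as 1000000000 (see the validate_config change earlier in this file), so TICKS_PER_SEC * timebase->num must be computed in 64 bits. A worked sketch, assuming TICKS_PER_SEC is 10000000 as stated in the removed comment (names hypothetical):

#include <stdint.h>

#define SKETCH_TICKS_PER_SEC 10000000LL

static int64_t sketch_units_to_ticks(int num, int den, int64_t n) {
  return n * SKETCH_TICKS_PER_SEC * num / den;
}

static int64_t sketch_ticks_to_units(int num, int den, int64_t n) {
  const int64_t round = SKETCH_TICKS_PER_SEC * num / 2 - 1;
  return (n * den + round) / num / SKETCH_TICKS_PER_SEC;
}

/* With a 1/30 timebase, pts 30 is one second:
 * sketch_units_to_ticks(1, 30, 30) == 10000000 and
 * sketch_ticks_to_units(1, 30, 10000000) == 30. */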
@@ -963,28 +1005,30 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
return flags;
}
+const size_t kMinCompressedSize = 8192;
static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
const vpx_image_t *img,
vpx_codec_pts_t pts,
unsigned long duration,
- vpx_enc_frame_flags_t flags,
+ vpx_enc_frame_flags_t enc_flags,
unsigned long deadline) {
- vpx_codec_err_t res = VPX_CODEC_OK;
+ volatile vpx_codec_err_t res = VPX_CODEC_OK;
+ volatile vpx_enc_frame_flags_t flags = enc_flags;
VP9_COMP *const cpi = ctx->cpi;
const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
size_t data_sz;
+ if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
+
if (img != NULL) {
res = validate_img(ctx, img);
- // TODO(jzern) the checks related to cpi's validity should be treated as a
- // failure condition, encoder setup is done fully in init() currently.
- if (res == VPX_CODEC_OK && cpi != NULL) {
+ if (res == VPX_CODEC_OK) {
// There's no codec control for multiple alt-refs so check the encoder
// instance for its status to determine the compressed data size.
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
(cpi->multi_arf_allowed ? 8 : 2);
- if (data_sz < 4096)
- data_sz = 4096;
+ if (data_sz < kMinCompressedSize)
+ data_sz = kMinCompressedSize;
if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
ctx->cx_data_sz = data_sz;
free(ctx->cx_data);
@@ -1006,6 +1050,14 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_INVALID_PARAM;
}
+ if (setjmp(cpi->common.error.jmp)) {
+ cpi->common.error.setjmp = 0;
+ res = update_error_state(ctx, &cpi->common.error);
+ vpx_clear_system_state();
+ return res;
+ }
+ cpi->common.error.setjmp = 1;
+
vp9_apply_encoding_flags(cpi, flags);
// Handle fixed keyframe intervals
@@ -1017,8 +1069,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
}
}
- // Initialize the encoder instance on the first frame.
- if (res == VPX_CODEC_OK && cpi != NULL) {
+ if (res == VPX_CODEC_OK) {
unsigned int lib_flags = 0;
YV12_BUFFER_CONFIG sd;
int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
@@ -1057,7 +1108,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
* the buffer size anyway.
*/
if (cx_data_sz < ctx->cx_data_sz / 2) {
- ctx->base.err_detail = "Compressed data buffer too small";
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR,
+ "Compressed data buffer too small");
return VPX_CODEC_ERROR;
}
}
@@ -1175,6 +1227,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
}
}
+ cpi->common.error.setjmp = 0;
return res;
}
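
The setjmp() block added above converts internal encoder aborts into a recoverable error return; it is also why res and flags were made volatile, since locals modified after setjmp() must be volatile to survive the longjmp(). A minimal sketch of the pattern, with all names hypothetical:

#include <setjmp.h>

struct sketch_error { jmp_buf jmp; int setjmp_armed; int code; };

/* Called from deep inside the encoder instead of aborting the process. */
static void sketch_internal_error(struct sketch_error *err, int code) {
  err->code = code;
  if (err->setjmp_armed) longjmp(err->jmp, 1);
}

static int sketch_encode(struct sketch_error *err) {
  if (setjmp(err->jmp)) {  /* re-entered via longjmp on failure */
    err->setjmp_armed = 0;
    return err->code;      /* surface the recorded error to the caller */
  }
  err->setjmp_armed = 1;
  /* ... encode; failures call sketch_internal_error(err, ...) ... */
  err->setjmp_armed = 0;
  return 0;
}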
@@ -1393,8 +1446,8 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers);
LAYER_CONTEXT *lc =
&cpi->svc.layer_context[layer];
- lc->max_q = params->max_quantizers[sl];
- lc->min_q = params->min_quantizers[sl];
+ lc->max_q = params->max_quantizers[layer];
+ lc->min_q = params->min_quantizers[layer];
lc->scaling_factor_num = params->scaling_factor_num[sl];
lc->scaling_factor_den = params->scaling_factor_den[sl];
}
@@ -1496,6 +1549,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval},
{VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config},
{VP9E_SET_RENDER_SIZE, ctrl_set_render_size},
+ {VP9E_SET_TARGET_LEVEL, ctrl_set_target_level},
// Getters
{VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
@@ -1503,6 +1557,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP9_GET_REFERENCE, ctrl_get_reference},
{VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id},
{VP9E_GET_ACTIVEMAP, ctrl_get_active_map},
+ {VP9E_GET_LEVEL, ctrl_get_level},
{ -1, NULL},
};
@@ -1555,7 +1610,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
// keyframing settings (kf)
VPX_KF_AUTO, // g_kfmode
0, // kf_min_dist
- 9999, // kf_max_dist
+ 128, // kf_max_dist
VPX_SS_DEFAULT_LAYERS, // ss_number_layers
{0},
diff --git a/libvpx/vp9/vp9_dx_iface.c b/libvpx/vp9/vp9_dx_iface.c
index be5d1600a..6531e2c61 100644
--- a/libvpx/vp9/vp9_dx_iface.c
+++ b/libvpx/vp9/vp9_dx_iface.c
@@ -127,7 +127,7 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
vpx_decrypt_cb decrypt_cb,
void *decrypt_state) {
int intra_only_flag = 0;
- uint8_t clear_buffer[9];
+ uint8_t clear_buffer[10];
if (data + data_sz <= data)
return VPX_CODEC_INVALID_PARAM;
@@ -141,6 +141,11 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
data = clear_buffer;
}
+  // A maximum of 6 bits is needed to read the frame marker, profile, and
+  // show_existing_frame.
+ if (data_sz < 1)
+ return VPX_CODEC_UNSUP_BITSTREAM;
+
{
int show_frame;
int error_resilient;
@@ -154,15 +159,19 @@ static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
if (profile >= MAX_PROFILES)
return VPX_CODEC_UNSUP_BITSTREAM;
- if ((profile >= 2 && data_sz <= 1) || data_sz < 1)
- return VPX_CODEC_UNSUP_BITSTREAM;
-
if (vpx_rb_read_bit(&rb)) { // show an existing frame
+ // If profile is > 2 and show_existing_frame is true, then at least 1 more
+ // byte (6+3=9 bits) is needed.
+ if (profile > 2 && data_sz < 2)
+ return VPX_CODEC_UNSUP_BITSTREAM;
vpx_rb_read_literal(&rb, 3); // Frame buffer to show.
return VPX_CODEC_OK;
}
- if (data_sz <= 8)
+  // For the rest of the function, a maximum of 9 more bytes is needed
+ // (computed by taking the maximum possible bits needed in each case). Note
+ // that this has to be updated if we read any more bits in this function.
+ if (data_sz < 10)
return VPX_CODEC_UNSUP_BITSTREAM;
si->is_kf = !vpx_rb_read_bit(&rb);
diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk
index 25a176f81..5f3de8f8a 100644
--- a/libvpx/vp9/vp9cx.mk
+++ b/libvpx/vp9/vp9cx.mk
@@ -17,7 +17,6 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c
-VP9_CX_SRCS-yes += encoder/vp9_avg.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@@ -76,12 +75,16 @@ VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c
VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h
+VP9_CX_SRCS-yes += encoder/vp9_aq_360.c
+VP9_CX_SRCS-yes += encoder/vp9_aq_360.h
VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.c
VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h
VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c
VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h
VP9_CX_SRCS-yes += encoder/vp9_skin_detection.c
VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h
+VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.c
+VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
@@ -91,15 +94,15 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
+VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
ifeq ($(CONFIG_USE_X86INC),yes)
-VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm
@@ -111,12 +114,14 @@ endif
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
-VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
endif
endif
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c
+endif
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c
@@ -128,10 +133,8 @@ ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_error_neon.c
endif
-VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c
-VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_avg_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c
diff --git a/libvpx/vpx/exports_enc b/libvpx/vpx/exports_enc
index e4707ba10..914e36cd4 100644
--- a/libvpx/vpx/exports_enc
+++ b/libvpx/vpx/exports_enc
@@ -7,9 +7,3 @@ text vpx_codec_get_cx_data
text vpx_codec_get_global_headers
text vpx_codec_get_preview_frame
text vpx_codec_set_cx_data_buf
-text vpx_svc_dump_statistics
-text vpx_svc_encode
-text vpx_svc_get_message
-text vpx_svc_init
-text vpx_svc_release
-text vpx_svc_set_options
diff --git a/libvpx/vpx/exports_spatial_svc b/libvpx/vpx/exports_spatial_svc
new file mode 100644
index 000000000..d258a1d61
--- /dev/null
+++ b/libvpx/vpx/exports_spatial_svc
@@ -0,0 +1,6 @@
+text vpx_svc_dump_statistics
+text vpx_svc_encode
+text vpx_svc_get_message
+text vpx_svc_init
+text vpx_svc_release
+text vpx_svc_set_options
diff --git a/libvpx/vpx/src/svc_encodeframe.c b/libvpx/vpx/src/svc_encodeframe.c
index ff600830e..ef9b3528a 100644
--- a/libvpx/vpx/src/svc_encodeframe.c
+++ b/libvpx/vpx/src/svc_encodeframe.c
@@ -322,8 +322,7 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
if (si->svc_params.scaling_factor_den[sl] > 0) {
- alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] *
- 1.0 / si->svc_params.scaling_factor_den[sl]);
+      alloc_ratio[sl] = (float)pow(2, sl);
total += alloc_ratio[sl];
}
}
@@ -334,9 +333,9 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
alloc_ratio[sl] / total);
if (svc_ctx->temporal_layering_mode == 3) {
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
- spatial_layer_target >> 1;
+          spatial_layer_target * 6 / 10;  // 60%
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
- (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+          spatial_layer_target * 8 / 10;  // 80%
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] =
spatial_layer_target;
} else if (svc_ctx->temporal_layering_mode == 2 ||
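
With three temporal layers, the replacement above assigns cumulative targets of 60%, 80%, and 100% of the spatial layer's bitrate, where the removed code used 50% and 75%. A sketch of the split (hypothetical name):

/* Sketch of the temporal-layer split for temporal_layering_mode == 3. */
static void sketch_split_temporal_bitrates(int spatial_layer_target,
                                           int bitrate[3]) {
  bitrate[0] = spatial_layer_target * 6 / 10;  /* base layer: 60% */
  bitrate[1] = spatial_layer_target * 8 / 10;  /* + middle layer: 80% */
  bitrate[2] = spatial_layer_target;           /* all layers: 100% */
}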
@@ -385,7 +384,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
vpx_codec_iface_t *iface,
vpx_codec_enc_cfg_t *enc_cfg) {
vpx_codec_err_t res;
- int i;
+  int i, sl, tl;
SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
enc_cfg == NULL) {
@@ -398,11 +397,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
si->width = enc_cfg->g_w;
si->height = enc_cfg->g_h;
- if (enc_cfg->kf_max_dist < 2) {
- svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n",
- enc_cfg->kf_max_dist);
- return VPX_CODEC_INVALID_PARAM;
- }
si->kf_dist = enc_cfg->kf_max_dist;
if (svc_ctx->spatial_layers == 0)
@@ -423,11 +417,16 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
svc_ctx->temporal_layers = 2;
}
- for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
- si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
- si->svc_params.min_quantizers[i] = 0;
- si->svc_params.scaling_factor_num[i] = DEFAULT_SCALE_FACTORS_NUM[i];
- si->svc_params.scaling_factor_den[i] = DEFAULT_SCALE_FACTORS_DEN[i];
+ for (sl = 0; sl < VPX_SS_MAX_LAYERS; ++sl) {
+ si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM[sl];
+ si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl];
+ }
+ for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ i = sl * svc_ctx->temporal_layers + tl;
+ si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
+ si->svc_params.min_quantizers[i] = 0;
+ }
}
// Parse aggregate command line options. Options must start with
@@ -485,6 +484,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
enc_cfg->rc_buf_initial_sz = 500;
enc_cfg->rc_buf_optimal_sz = 600;
enc_cfg->rc_buf_sz = 1000;
+ enc_cfg->rc_dropframe_thresh = 0;
}
if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
@@ -571,6 +571,27 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx,
}
#endif
#endif
+ case VPX_CODEC_PSNR_PKT:
+ {
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
+ int j;
+ svc_log(svc_ctx, SVC_LOG_DEBUG,
+ "frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+ "%2.3f %2.3f %2.3f %2.3f \n",
+ si->psnr_pkt_received, 0,
+ cx_pkt->data.layer_psnr[0].psnr[0],
+ cx_pkt->data.layer_psnr[0].psnr[1],
+ cx_pkt->data.layer_psnr[0].psnr[2],
+ cx_pkt->data.layer_psnr[0].psnr[3]);
+ for (j = 0; j < COMPONENTS; ++j) {
+ si->psnr_sum[0][j] +=
+ cx_pkt->data.layer_psnr[0].psnr[j];
+ si->sse_sum[0][j] += cx_pkt->data.layer_psnr[0].sse[j];
+ }
+#endif
+ }
+ ++si->psnr_pkt_received;
+ break;
default: {
break;
}
diff --git a/libvpx/vpx/vp8cx.h b/libvpx/vpx/vp8cx.h
index bd99c6dc1..61882e650 100644
--- a/libvpx/vpx/vp8cx.h
+++ b/libvpx/vpx/vp8cx.h
@@ -45,15 +45,6 @@ extern vpx_codec_iface_t vpx_codec_vp9_cx_algo;
extern vpx_codec_iface_t *vpx_codec_vp9_cx(void);
/*!@} - end algorithm interface member group*/
-/*!\name Algorithm interface for VP10
- *
- * This interface provides the capability to encode raw VP9 streams.
- * @{
- */
-extern vpx_codec_iface_t vpx_codec_vp10_cx_algo;
-extern vpx_codec_iface_t *vpx_codec_vp10_cx(void);
-/*!@} - end algorithm interface member group*/
-
/*
* Algorithm Flags
*/
@@ -554,6 +545,21 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9
*/
VP9E_SET_RENDER_SIZE,
+
+ /*!\brief Codec control function to set target level.
+ *
+ * 255: off (default); 0: only keep level stats; 10: target for level 1.0;
+ * 11: target for level 1.1; ... 62: target for level 6.2
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_TARGET_LEVEL,
+
+ /*!\brief Codec control function to get bitstream level.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_GET_LEVEL
};
/*!\brief vpx 1-D scaling mode
@@ -809,6 +815,12 @@ VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
#define VPX_CTRL_VP9E_SET_RENDER_SIZE
+VPX_CTRL_USE_TYPE(VP9E_SET_TARGET_LEVEL, unsigned int)
+#define VPX_CTRL_VP9E_SET_TARGET_LEVEL
+
+VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *)
+#define VPX_CTRL_VP9E_GET_LEVEL
+
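A hedged usage sketch for the two new controls, assuming an encoder context already initialized with vpx_codec_enc_init(); per the enum documentation above, 255 disables level targeting and 51 targets level 5.1 (function name hypothetical):

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

static void sketch_level_controls(vpx_codec_ctx_t *ctx) {
  int level = 0;
  /* 255 = off (default), 0 = keep level stats only, 10..62 = target level. */
  vpx_codec_control(ctx, VP9E_SET_TARGET_LEVEL, 51u);
  /* ... encode frames ... */
  vpx_codec_control(ctx, VP9E_GET_LEVEL, &level);  /* e.g. 51 for level 5.1 */
}
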
/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
diff --git a/libvpx/vpx/vp8dx.h b/libvpx/vpx/vp8dx.h
index 1f02fd595..67c97bb6c 100644
--- a/libvpx/vpx/vp8dx.h
+++ b/libvpx/vpx/vp8dx.h
@@ -46,15 +46,6 @@ extern vpx_codec_iface_t vpx_codec_vp9_dx_algo;
extern vpx_codec_iface_t *vpx_codec_vp9_dx(void);
/*!@} - end algorithm interface member group*/
-/*!\name Algorithm interface for VP10
- *
- * This interface provides the capability to decode VP10 streams.
- * @{
- */
-extern vpx_codec_iface_t vpx_codec_vp10_dx_algo;
-extern vpx_codec_iface_t *vpx_codec_vp10_dx(void);
-/*!@} - end algorithm interface member group*/
-
/*!\enum vp8_dec_control_id
* \brief VP8 decoder control functions
*
diff --git a/libvpx/vpx/vpx_image.h b/libvpx/vpx/vpx_image.h
index e9e952c48..7958c6980 100644
--- a/libvpx/vpx/vpx_image.h
+++ b/libvpx/vpx/vpx_image.h
@@ -28,7 +28,7 @@ extern "C" {
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures
*/
-#define VPX_IMAGE_ABI_VERSION (3) /**<\hideinitializer*/
+#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/
#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */
diff --git a/libvpx/vpx_dsp/add_noise.c b/libvpx/vpx_dsp/add_noise.c
new file mode 100644
index 000000000..682b44419
--- /dev/null
+++ b/libvpx/vpx_dsp/add_noise.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+void vpx_plane_add_noise_c(uint8_t *start, char *noise,
+ char blackclamp[16],
+ char whiteclamp[16],
+ char bothclamp[16],
+ unsigned int width, unsigned int height, int pitch) {
+ unsigned int i, j;
+
+ for (i = 0; i < height; i++) {
+ uint8_t *pos = start + i * pitch;
+ char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
+
+ for (j = 0; j < width; j++) {
+ int v = pos[j];
+
+ v = clamp(v - blackclamp[0], 0, 255);
+ v = clamp(v + bothclamp[0], 0, 255);
+ v = clamp(v - whiteclamp[0], 0, 255);
+
+ pos[j] = v + ref[j];
+ }
+ }
+}
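
The three saturating steps above are equivalent to clamping the pixel into [blackclamp[0], 255 - whiteclamp[0]], assuming bothclamp[0] == blackclamp[0] + whiteclamp[0] as the callers arrange; the added noise sample then cannot wrap around. A scalar sketch of that clamp (hypothetical name):

/* Equivalent of the three clamp() steps above, under the assumption that
 * bothclamp[0] == blackclamp[0] + whiteclamp[0]. */
static int sketch_clamp_for_noise(int v, int black, int white) {
  if (v < black) v = black;
  if (v > 255 - white) v = 255 - white;
  return v;  /* v + noise in [-black, white] now stays within [0, 255] */
}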
diff --git a/libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c b/libvpx/vpx_dsp/arm/avg_neon.c
index d569ec95d..e52958c54 100644
--- a/libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c
+++ b/libvpx/vpx_dsp/arm/avg_neon.c
@@ -11,7 +11,7 @@
#include <arm_neon.h>
#include <assert.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -24,7 +24,19 @@ static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
return vget_lane_u32(c, 0);
}
-unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
+unsigned int vpx_avg_4x4_neon(const uint8_t *s, int p) {
+ uint16x8_t v_sum;
+ uint32x2_t v_s0 = vdup_n_u32(0);
+ uint32x2_t v_s1 = vdup_n_u32(0);
+ v_s0 = vld1_lane_u32((const uint32_t *)s, v_s0, 0);
+ v_s0 = vld1_lane_u32((const uint32_t *)(s + p), v_s0, 1);
+ v_s1 = vld1_lane_u32((const uint32_t *)(s + 2 * p), v_s1, 0);
+ v_s1 = vld1_lane_u32((const uint32_t *)(s + 3 * p), v_s1, 1);
+ v_sum = vaddl_u8(vreinterpret_u8_u32(v_s0), vreinterpret_u8_u32(v_s1));
+ return (horizontal_add_u16x8(v_sum) + 8) >> 4;
+}
+
+unsigned int vpx_avg_8x8_neon(const uint8_t *s, int p) {
uint8x8_t v_s0 = vld1_u8(s);
const uint8x8_t v_s1 = vld1_u8(s + p);
uint16x8_t v_sum = vaddl_u8(v_s0, v_s1);
@@ -50,7 +62,34 @@ unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
return (horizontal_add_u16x8(v_sum) + 32) >> 6;
}
-void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
+// coeff: 16 bits, dynamic range [-32640, 32640].
+// length: value range {16, 64, 256, 1024}.
+int vpx_satd_neon(const int16_t *coeff, int length) {
+ const int16x4_t zero = vdup_n_s16(0);
+ int32x4_t accum = vdupq_n_s32(0);
+
+ do {
+ const int16x8_t src0 = vld1q_s16(coeff);
+ const int16x8_t src8 = vld1q_s16(coeff + 8);
+ accum = vabal_s16(accum, vget_low_s16(src0), zero);
+ accum = vabal_s16(accum, vget_high_s16(src0), zero);
+ accum = vabal_s16(accum, vget_low_s16(src8), zero);
+ accum = vabal_s16(accum, vget_high_s16(src8), zero);
+ length -= 16;
+ coeff += 16;
+ } while (length != 0);
+
+ {
+ // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
+ const int64x2_t s0 = vpaddlq_s32(accum); // cascading summation of 'accum'.
+ const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)),
+ vreinterpret_s32_s64(vget_high_s64(s0)));
+ const int satd = vget_lane_s32(s1, 0);
+ return satd;
+ }
+}
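
For reference, the scalar equivalent of the SATD above is a plain sum of absolute values over the already Hadamard-transformed coefficients (sketch, hypothetical name):

#include <stdlib.h>
#include <stdint.h>

static int sketch_satd_c(const int16_t *coeff, int length) {
  int i, satd = 0;
  for (i = 0; i < length; ++i)
    satd += abs(coeff[i]);
  return satd;
}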
+
+void vpx_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
const int ref_stride, const int height) {
int i;
uint16x8_t vec_sum_lo = vdupq_n_u16(0);
@@ -103,7 +142,7 @@ void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_hi));
}
-int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) {
+int16_t vpx_int_pro_col_neon(uint8_t const *ref, const int width) {
int i;
uint16x8_t vec_sum = vdupq_n_u16(0);
@@ -119,7 +158,7 @@ int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) {
// ref, src = [0, 510] - max diff = 16-bits
// bwl = {2, 3, 4}, width = {16, 32, 64}
-int vp9_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) {
+int vpx_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) {
int width = 4 << bwl;
int32x4_t sse = vdupq_n_s32(0);
int16x8_t total = vdupq_n_s16(0);
@@ -158,3 +197,60 @@ int vp9_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) {
return s - ((t * t) >> shift_factor);
}
}
+
+void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int *min, int *max) {
+ // Load and concatenate.
+ const uint8x16_t a01 = vcombine_u8(vld1_u8(a),
+ vld1_u8(a + a_stride));
+ const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride),
+ vld1_u8(a + 3 * a_stride));
+ const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride),
+ vld1_u8(a + 5 * a_stride));
+ const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride),
+ vld1_u8(a + 7 * a_stride));
+
+ const uint8x16_t b01 = vcombine_u8(vld1_u8(b),
+ vld1_u8(b + b_stride));
+ const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride),
+ vld1_u8(b + 3 * b_stride));
+ const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride),
+ vld1_u8(b + 5 * b_stride));
+ const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride),
+ vld1_u8(b + 7 * b_stride));
+
+ // Absolute difference.
+ const uint8x16_t ab01_diff = vabdq_u8(a01, b01);
+ const uint8x16_t ab23_diff = vabdq_u8(a23, b23);
+ const uint8x16_t ab45_diff = vabdq_u8(a45, b45);
+ const uint8x16_t ab67_diff = vabdq_u8(a67, b67);
+
+ // Max values between the Q vectors.
+ const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff);
+ const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff);
+ const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff);
+ const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff);
+
+ const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
+ const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
+
+ // Split to D and start doing pairwise.
+ uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));
+ uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));
+
+  // Enough runs of vpmax/min propagate the max/min values to every position.
+ ab_max = vpmax_u8(ab_max, ab_max);
+ ab_min = vpmin_u8(ab_min, ab_min);
+
+ ab_max = vpmax_u8(ab_max, ab_max);
+ ab_min = vpmin_u8(ab_min, ab_min);
+
+ ab_max = vpmax_u8(ab_max, ab_max);
+ ab_min = vpmin_u8(ab_min, ab_min);
+
+ *min = *max = 0; // Clear high bits
+ // Store directly to avoid costly neon->gpr transfer.
+ vst1_lane_u8((uint8_t *)max, ab_max, 0);
+ vst1_lane_u8((uint8_t *)min, ab_min, 0);
+}
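
A scalar reference for the NEON min/max above, as a sketch with a hypothetical name: the extrema of the absolute differences over an 8x8 block.

#include <stdlib.h>
#include <stdint.h>

static void sketch_minmax_8x8_c(const uint8_t *a, int a_stride,
                                const uint8_t *b, int b_stride,
                                int *min, int *max) {
  int i, j;
  *min = 255;
  *max = 0;
  for (i = 0; i < 8; ++i, a += a_stride, b += b_stride) {
    for (j = 0; j < 8; ++j) {
      const int diff = abs(a[j] - b[j]);
      if (diff < *min) *min = diff;
      if (diff > *max) *max = diff;
    }
  }
}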
diff --git a/libvpx/vpx_dsp/arm/hadamard_neon.c b/libvpx/vpx_dsp/arm/hadamard_neon.c
new file mode 100644
index 000000000..21e3e3dba
--- /dev/null
+++ b/libvpx/vpx_dsp/arm/hadamard_neon.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+
+static void hadamard8x8_one_pass(int16x8_t *a0, int16x8_t *a1,
+ int16x8_t *a2, int16x8_t *a3,
+ int16x8_t *a4, int16x8_t *a5,
+ int16x8_t *a6, int16x8_t *a7) {
+ const int16x8_t b0 = vaddq_s16(*a0, *a1);
+ const int16x8_t b1 = vsubq_s16(*a0, *a1);
+ const int16x8_t b2 = vaddq_s16(*a2, *a3);
+ const int16x8_t b3 = vsubq_s16(*a2, *a3);
+ const int16x8_t b4 = vaddq_s16(*a4, *a5);
+ const int16x8_t b5 = vsubq_s16(*a4, *a5);
+ const int16x8_t b6 = vaddq_s16(*a6, *a7);
+ const int16x8_t b7 = vsubq_s16(*a6, *a7);
+
+ const int16x8_t c0 = vaddq_s16(b0, b2);
+ const int16x8_t c1 = vaddq_s16(b1, b3);
+ const int16x8_t c2 = vsubq_s16(b0, b2);
+ const int16x8_t c3 = vsubq_s16(b1, b3);
+ const int16x8_t c4 = vaddq_s16(b4, b6);
+ const int16x8_t c5 = vaddq_s16(b5, b7);
+ const int16x8_t c6 = vsubq_s16(b4, b6);
+ const int16x8_t c7 = vsubq_s16(b5, b7);
+
+ *a0 = vaddq_s16(c0, c4);
+ *a1 = vsubq_s16(c2, c6);
+ *a2 = vsubq_s16(c0, c4);
+ *a3 = vaddq_s16(c2, c6);
+ *a4 = vaddq_s16(c3, c7);
+ *a5 = vsubq_s16(c3, c7);
+ *a6 = vsubq_s16(c1, c5);
+ *a7 = vaddq_s16(c1, c5);
+}
+
+// TODO(johannkoenig): Make a transpose library and dedup with idct. Consider
+// reversing transpose order which may make it easier for the compiler to
+// reconcile the vtrn.64 moves.
+static void transpose8x8(int16x8_t *a0, int16x8_t *a1,
+ int16x8_t *a2, int16x8_t *a3,
+ int16x8_t *a4, int16x8_t *a5,
+ int16x8_t *a6, int16x8_t *a7) {
+ // Swap 64 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 08 09 10 11 12 13 14 15
+ // a2: 16 17 18 19 20 21 22 23
+ // a3: 24 25 26 27 28 29 30 31
+ // a4: 32 33 34 35 36 37 38 39
+ // a5: 40 41 42 43 44 45 46 47
+ // a6: 48 49 50 51 52 53 54 55
+ // a7: 56 57 58 59 60 61 62 63
+ // to:
+ // a04_lo: 00 01 02 03 32 33 34 35
+ // a15_lo: 08 09 10 11 40 41 42 43
+ // a26_lo: 16 17 18 19 48 49 50 51
+ // a37_lo: 24 25 26 27 56 57 58 59
+ // a04_hi: 04 05 06 07 36 37 38 39
+ // a15_hi: 12 13 14 15 44 45 46 47
+ // a26_hi: 20 21 22 23 52 53 54 55
+ // a37_hi: 28 29 30 31 60 61 62 63
+ const int16x8_t a04_lo = vcombine_s16(vget_low_s16(*a0), vget_low_s16(*a4));
+ const int16x8_t a15_lo = vcombine_s16(vget_low_s16(*a1), vget_low_s16(*a5));
+ const int16x8_t a26_lo = vcombine_s16(vget_low_s16(*a2), vget_low_s16(*a6));
+ const int16x8_t a37_lo = vcombine_s16(vget_low_s16(*a3), vget_low_s16(*a7));
+ const int16x8_t a04_hi = vcombine_s16(vget_high_s16(*a0), vget_high_s16(*a4));
+ const int16x8_t a15_hi = vcombine_s16(vget_high_s16(*a1), vget_high_s16(*a5));
+ const int16x8_t a26_hi = vcombine_s16(vget_high_s16(*a2), vget_high_s16(*a6));
+ const int16x8_t a37_hi = vcombine_s16(vget_high_s16(*a3), vget_high_s16(*a7));
+
+ // Swap 32 bit elements resulting in:
+ // a0246_lo:
+ // 00 01 16 17 32 33 48 49
+ // 02 03 18 19 34 35 50 51
+ // a1357_lo:
+ // 08 09 24 25 40 41 56 57
+ // 10 11 26 27 42 43 58 59
+ // a0246_hi:
+ // 04 05 20 21 36 37 52 53
+ // 06 07 22 23 38 39 54 55
+  // a1357_hi:
+ // 12 13 28 29 44 45 60 61
+ // 14 15 30 31 46 47 62 63
+ const int32x4x2_t a0246_lo = vtrnq_s32(vreinterpretq_s32_s16(a04_lo),
+ vreinterpretq_s32_s16(a26_lo));
+ const int32x4x2_t a1357_lo = vtrnq_s32(vreinterpretq_s32_s16(a15_lo),
+ vreinterpretq_s32_s16(a37_lo));
+ const int32x4x2_t a0246_hi = vtrnq_s32(vreinterpretq_s32_s16(a04_hi),
+ vreinterpretq_s32_s16(a26_hi));
+ const int32x4x2_t a1357_hi = vtrnq_s32(vreinterpretq_s32_s16(a15_hi),
+ vreinterpretq_s32_s16(a37_hi));
+
+ // Swap 16 bit elements resulting in:
+ // b0:
+ // 00 08 16 24 32 40 48 56
+ // 01 09 17 25 33 41 49 57
+ // b1:
+ // 02 10 18 26 34 42 50 58
+ // 03 11 19 27 35 43 51 59
+ // b2:
+ // 04 12 20 28 36 44 52 60
+ // 05 13 21 29 37 45 53 61
+ // b3:
+ // 06 14 22 30 38 46 54 62
+ // 07 15 23 31 39 47 55 63
+ const int16x8x2_t b0 = vtrnq_s16(vreinterpretq_s16_s32(a0246_lo.val[0]),
+ vreinterpretq_s16_s32(a1357_lo.val[0]));
+ const int16x8x2_t b1 = vtrnq_s16(vreinterpretq_s16_s32(a0246_lo.val[1]),
+ vreinterpretq_s16_s32(a1357_lo.val[1]));
+ const int16x8x2_t b2 = vtrnq_s16(vreinterpretq_s16_s32(a0246_hi.val[0]),
+ vreinterpretq_s16_s32(a1357_hi.val[0]));
+ const int16x8x2_t b3 = vtrnq_s16(vreinterpretq_s16_s32(a0246_hi.val[1]),
+ vreinterpretq_s16_s32(a1357_hi.val[1]));
+
+ *a0 = b0.val[0];
+ *a1 = b0.val[1];
+ *a2 = b1.val[0];
+ *a3 = b1.val[1];
+ *a4 = b2.val[0];
+ *a5 = b2.val[1];
+ *a6 = b3.val[0];
+ *a7 = b3.val[1];
+}
+
+void vpx_hadamard_8x8_neon(const int16_t *src_diff, int src_stride,
+ int16_t *coeff) {
+ int16x8_t a0 = vld1q_s16(src_diff);
+ int16x8_t a1 = vld1q_s16(src_diff + src_stride);
+ int16x8_t a2 = vld1q_s16(src_diff + 2 * src_stride);
+ int16x8_t a3 = vld1q_s16(src_diff + 3 * src_stride);
+ int16x8_t a4 = vld1q_s16(src_diff + 4 * src_stride);
+ int16x8_t a5 = vld1q_s16(src_diff + 5 * src_stride);
+ int16x8_t a6 = vld1q_s16(src_diff + 6 * src_stride);
+ int16x8_t a7 = vld1q_s16(src_diff + 7 * src_stride);
+
+ hadamard8x8_one_pass(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
+
+ transpose8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
+
+ hadamard8x8_one_pass(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
+
+ // Skip the second transpose because it is not required.
+
+ vst1q_s16(coeff + 0, a0);
+ vst1q_s16(coeff + 8, a1);
+ vst1q_s16(coeff + 16, a2);
+ vst1q_s16(coeff + 24, a3);
+ vst1q_s16(coeff + 32, a4);
+ vst1q_s16(coeff + 40, a5);
+ vst1q_s16(coeff + 48, a6);
+ vst1q_s16(coeff + 56, a7);
+}
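
hadamard8x8_one_pass() runs once per dimension, and the final transpose can be dropped because the C reference (see the avg.c hunk later in this commit) documents the output order as unimportant. For orientation, a hedged scalar sketch of one 8-point Hadamard pass as three butterfly stages; the exact sign and ordering conventions in libvpx's helper may differ:

#include <stdint.h>

static void hadamard8_sketch(const int16_t in[8], int16_t out[8]) {
  const int16_t b0 = in[0] + in[1], b1 = in[0] - in[1];
  const int16_t b2 = in[2] + in[3], b3 = in[2] - in[3];
  const int16_t b4 = in[4] + in[5], b5 = in[4] - in[5];
  const int16_t b6 = in[6] + in[7], b7 = in[6] - in[7];
  const int16_t c0 = b0 + b2, c1 = b1 + b3, c2 = b0 - b2, c3 = b1 - b3;
  const int16_t c4 = b4 + b6, c5 = b5 + b7, c6 = b4 - b6, c7 = b5 - b7;
  out[0] = c0 + c4; out[1] = c1 + c5; out[2] = c2 + c6; out[3] = c3 + c7;
  out[4] = c0 - c4; out[5] = c1 - c5; out[6] = c2 - c6; out[7] = c3 - c7;
}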
+
+void vpx_hadamard_16x16_neon(const int16_t *src_diff, int src_stride,
+ int16_t *coeff) {
+ int i;
+
+ /* Rearrange 16x16 to 8x32 and remove stride.
+ * Top left first. */
+ vpx_hadamard_8x8_neon(src_diff + 0 + 0 * src_stride, src_stride, coeff + 0);
+ /* Top right. */
+ vpx_hadamard_8x8_neon(src_diff + 8 + 0 * src_stride, src_stride, coeff + 64);
+ /* Bottom left. */
+ vpx_hadamard_8x8_neon(src_diff + 0 + 8 * src_stride, src_stride, coeff + 128);
+ /* Bottom right. */
+ vpx_hadamard_8x8_neon(src_diff + 8 + 8 * src_stride, src_stride, coeff + 192);
+
+ for (i = 0; i < 64; i += 8) {
+ const int16x8_t a0 = vld1q_s16(coeff + 0);
+ const int16x8_t a1 = vld1q_s16(coeff + 64);
+ const int16x8_t a2 = vld1q_s16(coeff + 128);
+ const int16x8_t a3 = vld1q_s16(coeff + 192);
+
+ const int16x8_t b0 = vhaddq_s16(a0, a1);
+ const int16x8_t b1 = vhsubq_s16(a0, a1);
+ const int16x8_t b2 = vhaddq_s16(a2, a3);
+ const int16x8_t b3 = vhsubq_s16(a2, a3);
+
+ const int16x8_t c0 = vaddq_s16(b0, b2);
+ const int16x8_t c1 = vaddq_s16(b1, b3);
+ const int16x8_t c2 = vsubq_s16(b0, b2);
+ const int16x8_t c3 = vsubq_s16(b1, b3);
+
+ vst1q_s16(coeff + 0, c0);
+ vst1q_s16(coeff + 64, c1);
+ vst1q_s16(coeff + 128, c2);
+ vst1q_s16(coeff + 192, c3);
+
+ coeff += 8;
+ }
+}
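
The combine loop relies on vhaddq_s16/vhsubq_s16 being halving operations: they compute (a ± b) >> 1 with a widened intermediate, which keeps the first combine stage inside int16_t range before the full-width vaddq/vsubq stage. A scalar model of one lane of that loop (the function name is illustrative):

#include <stdint.h>

static void hadamard16_combine_model(int16_t *coeff) {
  const int a0 = coeff[0], a1 = coeff[64], a2 = coeff[128], a3 = coeff[192];
  const int b0 = (a0 + a1) >> 1, b1 = (a0 - a1) >> 1;  /* vhaddq/vhsubq */
  const int b2 = (a2 + a3) >> 1, b3 = (a2 - a3) >> 1;
  coeff[0] = (int16_t)(b0 + b2);
  coeff[64] = (int16_t)(b1 + b3);
  coeff[128] = (int16_t)(b0 - b2);
  coeff[192] = (int16_t)(b1 - b3);
}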
diff --git a/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm b/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm
index e45e34cd4..937115898 100644
--- a/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm
+++ b/libvpx/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -16,37 +16,28 @@
; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
;
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r12, [sp, #8] ; load count
ldr r2, [sp, #4] ; load thresh
add r1, r1, r1 ; double pitch
- cmp r12, #0
- beq end_vpx_lf_h_edge
-
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
-count_lf_h_loop
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r3, r2, r1, lsr #1 ; set to 3 lines down
@@ -69,47 +60,34 @@ count_lf_h_loop
vst1.u8 {d6}, [r2@64], r1 ; store oq0
vst1.u8 {d7}, [r3@64], r1 ; store oq1
- add r0, r0, #8
- subs r12, r12, #1
- bne count_lf_h_loop
-
-end_vpx_lf_h_edge
pop {pc}
ENDP ; |vpx_lpf_horizontal_4_neon|
; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
;
; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r12, [sp, #8] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
- cmp r12, #0
- beq end_vpx_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
-count_lf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
@@ -149,12 +127,6 @@ count_lf_v_loop
vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
- add r0, r0, r1, lsl #3 ; s += pitch * 8
- subs r12, r12, #1
- subne r2, r0, #4 ; move s pointer down by 4 columns
- bne count_lf_v_loop
-
-end_vpx_lf_v_edge
pop {pc}
ENDP ; |vpx_lpf_vertical_4_neon|
diff --git a/libvpx/vpx_dsp/arm/loopfilter_4_neon.c b/libvpx/vpx_dsp/arm/loopfilter_4_neon.c
index 7ad411aea..7f3ee70b9 100644
--- a/libvpx/vpx_dsp/arm/loopfilter_4_neon.c
+++ b/libvpx/vpx_dsp/arm/loopfilter_4_neon.c
@@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon(
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
int i;
uint8_t *s, *psrc;
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
- if (count == 0) // end_vpx_lf_h_edge
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
psrc = src - (pitch << 2);
- for (i = 0; i < count; i++) {
+ for (i = 0; i < 1; i++) {
s = psrc + i * 8;
d3u8 = vld1_u8(s);
@@ -170,8 +166,7 @@ void vpx_lpf_vertical_4_neon(
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
int i, pitch8;
uint8_t *s;
uint8x8_t dblimit, dlimit, dthresh;
@@ -181,15 +176,12 @@ void vpx_lpf_vertical_4_neon(
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result;
- if (count == 0) // end_vpx_lf_h_edge
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
pitch8 = pitch * 8;
- for (i = 0; i < count; i++, src += pitch8) {
+ for (i = 0; i < 1; i++, src += pitch8) {
s = src - (i + 1) * 4;
d3u8 = vld1_u8(s);
diff --git a/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm b/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm
index e81734c04..a2f20e15f 100644
--- a/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm
+++ b/libvpx/vpx_dsp/arm/loopfilter_8_neon.asm
@@ -16,35 +16,26 @@
; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
;
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r12, [sp, #16] ; load count
ldr r2, [sp, #12] ; load thresh
add r1, r1, r1 ; double pitch
- cmp r12, #0
- beq end_vpx_mblf_h_edge
-
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
-count_mblf_h_loop
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r2, r3, r1, lsr #1 ; set to 3 lines down
@@ -69,11 +60,6 @@ count_mblf_h_loop
vst1.u8 {d4}, [r2@64], r1 ; store oq1
vst1.u8 {d5}, [r3@64], r1 ; store oq2
- add r0, r0, #8
- subs r12, r12, #1
- bne count_mblf_h_loop
-
-end_vpx_mblf_h_edge
pop {r4-r5, pc}
ENDP ; |vpx_lpf_horizontal_8_neon|
@@ -82,30 +68,24 @@ end_vpx_mblf_h_edge
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
;
; r0 uint8_t *s,
; r1 int pitch,
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
- ldr r12, [sp, #16] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
- cmp r12, #0
- beq end_vpx_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
-count_mblf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1
@@ -156,12 +136,6 @@ count_mblf_v_loop
vst2.8 {d4[6], d5[6]}, [r3], r1
vst2.8 {d4[7], d5[7]}, [r3]
- add r0, r0, r1, lsl #3 ; s += pitch * 8
- subs r12, r12, #1
- subne r2, r0, #4 ; move s pointer down by 4 columns
- bne count_mblf_v_loop
-
-end_vpx_mblf_v_edge
pop {r4-r5, pc}
ENDP ; |vpx_lpf_vertical_8_neon|
diff --git a/libvpx/vpx_dsp/arm/loopfilter_8_neon.c b/libvpx/vpx_dsp/arm/loopfilter_8_neon.c
index a887e2ee5..ec3757380 100644
--- a/libvpx/vpx_dsp/arm/loopfilter_8_neon.c
+++ b/libvpx/vpx_dsp/arm/loopfilter_8_neon.c
@@ -268,23 +268,19 @@ void vpx_lpf_horizontal_8_neon(
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
int i;
uint8_t *s, *psrc;
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
- if (count == 0) // end_vpx_mblf_h_edge
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
psrc = src - (pitch << 2);
- for (i = 0; i < count; i++) {
+ for (i = 0; i < 1; i++) {
s = psrc + i * 8;
d3u8 = vld1_u8(s);
@@ -328,8 +324,7 @@ void vpx_lpf_vertical_8_neon(
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
int i;
uint8_t *s;
uint8x8_t dblimit, dlimit, dthresh;
@@ -341,14 +336,11 @@ void vpx_lpf_vertical_8_neon(
uint8x8x4_t d4Result;
uint8x8x2_t d2Result;
- if (count == 0)
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
- for (i = 0; i < count; i++) {
+ for (i = 0; i < 1; i++) {
s = src + (i * (pitch << 3)) - 4;
d3u8 = vld1_u8(s);
diff --git a/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm b/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm
index 20d9cfb11..d5da7a840 100644
--- a/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm
+++ b/libvpx/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,27 +8,28 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vpx_lpf_horizontal_16_neon|
+ EXPORT |vpx_lpf_horizontal_edge_8_neon|
+ EXPORT |vpx_lpf_horizontal_edge_16_neon|
EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh
-; int count)
+; void mb_lpf_horizontal_edge(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vpx_lpf_horizontal_16_neon| PROC
+; r12 int count
+|mb_lpf_horizontal_edge| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
- ldr r12, [sp, #92] ; load count
h_count
vld1.8 {d16[]}, [r2] ; load *blimit
@@ -115,7 +116,35 @@ h_next
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vpx_lpf_horizontal_16_neon|
+ ENDP ; |mb_lpf_horizontal_edge|
+
+; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
+; r0 uint8_t *s,
+; r1 int pitch,
+; r2 const uint8_t *blimit,
+; r3 const uint8_t *limit,
+; sp const uint8_t *thresh
+|vpx_lpf_horizontal_edge_8_neon| PROC
+ mov r12, #1
+ b mb_lpf_horizontal_edge
+ ENDP ; |vpx_lpf_horizontal_edge_8_neon|
+
+; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
+; r0 uint8_t *s,
+; r1 int pitch,
+; r2 const uint8_t *blimit,
+; r3 const uint8_t *limit,
+; sp const uint8_t *thresh
+|vpx_lpf_horizontal_edge_16_neon| PROC
+ mov r12, #2
+ b mb_lpf_horizontal_edge
+ ENDP ; |vpx_lpf_horizontal_edge_16_neon|
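
In C terms the asm now has one private routine taking the group count in r12 plus two thin exported wrappers. A hedged model with the filter body elided (names mirror the asm labels, but the *_model suffix marks them as illustrative):

#include <stdint.h>

static void mb_lpf_horizontal_edge_model(uint8_t *s, int p,
                                         const uint8_t *blimit,
                                         const uint8_t *limit,
                                         const uint8_t *thresh, int count) {
  /* filters `count` groups of 8 pixels across the horizontal edge */
  (void)s; (void)p; (void)blimit; (void)limit; (void)thresh; (void)count;
}

void vpx_lpf_horizontal_edge_8_model(uint8_t *s, int p, const uint8_t *blimit,
                                     const uint8_t *limit,
                                     const uint8_t *thresh) {
  mb_lpf_horizontal_edge_model(s, p, blimit, limit, thresh, 1);
}

void vpx_lpf_horizontal_edge_16_model(uint8_t *s, int p, const uint8_t *blimit,
                                      const uint8_t *limit,
                                      const uint8_t *thresh) {
  mb_lpf_horizontal_edge_model(s, p, blimit, limit, thresh, 2);
}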
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
diff --git a/libvpx/vpx_dsp/arm/loopfilter_neon.c b/libvpx/vpx_dsp/arm/loopfilter_neon.c
index eff87d29b..aa31f2935 100644
--- a/libvpx/vpx_dsp/arm/loopfilter_neon.c
+++ b/libvpx/vpx_dsp/arm/loopfilter_neon.c
@@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
}
#if HAVE_NEON_ASM
@@ -33,8 +33,8 @@ void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
@@ -44,8 +44,8 @@ void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
diff --git a/libvpx/vp9/encoder/vp9_avg.c b/libvpx/vpx_dsp/avg.c
index a9a4c3050..a8c996663 100644
--- a/libvpx/vp9/encoder/vp9_avg.c
+++ b/libvpx/vpx_dsp/avg.c
@@ -7,11 +7,12 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
+#include <stdlib.h>
+
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
-unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
+unsigned int vpx_avg_8x8_c(const uint8_t *s, int p) {
int i, j;
int sum = 0;
for (i = 0; i < 8; ++i, s+=p)
@@ -20,7 +21,7 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
return (sum + 32) >> 6;
}
-unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
+unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) {
int i, j;
int sum = 0;
for (i = 0; i < 4; ++i, s+=p)
@@ -61,7 +62,9 @@ static void hadamard_col8(const int16_t *src_diff, int src_stride,
coeff[5] = c3 - c7;
}
-void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride,
+// The order of the output coeff of the hadamard is not important. For
+// optimization purposes the final transpose may be skipped.
+void vpx_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
int16_t *coeff) {
int idx;
int16_t buffer[64];
@@ -84,14 +87,14 @@ void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride,
}
// In place 16x16 2D Hadamard transform
-void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
+void vpx_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
int16_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
// src_diff: 9 bit, dynamic range [-255, 255]
- int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8;
- vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
+ vpx_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
}
// coeff: 15 bit, dynamic range [-16320, 16320]
@@ -117,19 +120,19 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
// coeff: 16 bits, dynamic range [-32640, 32640].
// length: value range {16, 64, 256, 1024}.
-int16_t vp9_satd_c(const int16_t *coeff, int length) {
+int vpx_satd_c(const int16_t *coeff, int length) {
int i;
int satd = 0;
for (i = 0; i < length; ++i)
satd += abs(coeff[i]);
// satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
- return (int16_t)satd;
+ return satd;
}
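
The return type widens from int16_t to int because the sum no longer fits in 16 bits: with the worst case documented above (length 1024, |coeff[i]| up to 32640) the total reaches 32640 * 1024 = 33,423,360, i.e. 26 bits, which the old (int16_t) cast silently truncated. A quick check of that bound:

#include <assert.h>
#include <stdint.h>

static void satd_range_check(void) {
  const int32_t worst = 32640 * 1024; /* 33423360, needs 26 bits */
  assert(worst > INT16_MAX);          /* an int16_t return would truncate */
}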
// Integer projection onto row vectors.
// height: value range {16, 32, 64}.
-void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref,
+void vpx_int_pro_row_c(int16_t hbuf[16], const uint8_t *ref,
const int ref_stride, const int height) {
int idx;
const int norm_factor = height >> 1;
@@ -146,7 +149,7 @@ void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref,
}
// width: value range {16, 32, 64}.
-int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
+int16_t vpx_int_pro_col_c(const uint8_t *ref, const int width) {
int idx;
int16_t sum = 0;
// sum: 14 bit, dynamic range [0, 16320]
@@ -158,7 +161,7 @@ int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
// ref: [0 - 510]
// src: [0 - 510]
// bwl: {2, 3, 4}
-int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
+int vpx_vector_var_c(const int16_t *ref, const int16_t *src,
const int bwl) {
int i;
int width = 4 << bwl;
@@ -175,7 +178,7 @@ int vp9_vector_var_c(int16_t const *ref, int16_t const *src,
return var;
}
-void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
+void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
int *min, int *max) {
int i, j;
*min = 255;
@@ -190,7 +193,7 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
}
#if CONFIG_VP9_HIGHBITDEPTH
-unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
+unsigned int vpx_highbd_avg_8x8_c(const uint8_t *s8, int p) {
int i, j;
int sum = 0;
const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
@@ -200,7 +203,7 @@ unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
return (sum + 32) >> 6;
}
-unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) {
+unsigned int vpx_highbd_avg_4x4_c(const uint8_t *s8, int p) {
int i, j;
int sum = 0;
const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
@@ -210,7 +213,7 @@ unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) {
return (sum + 8) >> 4;
}
-void vp9_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
+void vpx_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
int dp, int *min, int *max) {
int i, j;
const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
diff --git a/libvpx/vpx_dsp/bitreader.c b/libvpx/vpx_dsp/bitreader.c
index 6ad806ac3..8140e78e7 100644
--- a/libvpx/vpx_dsp/bitreader.c
+++ b/libvpx/vpx_dsp/bitreader.c
@@ -69,7 +69,7 @@ void vpx_reader_fill(vpx_reader *r) {
buffer += (bits >> 3);
value = r->value | (nv << (shift & 0x7));
} else {
- const int bits_over = (int)(shift + CHAR_BIT - bits_left);
+ const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left);
int loop_end = 0;
if (bits_over >= 0) {
count += LOTS_OF_BITS;
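
The added cast matters because bits_left is unsigned: without it the whole expression is evaluated in the unsigned type, so a logically negative bits_over wraps to a huge value first, and recovering the negative result from the outer (int) cast is implementation-defined. A hedged illustration (8 stands in for CHAR_BIT):

#include <stddef.h>

static int bits_over_model(int shift, size_t bits_left) {
  /* shift + 8 - bits_left would be computed as size_t and wrap;  */
  /* casting bits_left first keeps the subtraction in signed int. */
  return shift + 8 - (int)bits_left;
}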
diff --git a/libvpx/vpx_dsp/bitreader.h b/libvpx/vpx_dsp/bitreader.h
index e817c8b0c..9a441b410 100644
--- a/libvpx/vpx_dsp/bitreader.h
+++ b/libvpx/vpx_dsp/bitreader.h
@@ -98,7 +98,7 @@ static INLINE int vpx_read(vpx_reader *r, int prob) {
}
{
- register unsigned int shift = vpx_norm[range];
+ register int shift = vpx_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
diff --git a/libvpx/vpx_dsp/bitreader_buffer.c b/libvpx/vpx_dsp/bitreader_buffer.c
index bb917263e..d7b55cf9f 100644
--- a/libvpx/vpx_dsp/bitreader_buffer.c
+++ b/libvpx/vpx_dsp/bitreader_buffer.c
@@ -45,7 +45,7 @@ int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb,
int bits) {
#if CONFIG_MISC_FIXES
const int nbits = sizeof(unsigned) * 8 - bits - 1;
- const unsigned value = vpx_rb_read_literal(rb, bits + 1) << nbits;
+ const unsigned value = (unsigned)vpx_rb_read_literal(rb, bits + 1) << nbits;
return ((int) value) >> nbits;
#else
return vpx_rb_read_signed_literal(rb, bits);
diff --git a/libvpx/vpx_dsp/bitwriter.h b/libvpx/vpx_dsp/bitwriter.h
index f6ca9b916..d904997af 100644
--- a/libvpx/vpx_dsp/bitwriter.h
+++ b/libvpx/vpx_dsp/bitwriter.h
@@ -35,7 +35,7 @@ static INLINE void vpx_write(vpx_writer *br, int bit, int probability) {
int count = br->count;
unsigned int range = br->range;
unsigned int lowvalue = br->lowvalue;
- register unsigned int shift;
+ register int shift;
split = 1 + (((range - 1) * probability) >> 8);
diff --git a/libvpx/vpx_dsp/fwd_txfm.c b/libvpx/vpx_dsp/fwd_txfm.c
index 7baaa8b0d..4c0d5db83 100644
--- a/libvpx/vpx_dsp/fwd_txfm.c
+++ b/libvpx/vpx_dsp/fwd_txfm.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/fwd_txfm.h"
void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
@@ -85,7 +86,6 @@ void vpx_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
sum += input[r * stride + c];
output[0] = sum << 1;
- output[1] = 0;
}
void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
@@ -182,7 +182,6 @@ void vpx_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
sum += input[r * stride + c];
output[0] = sum;
- output[1] = 0;
}
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
@@ -367,13 +366,12 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
- tran_low_t sum = 0;
+ int sum = 0;
for (r = 0; r < 16; ++r)
for (c = 0; c < 16; ++c)
sum += input[r * stride + c];
- output[0] = sum >> 1;
- output[1] = 0;
+ output[0] = (tran_low_t)(sum >> 1);
}
static INLINE tran_high_t dct_32_round(tran_high_t input) {
@@ -771,13 +769,12 @@ void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
void vpx_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
int r, c;
- tran_low_t sum = 0;
+ int sum = 0;
for (r = 0; r < 32; ++r)
for (c = 0; c < 32; ++c)
sum += input[r * stride + c];
- output[0] = sum >> 3;
- output[1] = 0;
+ output[0] = (tran_low_t)(sum >> 3);
}
#if CONFIG_VP9_HIGHBITDEPTH
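
Accumulating in int matters because tran_low_t is only 16 bits when high bit depth is disabled: a 32x32 block of 8-bit residuals in [-255, 255] can sum to ±255 * 1024 = ±261,120, while only the final (sum >> 3) is guaranteed to fit back into the coefficient type. (The dropped output[1] = 0; stores presumably reflect that callers of the _1 variants read only the DC term.) A quick check of the bound:

static void fdct32x32_dc_range_check(void) {
  const int max_sum = 255 * 32 * 32; /* 261120: overflows a 16-bit type */
  const int dc = max_sum >> 3;       /* 32640: fits in 16 bits again    */
  (void)dc;
}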
diff --git a/libvpx/vpx_dsp/intrapred.c b/libvpx/vpx_dsp/intrapred.c
index a9669e512..cc4a74bd2 100644
--- a/libvpx/vpx_dsp/intrapred.c
+++ b/libvpx/vpx_dsp/intrapred.c
@@ -44,6 +44,7 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
}
+#if CONFIG_MISC_FIXES
static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -58,6 +59,7 @@ static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
+#endif // CONFIG_MISC_FIXES
static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -76,6 +78,7 @@ static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
}
}
+#if CONFIG_MISC_FIXES
static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -89,6 +92,7 @@ static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
+#endif // CONFIG_MISC_FIXES
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -109,6 +113,7 @@ static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
}
}
+#if CONFIG_MISC_FIXES
static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -121,6 +126,7 @@ static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
+#endif // CONFIG_MISC_FIXES
static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -152,20 +158,29 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
- int r, c;
- dst[0] = AVG3(left[0], above[-1], above[0]);
- for (c = 1; c < bs; c++)
- dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
+ int i;
+#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
+ // silence a spurious -Warray-bounds warning, possibly related to:
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
+ uint8_t border[69];
+#else
+ uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right
+#endif
- dst[stride] = AVG3(above[-1], left[0], left[1]);
- for (r = 2; r < bs; ++r)
- dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
+ // dst(bs, bs - 2)[0], i.e., border starting at bottom-left
+ for (i = 0; i < bs - 2; ++i) {
+ border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]);
+ }
+ border[bs - 2] = AVG3(above[-1], left[0], left[1]);
+ border[bs - 1] = AVG3(left[0], above[-1], above[0]);
+ border[bs - 0] = AVG3(above[-1], above[0], above[1]);
+ // dst[0][2, size), i.e., remaining top border ascending
+ for (i = 0; i < bs - 2; ++i) {
+ border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
+ }
- dst += stride;
- for (r = 1; r < bs; ++r) {
- for (c = 1; c < bs; c++)
- dst[c] = dst[-stride + c - 1];
- dst += stride;
+ for (i = 0; i < bs; ++i) {
+ memcpy(dst + i * stride, border + bs - 1 - i, bs);
}
}
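
The rewritten d135 predictor first smooths the whole outer border, 2 * bs - 1 values running from the bottom-left up to the top-right, and then emits each row as a bs-wide window of that border, shifted one step per row. A hedged 4x4 illustration with made-up border values:

#include <stdint.h>
#include <string.h>

static void d135_border_demo(void) {
  enum { bs = 4 };
  /* border runs bottom-left -> corner -> top-right: 2 * bs - 1 entries */
  const uint8_t border[2 * bs - 1] = { 10, 11, 12, 13, 14, 15, 16 };
  uint8_t dst[bs][bs];
  int i;
  for (i = 0; i < bs; ++i)
    memcpy(dst[i], border + bs - 1 - i, bs);
  /* rows: {13,14,15,16} {12,13,14,15} {11,12,13,14} {10,11,12,13} */
}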
@@ -311,6 +326,7 @@ void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const int K = above[2];
const int L = above[3];
const int M = above[4];
+ (void)left;
dst[0] = AVG3(H, I, J);
dst[1] = AVG3(I, J, K);
@@ -528,6 +544,7 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride,
}
}
+#if CONFIG_MISC_FIXES
static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -544,6 +561,7 @@ static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
dst += stride;
}
}
+#endif // CONFIG_MISC_FIXES
static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
@@ -579,6 +597,7 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
}
}
+#if CONFIG_MISC_FIXES
static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -593,6 +612,7 @@ static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride,
dst += stride;
}
}
+#endif // CONFIG_MISC_FIXES
static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
diff --git a/libvpx/vpx_dsp/inv_txfm.c b/libvpx/vpx_dsp/inv_txfm.c
index 5f3cfddbd..e18d31d7a 100644
--- a/libvpx/vpx_dsp/inv_txfm.c
+++ b/libvpx/vpx_dsp/inv_txfm.c
@@ -11,6 +11,7 @@
#include <math.h>
#include <string.h>
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/inv_txfm.h"
void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
@@ -34,10 +35,10 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
c1 = e1 - c1;
a1 -= b1;
d1 += c1;
- op[0] = WRAPLOW(a1, 8);
- op[1] = WRAPLOW(b1, 8);
- op[2] = WRAPLOW(c1, 8);
- op[3] = WRAPLOW(d1, 8);
+ op[0] = WRAPLOW(a1);
+ op[1] = WRAPLOW(b1);
+ op[2] = WRAPLOW(c1);
+ op[3] = WRAPLOW(d1);
ip += 4;
op += 4;
}
@@ -55,10 +56,10 @@ void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
c1 = e1 - c1;
a1 -= b1;
d1 += c1;
- dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1);
- dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1);
- dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1);
- dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1);
+ dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1));
+ dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1));
+ dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1));
+ dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1));
ip++;
dest++;
@@ -75,8 +76,8 @@ void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
a1 = ip[0] >> UNIT_QUANT_SHIFT;
e1 = a1 >> 1;
a1 -= e1;
- op[0] = WRAPLOW(a1, 8);
- op[1] = op[2] = op[3] = WRAPLOW(e1, 8);
+ op[0] = WRAPLOW(a1);
+ op[1] = op[2] = op[3] = WRAPLOW(e1);
ip = tmp;
for (i = 0; i < 4; i++) {
@@ -97,18 +98,18 @@ void idct4_c(const tran_low_t *input, tran_low_t *output) {
// stage 1
temp1 = (input[0] + input[2]) * cospi_16_64;
temp2 = (input[0] - input[2]) * cospi_16_64;
- step[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step[1] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
- step[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step[3] = WRAPLOW(dct_const_round_shift(temp2));
// stage 2
- output[0] = WRAPLOW(step[0] + step[3], 8);
- output[1] = WRAPLOW(step[1] + step[2], 8);
- output[2] = WRAPLOW(step[1] - step[2], 8);
- output[3] = WRAPLOW(step[0] - step[3], 8);
+ output[0] = WRAPLOW(step[0] + step[3]);
+ output[1] = WRAPLOW(step[1] + step[2]);
+ output[2] = WRAPLOW(step[1] - step[2]);
+ output[3] = WRAPLOW(step[0] - step[3]);
}
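
Throughout inv_txfm.c the two-argument WRAPLOW(x, 8) becomes a one-argument WRAPLOW(x); the old second argument selected a bit-depth-dependent wrap width. Assuming the new macro always wraps the intermediate into the 16-bit coefficient range, a scalar model:

#include <stdint.h>

/* Hedged model of the one-argument WRAPLOW: sign-extend the low 16 bits. */
static int16_t wraplow_model(int32_t x) {
  return (int16_t)((int32_t)((uint32_t)x << 16) >> 16);
}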
void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
@@ -140,8 +141,8 @@ void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
int dest_stride) {
int i;
tran_high_t a1;
- tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
- out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
+ tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+ out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 4);
for (i = 0; i < 4; i++) {
@@ -163,48 +164,48 @@ void idct8_c(const tran_low_t *input, tran_low_t *output) {
step1[3] = input[6];
temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
// stage 2
temp1 = (step1[0] + step1[2]) * cospi_16_64;
temp2 = (step1[0] - step1[2]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[4] = WRAPLOW(step1[4] + step1[5]);
+ step2[5] = WRAPLOW(step1[4] - step1[5]);
+ step2[6] = WRAPLOW(-step1[6] + step1[7]);
+ step2[7] = WRAPLOW(step1[6] + step1[7]);
// stage 3
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
+ step1[0] = WRAPLOW(step2[0] + step2[3]);
+ step1[1] = WRAPLOW(step2[1] + step2[2]);
+ step1[2] = WRAPLOW(step2[1] - step2[2]);
+ step1[3] = WRAPLOW(step2[0] - step2[3]);
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
step1[7] = step2[7];
// stage 4
- output[0] = WRAPLOW(step1[0] + step1[7], 8);
- output[1] = WRAPLOW(step1[1] + step1[6], 8);
- output[2] = WRAPLOW(step1[2] + step1[5], 8);
- output[3] = WRAPLOW(step1[3] + step1[4], 8);
- output[4] = WRAPLOW(step1[3] - step1[4], 8);
- output[5] = WRAPLOW(step1[2] - step1[5], 8);
- output[6] = WRAPLOW(step1[1] - step1[6], 8);
- output[7] = WRAPLOW(step1[0] - step1[7], 8);
+ output[0] = WRAPLOW(step1[0] + step1[7]);
+ output[1] = WRAPLOW(step1[1] + step1[6]);
+ output[2] = WRAPLOW(step1[2] + step1[5]);
+ output[3] = WRAPLOW(step1[3] + step1[4]);
+ output[4] = WRAPLOW(step1[3] - step1[4]);
+ output[5] = WRAPLOW(step1[2] - step1[5]);
+ output[6] = WRAPLOW(step1[1] - step1[6]);
+ output[7] = WRAPLOW(step1[0] - step1[7]);
}
void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
@@ -235,8 +236,8 @@ void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
- tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
- out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
+ tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+ out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 5);
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i)
@@ -265,7 +266,7 @@ void iadst4_c(const tran_low_t *input, tran_low_t *output) {
s4 = sinpi_1_9 * x2;
s5 = sinpi_2_9 * x3;
s6 = sinpi_4_9 * x3;
- s7 = x0 - x2 + x3;
+ s7 = WRAPLOW(x0 - x2 + x3);
s0 = s0 + s3 + s5;
s1 = s1 - s4 - s6;
@@ -276,10 +277,10 @@ void iadst4_c(const tran_low_t *input, tran_low_t *output) {
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
- output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8);
- output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8);
- output[2] = WRAPLOW(dct_const_round_shift(s2), 8);
- output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8);
+ output[0] = WRAPLOW(dct_const_round_shift(s0 + s3));
+ output[1] = WRAPLOW(dct_const_round_shift(s1 + s3));
+ output[2] = WRAPLOW(dct_const_round_shift(s2));
+ output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3));
}
void iadst8_c(const tran_low_t *input, tran_low_t *output) {
@@ -310,14 +311,14 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) {
s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7);
s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7);
- x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8);
- x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8);
- x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8);
- x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8);
- x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8);
- x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8);
- x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8);
- x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8);
+ x0 = WRAPLOW(dct_const_round_shift(s0 + s4));
+ x1 = WRAPLOW(dct_const_round_shift(s1 + s5));
+ x2 = WRAPLOW(dct_const_round_shift(s2 + s6));
+ x3 = WRAPLOW(dct_const_round_shift(s3 + s7));
+ x4 = WRAPLOW(dct_const_round_shift(s0 - s4));
+ x5 = WRAPLOW(dct_const_round_shift(s1 - s5));
+ x6 = WRAPLOW(dct_const_round_shift(s2 - s6));
+ x7 = WRAPLOW(dct_const_round_shift(s3 - s7));
// stage 2
s0 = (int)x0;
@@ -329,14 +330,14 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) {
s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7);
s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7);
- x0 = WRAPLOW(s0 + s2, 8);
- x1 = WRAPLOW(s1 + s3, 8);
- x2 = WRAPLOW(s0 - s2, 8);
- x3 = WRAPLOW(s1 - s3, 8);
- x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8);
- x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8);
- x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8);
- x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8);
+ x0 = WRAPLOW(s0 + s2);
+ x1 = WRAPLOW(s1 + s3);
+ x2 = WRAPLOW(s0 - s2);
+ x3 = WRAPLOW(s1 - s3);
+ x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
+ x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
+ x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
+ x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
// stage 3
s2 = (int)(cospi_16_64 * (x2 + x3));
@@ -344,19 +345,19 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) {
s6 = (int)(cospi_16_64 * (x6 + x7));
s7 = (int)(cospi_16_64 * (x6 - x7));
- x2 = WRAPLOW(dct_const_round_shift(s2), 8);
- x3 = WRAPLOW(dct_const_round_shift(s3), 8);
- x6 = WRAPLOW(dct_const_round_shift(s6), 8);
- x7 = WRAPLOW(dct_const_round_shift(s7), 8);
-
- output[0] = WRAPLOW(x0, 8);
- output[1] = WRAPLOW(-x4, 8);
- output[2] = WRAPLOW(x6, 8);
- output[3] = WRAPLOW(-x2, 8);
- output[4] = WRAPLOW(x3, 8);
- output[5] = WRAPLOW(-x7, 8);
- output[6] = WRAPLOW(x5, 8);
- output[7] = WRAPLOW(-x1, 8);
+ x2 = WRAPLOW(dct_const_round_shift(s2));
+ x3 = WRAPLOW(dct_const_round_shift(s3));
+ x6 = WRAPLOW(dct_const_round_shift(s6));
+ x7 = WRAPLOW(dct_const_round_shift(s7));
+
+ output[0] = WRAPLOW(x0);
+ output[1] = WRAPLOW(-x4);
+ output[2] = WRAPLOW(x6);
+ output[3] = WRAPLOW(-x2);
+ output[4] = WRAPLOW(x3);
+ output[5] = WRAPLOW(-x7);
+ output[6] = WRAPLOW(x5);
+ output[7] = WRAPLOW(-x1);
}
void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
@@ -419,23 +420,23 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) {
temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[8] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[15] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
// stage 3
step1[0] = step2[0];
@@ -445,109 +446,109 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) {
temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- step1[8] = WRAPLOW(step2[8] + step2[9], 8);
- step1[9] = WRAPLOW(step2[8] - step2[9], 8);
- step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
- step1[11] = WRAPLOW(step2[10] + step2[11], 8);
- step1[12] = WRAPLOW(step2[12] + step2[13], 8);
- step1[13] = WRAPLOW(step2[12] - step2[13], 8);
- step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
- step1[15] = WRAPLOW(step2[14] + step2[15], 8);
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+ step1[8] = WRAPLOW(step2[8] + step2[9]);
+ step1[9] = WRAPLOW(step2[8] - step2[9]);
+ step1[10] = WRAPLOW(-step2[10] + step2[11]);
+ step1[11] = WRAPLOW(step2[10] + step2[11]);
+ step1[12] = WRAPLOW(step2[12] + step2[13]);
+ step1[13] = WRAPLOW(step2[12] - step2[13]);
+ step1[14] = WRAPLOW(-step2[14] + step2[15]);
+ step1[15] = WRAPLOW(step2[14] + step2[15]);
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[4] = WRAPLOW(step1[4] + step1[5]);
+ step2[5] = WRAPLOW(step1[4] - step1[5]);
+ step2[6] = WRAPLOW(-step1[6] + step1[7]);
+ step2[7] = WRAPLOW(step1[6] + step1[7]);
step2[8] = step1[8];
step2[15] = step1[15];
temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
step2[11] = step1[11];
step2[12] = step1[12];
// stage 5
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
+ step1[0] = WRAPLOW(step2[0] + step2[3]);
+ step1[1] = WRAPLOW(step2[1] + step2[2]);
+ step1[2] = WRAPLOW(step2[1] - step2[2]);
+ step1[3] = WRAPLOW(step2[0] - step2[3]);
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
step1[7] = step2[7];
- step1[8] = WRAPLOW(step2[8] + step2[11], 8);
- step1[9] = WRAPLOW(step2[9] + step2[10], 8);
- step1[10] = WRAPLOW(step2[9] - step2[10], 8);
- step1[11] = WRAPLOW(step2[8] - step2[11], 8);
- step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
- step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
- step1[14] = WRAPLOW(step2[13] + step2[14], 8);
- step1[15] = WRAPLOW(step2[12] + step2[15], 8);
+ step1[8] = WRAPLOW(step2[8] + step2[11]);
+ step1[9] = WRAPLOW(step2[9] + step2[10]);
+ step1[10] = WRAPLOW(step2[9] - step2[10]);
+ step1[11] = WRAPLOW(step2[8] - step2[11]);
+ step1[12] = WRAPLOW(-step2[12] + step2[15]);
+ step1[13] = WRAPLOW(-step2[13] + step2[14]);
+ step1[14] = WRAPLOW(step2[13] + step2[14]);
+ step1[15] = WRAPLOW(step2[12] + step2[15]);
// stage 6
- step2[0] = WRAPLOW(step1[0] + step1[7], 8);
- step2[1] = WRAPLOW(step1[1] + step1[6], 8);
- step2[2] = WRAPLOW(step1[2] + step1[5], 8);
- step2[3] = WRAPLOW(step1[3] + step1[4], 8);
- step2[4] = WRAPLOW(step1[3] - step1[4], 8);
- step2[5] = WRAPLOW(step1[2] - step1[5], 8);
- step2[6] = WRAPLOW(step1[1] - step1[6], 8);
- step2[7] = WRAPLOW(step1[0] - step1[7], 8);
+ step2[0] = WRAPLOW(step1[0] + step1[7]);
+ step2[1] = WRAPLOW(step1[1] + step1[6]);
+ step2[2] = WRAPLOW(step1[2] + step1[5]);
+ step2[3] = WRAPLOW(step1[3] + step1[4]);
+ step2[4] = WRAPLOW(step1[3] - step1[4]);
+ step2[5] = WRAPLOW(step1[2] - step1[5]);
+ step2[6] = WRAPLOW(step1[1] - step1[6]);
+ step2[7] = WRAPLOW(step1[0] - step1[7]);
step2[8] = step1[8];
step2[9] = step1[9];
temp1 = (-step1[10] + step1[13]) * cospi_16_64;
temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = (-step1[11] + step1[12]) * cospi_16_64;
temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
step2[14] = step1[14];
step2[15] = step1[15];
// stage 7
- output[0] = WRAPLOW(step2[0] + step2[15], 8);
- output[1] = WRAPLOW(step2[1] + step2[14], 8);
- output[2] = WRAPLOW(step2[2] + step2[13], 8);
- output[3] = WRAPLOW(step2[3] + step2[12], 8);
- output[4] = WRAPLOW(step2[4] + step2[11], 8);
- output[5] = WRAPLOW(step2[5] + step2[10], 8);
- output[6] = WRAPLOW(step2[6] + step2[9], 8);
- output[7] = WRAPLOW(step2[7] + step2[8], 8);
- output[8] = WRAPLOW(step2[7] - step2[8], 8);
- output[9] = WRAPLOW(step2[6] - step2[9], 8);
- output[10] = WRAPLOW(step2[5] - step2[10], 8);
- output[11] = WRAPLOW(step2[4] - step2[11], 8);
- output[12] = WRAPLOW(step2[3] - step2[12], 8);
- output[13] = WRAPLOW(step2[2] - step2[13], 8);
- output[14] = WRAPLOW(step2[1] - step2[14], 8);
- output[15] = WRAPLOW(step2[0] - step2[15], 8);
+ output[0] = WRAPLOW(step2[0] + step2[15]);
+ output[1] = WRAPLOW(step2[1] + step2[14]);
+ output[2] = WRAPLOW(step2[2] + step2[13]);
+ output[3] = WRAPLOW(step2[3] + step2[12]);
+ output[4] = WRAPLOW(step2[4] + step2[11]);
+ output[5] = WRAPLOW(step2[5] + step2[10]);
+ output[6] = WRAPLOW(step2[6] + step2[9]);
+ output[7] = WRAPLOW(step2[7] + step2[8]);
+ output[8] = WRAPLOW(step2[7] - step2[8]);
+ output[9] = WRAPLOW(step2[6] - step2[9]);
+ output[10] = WRAPLOW(step2[5] - step2[10]);
+ output[11] = WRAPLOW(step2[4] - step2[11]);
+ output[12] = WRAPLOW(step2[3] - step2[12]);
+ output[13] = WRAPLOW(step2[2] - step2[13]);
+ output[14] = WRAPLOW(step2[1] - step2[14]);
+ output[15] = WRAPLOW(step2[0] - step2[15]);
}
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
@@ -624,22 +625,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) {
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
- x0 = WRAPLOW(dct_const_round_shift(s0 + s8), 8);
- x1 = WRAPLOW(dct_const_round_shift(s1 + s9), 8);
- x2 = WRAPLOW(dct_const_round_shift(s2 + s10), 8);
- x3 = WRAPLOW(dct_const_round_shift(s3 + s11), 8);
- x4 = WRAPLOW(dct_const_round_shift(s4 + s12), 8);
- x5 = WRAPLOW(dct_const_round_shift(s5 + s13), 8);
- x6 = WRAPLOW(dct_const_round_shift(s6 + s14), 8);
- x7 = WRAPLOW(dct_const_round_shift(s7 + s15), 8);
- x8 = WRAPLOW(dct_const_round_shift(s0 - s8), 8);
- x9 = WRAPLOW(dct_const_round_shift(s1 - s9), 8);
- x10 = WRAPLOW(dct_const_round_shift(s2 - s10), 8);
- x11 = WRAPLOW(dct_const_round_shift(s3 - s11), 8);
- x12 = WRAPLOW(dct_const_round_shift(s4 - s12), 8);
- x13 = WRAPLOW(dct_const_round_shift(s5 - s13), 8);
- x14 = WRAPLOW(dct_const_round_shift(s6 - s14), 8);
- x15 = WRAPLOW(dct_const_round_shift(s7 - s15), 8);
+ x0 = WRAPLOW(dct_const_round_shift(s0 + s8));
+ x1 = WRAPLOW(dct_const_round_shift(s1 + s9));
+ x2 = WRAPLOW(dct_const_round_shift(s2 + s10));
+ x3 = WRAPLOW(dct_const_round_shift(s3 + s11));
+ x4 = WRAPLOW(dct_const_round_shift(s4 + s12));
+ x5 = WRAPLOW(dct_const_round_shift(s5 + s13));
+ x6 = WRAPLOW(dct_const_round_shift(s6 + s14));
+ x7 = WRAPLOW(dct_const_round_shift(s7 + s15));
+ x8 = WRAPLOW(dct_const_round_shift(s0 - s8));
+ x9 = WRAPLOW(dct_const_round_shift(s1 - s9));
+ x10 = WRAPLOW(dct_const_round_shift(s2 - s10));
+ x11 = WRAPLOW(dct_const_round_shift(s3 - s11));
+ x12 = WRAPLOW(dct_const_round_shift(s4 - s12));
+ x13 = WRAPLOW(dct_const_round_shift(s5 - s13));
+ x14 = WRAPLOW(dct_const_round_shift(s6 - s14));
+ x15 = WRAPLOW(dct_const_round_shift(s7 - s15));
// stage 2
s0 = x0;
@@ -659,22 +660,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) {
s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
- x0 = WRAPLOW(s0 + s4, 8);
- x1 = WRAPLOW(s1 + s5, 8);
- x2 = WRAPLOW(s2 + s6, 8);
- x3 = WRAPLOW(s3 + s7, 8);
- x4 = WRAPLOW(s0 - s4, 8);
- x5 = WRAPLOW(s1 - s5, 8);
- x6 = WRAPLOW(s2 - s6, 8);
- x7 = WRAPLOW(s3 - s7, 8);
- x8 = WRAPLOW(dct_const_round_shift(s8 + s12), 8);
- x9 = WRAPLOW(dct_const_round_shift(s9 + s13), 8);
- x10 = WRAPLOW(dct_const_round_shift(s10 + s14), 8);
- x11 = WRAPLOW(dct_const_round_shift(s11 + s15), 8);
- x12 = WRAPLOW(dct_const_round_shift(s8 - s12), 8);
- x13 = WRAPLOW(dct_const_round_shift(s9 - s13), 8);
- x14 = WRAPLOW(dct_const_round_shift(s10 - s14), 8);
- x15 = WRAPLOW(dct_const_round_shift(s11 - s15), 8);
+ x0 = WRAPLOW(s0 + s4);
+ x1 = WRAPLOW(s1 + s5);
+ x2 = WRAPLOW(s2 + s6);
+ x3 = WRAPLOW(s3 + s7);
+ x4 = WRAPLOW(s0 - s4);
+ x5 = WRAPLOW(s1 - s5);
+ x6 = WRAPLOW(s2 - s6);
+ x7 = WRAPLOW(s3 - s7);
+ x8 = WRAPLOW(dct_const_round_shift(s8 + s12));
+ x9 = WRAPLOW(dct_const_round_shift(s9 + s13));
+ x10 = WRAPLOW(dct_const_round_shift(s10 + s14));
+ x11 = WRAPLOW(dct_const_round_shift(s11 + s15));
+ x12 = WRAPLOW(dct_const_round_shift(s8 - s12));
+ x13 = WRAPLOW(dct_const_round_shift(s9 - s13));
+ x14 = WRAPLOW(dct_const_round_shift(s10 - s14));
+ x15 = WRAPLOW(dct_const_round_shift(s11 - s15));
// stage 3
s0 = x0;
@@ -694,22 +695,22 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) {
s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
- x0 = WRAPLOW(check_range(s0 + s2), 8);
- x1 = WRAPLOW(check_range(s1 + s3), 8);
- x2 = WRAPLOW(check_range(s0 - s2), 8);
- x3 = WRAPLOW(check_range(s1 - s3), 8);
- x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8);
- x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8);
- x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8);
- x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8);
- x8 = WRAPLOW(check_range(s8 + s10), 8);
- x9 = WRAPLOW(check_range(s9 + s11), 8);
- x10 = WRAPLOW(check_range(s8 - s10), 8);
- x11 = WRAPLOW(check_range(s9 - s11), 8);
- x12 = WRAPLOW(dct_const_round_shift(s12 + s14), 8);
- x13 = WRAPLOW(dct_const_round_shift(s13 + s15), 8);
- x14 = WRAPLOW(dct_const_round_shift(s12 - s14), 8);
- x15 = WRAPLOW(dct_const_round_shift(s13 - s15), 8);
+ x0 = WRAPLOW(s0 + s2);
+ x1 = WRAPLOW(s1 + s3);
+ x2 = WRAPLOW(s0 - s2);
+ x3 = WRAPLOW(s1 - s3);
+ x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
+ x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
+ x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
+ x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
+ x8 = WRAPLOW(s8 + s10);
+ x9 = WRAPLOW(s9 + s11);
+ x10 = WRAPLOW(s8 - s10);
+ x11 = WRAPLOW(s9 - s11);
+ x12 = WRAPLOW(dct_const_round_shift(s12 + s14));
+ x13 = WRAPLOW(dct_const_round_shift(s13 + s15));
+ x14 = WRAPLOW(dct_const_round_shift(s12 - s14));
+ x15 = WRAPLOW(dct_const_round_shift(s13 - s15));
// stage 4
s2 = (- cospi_16_64) * (x2 + x3);
@@ -721,31 +722,31 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) {
s14 = (- cospi_16_64) * (x14 + x15);
s15 = cospi_16_64 * (x14 - x15);
- x2 = WRAPLOW(dct_const_round_shift(s2), 8);
- x3 = WRAPLOW(dct_const_round_shift(s3), 8);
- x6 = WRAPLOW(dct_const_round_shift(s6), 8);
- x7 = WRAPLOW(dct_const_round_shift(s7), 8);
- x10 = WRAPLOW(dct_const_round_shift(s10), 8);
- x11 = WRAPLOW(dct_const_round_shift(s11), 8);
- x14 = WRAPLOW(dct_const_round_shift(s14), 8);
- x15 = WRAPLOW(dct_const_round_shift(s15), 8);
-
- output[0] = WRAPLOW(x0, 8);
- output[1] = WRAPLOW(-x8, 8);
- output[2] = WRAPLOW(x12, 8);
- output[3] = WRAPLOW(-x4, 8);
- output[4] = WRAPLOW(x6, 8);
- output[5] = WRAPLOW(x14, 8);
- output[6] = WRAPLOW(x10, 8);
- output[7] = WRAPLOW(x2, 8);
- output[8] = WRAPLOW(x3, 8);
- output[9] = WRAPLOW(x11, 8);
- output[10] = WRAPLOW(x15, 8);
- output[11] = WRAPLOW(x7, 8);
- output[12] = WRAPLOW(x5, 8);
- output[13] = WRAPLOW(-x13, 8);
- output[14] = WRAPLOW(x9, 8);
- output[15] = WRAPLOW(-x1, 8);
+ x2 = WRAPLOW(dct_const_round_shift(s2));
+ x3 = WRAPLOW(dct_const_round_shift(s3));
+ x6 = WRAPLOW(dct_const_round_shift(s6));
+ x7 = WRAPLOW(dct_const_round_shift(s7));
+ x10 = WRAPLOW(dct_const_round_shift(s10));
+ x11 = WRAPLOW(dct_const_round_shift(s11));
+ x14 = WRAPLOW(dct_const_round_shift(s14));
+ x15 = WRAPLOW(dct_const_round_shift(s15));
+
+ output[0] = WRAPLOW(x0);
+ output[1] = WRAPLOW(-x8);
+ output[2] = WRAPLOW(x12);
+ output[3] = WRAPLOW(-x4);
+ output[4] = WRAPLOW(x6);
+ output[5] = WRAPLOW(x14);
+ output[6] = WRAPLOW(x10);
+ output[7] = WRAPLOW(x2);
+ output[8] = WRAPLOW(x3);
+ output[9] = WRAPLOW(x11);
+ output[10] = WRAPLOW(x15);
+ output[11] = WRAPLOW(x7);
+ output[12] = WRAPLOW(x5);
+ output[13] = WRAPLOW(-x13);
+ output[14] = WRAPLOW(x9);
+ output[15] = WRAPLOW(-x1);
}
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
@@ -778,8 +779,8 @@ void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
- tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
- out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
+ tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+ out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i)
@@ -812,43 +813,43 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
- step1[16] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[31] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[16] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[31] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
- step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[17] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[30] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
- step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[29] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
- step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[19] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[28] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
- step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[27] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
- step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
- step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[25] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
- step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[23] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[24] = WRAPLOW(dct_const_round_shift(temp2));
// stage 2
step2[0] = step1[0];
@@ -862,40 +863,40 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[8] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[15] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- step2[16] = WRAPLOW(step1[16] + step1[17], 8);
- step2[17] = WRAPLOW(step1[16] - step1[17], 8);
- step2[18] = WRAPLOW(-step1[18] + step1[19], 8);
- step2[19] = WRAPLOW(step1[18] + step1[19], 8);
- step2[20] = WRAPLOW(step1[20] + step1[21], 8);
- step2[21] = WRAPLOW(step1[20] - step1[21], 8);
- step2[22] = WRAPLOW(-step1[22] + step1[23], 8);
- step2[23] = WRAPLOW(step1[22] + step1[23], 8);
- step2[24] = WRAPLOW(step1[24] + step1[25], 8);
- step2[25] = WRAPLOW(step1[24] - step1[25], 8);
- step2[26] = WRAPLOW(-step1[26] + step1[27], 8);
- step2[27] = WRAPLOW(step1[26] + step1[27], 8);
- step2[28] = WRAPLOW(step1[28] + step1[29], 8);
- step2[29] = WRAPLOW(step1[28] - step1[29], 8);
- step2[30] = WRAPLOW(-step1[30] + step1[31], 8);
- step2[31] = WRAPLOW(step1[30] + step1[31], 8);
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+
+ step2[16] = WRAPLOW(step1[16] + step1[17]);
+ step2[17] = WRAPLOW(step1[16] - step1[17]);
+ step2[18] = WRAPLOW(-step1[18] + step1[19]);
+ step2[19] = WRAPLOW(step1[18] + step1[19]);
+ step2[20] = WRAPLOW(step1[20] + step1[21]);
+ step2[21] = WRAPLOW(step1[20] - step1[21]);
+ step2[22] = WRAPLOW(-step1[22] + step1[23]);
+ step2[23] = WRAPLOW(step1[22] + step1[23]);
+ step2[24] = WRAPLOW(step1[24] + step1[25]);
+ step2[25] = WRAPLOW(step1[24] - step1[25]);
+ step2[26] = WRAPLOW(-step1[26] + step1[27]);
+ step2[27] = WRAPLOW(step1[26] + step1[27]);
+ step2[28] = WRAPLOW(step1[28] + step1[29]);
+ step2[29] = WRAPLOW(step1[28] - step1[29]);
+ step2[30] = WRAPLOW(-step1[30] + step1[31]);
+ step2[31] = WRAPLOW(step1[30] + step1[31]);
// stage 3
step1[0] = step2[0];
@@ -905,42 +906,42 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- step1[8] = WRAPLOW(step2[8] + step2[9], 8);
- step1[9] = WRAPLOW(step2[8] - step2[9], 8);
- step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
- step1[11] = WRAPLOW(step2[10] + step2[11], 8);
- step1[12] = WRAPLOW(step2[12] + step2[13], 8);
- step1[13] = WRAPLOW(step2[12] - step2[13], 8);
- step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
- step1[15] = WRAPLOW(step2[14] + step2[15], 8);
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+ step1[8] = WRAPLOW(step2[8] + step2[9]);
+ step1[9] = WRAPLOW(step2[8] - step2[9]);
+ step1[10] = WRAPLOW(-step2[10] + step2[11]);
+ step1[11] = WRAPLOW(step2[10] + step2[11]);
+ step1[12] = WRAPLOW(step2[12] + step2[13]);
+ step1[13] = WRAPLOW(step2[12] - step2[13]);
+ step1[14] = WRAPLOW(-step2[14] + step2[15]);
+ step1[15] = WRAPLOW(step2[14] + step2[15]);
step1[16] = step2[16];
step1[31] = step2[31];
temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
- step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[17] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[30] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
- step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[29] = WRAPLOW(dct_const_round_shift(temp2));
step1[19] = step2[19];
step1[20] = step2[20];
temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
- step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
- step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[25] = WRAPLOW(dct_const_round_shift(temp2));
step1[23] = step2[23];
step1[24] = step2[24];
step1[27] = step2[27];
@@ -949,87 +950,87 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[4] = WRAPLOW(step1[4] + step1[5]);
+ step2[5] = WRAPLOW(step1[4] - step1[5]);
+ step2[6] = WRAPLOW(-step1[6] + step1[7]);
+ step2[7] = WRAPLOW(step1[6] + step1[7]);
step2[8] = step1[8];
step2[15] = step1[15];
temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
step2[11] = step1[11];
step2[12] = step1[12];
- step2[16] = WRAPLOW(step1[16] + step1[19], 8);
- step2[17] = WRAPLOW(step1[17] + step1[18], 8);
- step2[18] = WRAPLOW(step1[17] - step1[18], 8);
- step2[19] = WRAPLOW(step1[16] - step1[19], 8);
- step2[20] = WRAPLOW(-step1[20] + step1[23], 8);
- step2[21] = WRAPLOW(-step1[21] + step1[22], 8);
- step2[22] = WRAPLOW(step1[21] + step1[22], 8);
- step2[23] = WRAPLOW(step1[20] + step1[23], 8);
-
- step2[24] = WRAPLOW(step1[24] + step1[27], 8);
- step2[25] = WRAPLOW(step1[25] + step1[26], 8);
- step2[26] = WRAPLOW(step1[25] - step1[26], 8);
- step2[27] = WRAPLOW(step1[24] - step1[27], 8);
- step2[28] = WRAPLOW(-step1[28] + step1[31], 8);
- step2[29] = WRAPLOW(-step1[29] + step1[30], 8);
- step2[30] = WRAPLOW(step1[29] + step1[30], 8);
- step2[31] = WRAPLOW(step1[28] + step1[31], 8);
+ step2[16] = WRAPLOW(step1[16] + step1[19]);
+ step2[17] = WRAPLOW(step1[17] + step1[18]);
+ step2[18] = WRAPLOW(step1[17] - step1[18]);
+ step2[19] = WRAPLOW(step1[16] - step1[19]);
+ step2[20] = WRAPLOW(-step1[20] + step1[23]);
+ step2[21] = WRAPLOW(-step1[21] + step1[22]);
+ step2[22] = WRAPLOW(step1[21] + step1[22]);
+ step2[23] = WRAPLOW(step1[20] + step1[23]);
+
+ step2[24] = WRAPLOW(step1[24] + step1[27]);
+ step2[25] = WRAPLOW(step1[25] + step1[26]);
+ step2[26] = WRAPLOW(step1[25] - step1[26]);
+ step2[27] = WRAPLOW(step1[24] - step1[27]);
+ step2[28] = WRAPLOW(-step1[28] + step1[31]);
+ step2[29] = WRAPLOW(-step1[29] + step1[30]);
+ step2[30] = WRAPLOW(step1[29] + step1[30]);
+ step2[31] = WRAPLOW(step1[28] + step1[31]);
// stage 5
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
+ step1[0] = WRAPLOW(step2[0] + step2[3]);
+ step1[1] = WRAPLOW(step2[1] + step2[2]);
+ step1[2] = WRAPLOW(step2[1] - step2[2]);
+ step1[3] = WRAPLOW(step2[0] - step2[3]);
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
step1[7] = step2[7];
- step1[8] = WRAPLOW(step2[8] + step2[11], 8);
- step1[9] = WRAPLOW(step2[9] + step2[10], 8);
- step1[10] = WRAPLOW(step2[9] - step2[10], 8);
- step1[11] = WRAPLOW(step2[8] - step2[11], 8);
- step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
- step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
- step1[14] = WRAPLOW(step2[13] + step2[14], 8);
- step1[15] = WRAPLOW(step2[12] + step2[15], 8);
+ step1[8] = WRAPLOW(step2[8] + step2[11]);
+ step1[9] = WRAPLOW(step2[9] + step2[10]);
+ step1[10] = WRAPLOW(step2[9] - step2[10]);
+ step1[11] = WRAPLOW(step2[8] - step2[11]);
+ step1[12] = WRAPLOW(-step2[12] + step2[15]);
+ step1[13] = WRAPLOW(-step2[13] + step2[14]);
+ step1[14] = WRAPLOW(step2[13] + step2[14]);
+ step1[15] = WRAPLOW(step2[12] + step2[15]);
step1[16] = step2[16];
step1[17] = step2[17];
temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
- step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[29] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
- step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[19] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[28] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
- step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[27] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
- step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
step1[22] = step2[22];
step1[23] = step2[23];
step1[24] = step2[24];
@@ -1038,62 +1039,62 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
step1[31] = step2[31];
// stage 6
- step2[0] = WRAPLOW(step1[0] + step1[7], 8);
- step2[1] = WRAPLOW(step1[1] + step1[6], 8);
- step2[2] = WRAPLOW(step1[2] + step1[5], 8);
- step2[3] = WRAPLOW(step1[3] + step1[4], 8);
- step2[4] = WRAPLOW(step1[3] - step1[4], 8);
- step2[5] = WRAPLOW(step1[2] - step1[5], 8);
- step2[6] = WRAPLOW(step1[1] - step1[6], 8);
- step2[7] = WRAPLOW(step1[0] - step1[7], 8);
+ step2[0] = WRAPLOW(step1[0] + step1[7]);
+ step2[1] = WRAPLOW(step1[1] + step1[6]);
+ step2[2] = WRAPLOW(step1[2] + step1[5]);
+ step2[3] = WRAPLOW(step1[3] + step1[4]);
+ step2[4] = WRAPLOW(step1[3] - step1[4]);
+ step2[5] = WRAPLOW(step1[2] - step1[5]);
+ step2[6] = WRAPLOW(step1[1] - step1[6]);
+ step2[7] = WRAPLOW(step1[0] - step1[7]);
step2[8] = step1[8];
step2[9] = step1[9];
temp1 = (-step1[10] + step1[13]) * cospi_16_64;
temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = (-step1[11] + step1[12]) * cospi_16_64;
temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
step2[14] = step1[14];
step2[15] = step1[15];
- step2[16] = WRAPLOW(step1[16] + step1[23], 8);
- step2[17] = WRAPLOW(step1[17] + step1[22], 8);
- step2[18] = WRAPLOW(step1[18] + step1[21], 8);
- step2[19] = WRAPLOW(step1[19] + step1[20], 8);
- step2[20] = WRAPLOW(step1[19] - step1[20], 8);
- step2[21] = WRAPLOW(step1[18] - step1[21], 8);
- step2[22] = WRAPLOW(step1[17] - step1[22], 8);
- step2[23] = WRAPLOW(step1[16] - step1[23], 8);
-
- step2[24] = WRAPLOW(-step1[24] + step1[31], 8);
- step2[25] = WRAPLOW(-step1[25] + step1[30], 8);
- step2[26] = WRAPLOW(-step1[26] + step1[29], 8);
- step2[27] = WRAPLOW(-step1[27] + step1[28], 8);
- step2[28] = WRAPLOW(step1[27] + step1[28], 8);
- step2[29] = WRAPLOW(step1[26] + step1[29], 8);
- step2[30] = WRAPLOW(step1[25] + step1[30], 8);
- step2[31] = WRAPLOW(step1[24] + step1[31], 8);
+ step2[16] = WRAPLOW(step1[16] + step1[23]);
+ step2[17] = WRAPLOW(step1[17] + step1[22]);
+ step2[18] = WRAPLOW(step1[18] + step1[21]);
+ step2[19] = WRAPLOW(step1[19] + step1[20]);
+ step2[20] = WRAPLOW(step1[19] - step1[20]);
+ step2[21] = WRAPLOW(step1[18] - step1[21]);
+ step2[22] = WRAPLOW(step1[17] - step1[22]);
+ step2[23] = WRAPLOW(step1[16] - step1[23]);
+
+ step2[24] = WRAPLOW(-step1[24] + step1[31]);
+ step2[25] = WRAPLOW(-step1[25] + step1[30]);
+ step2[26] = WRAPLOW(-step1[26] + step1[29]);
+ step2[27] = WRAPLOW(-step1[27] + step1[28]);
+ step2[28] = WRAPLOW(step1[27] + step1[28]);
+ step2[29] = WRAPLOW(step1[26] + step1[29]);
+ step2[30] = WRAPLOW(step1[25] + step1[30]);
+ step2[31] = WRAPLOW(step1[24] + step1[31]);
// stage 7
- step1[0] = WRAPLOW(step2[0] + step2[15], 8);
- step1[1] = WRAPLOW(step2[1] + step2[14], 8);
- step1[2] = WRAPLOW(step2[2] + step2[13], 8);
- step1[3] = WRAPLOW(step2[3] + step2[12], 8);
- step1[4] = WRAPLOW(step2[4] + step2[11], 8);
- step1[5] = WRAPLOW(step2[5] + step2[10], 8);
- step1[6] = WRAPLOW(step2[6] + step2[9], 8);
- step1[7] = WRAPLOW(step2[7] + step2[8], 8);
- step1[8] = WRAPLOW(step2[7] - step2[8], 8);
- step1[9] = WRAPLOW(step2[6] - step2[9], 8);
- step1[10] = WRAPLOW(step2[5] - step2[10], 8);
- step1[11] = WRAPLOW(step2[4] - step2[11], 8);
- step1[12] = WRAPLOW(step2[3] - step2[12], 8);
- step1[13] = WRAPLOW(step2[2] - step2[13], 8);
- step1[14] = WRAPLOW(step2[1] - step2[14], 8);
- step1[15] = WRAPLOW(step2[0] - step2[15], 8);
+ step1[0] = WRAPLOW(step2[0] + step2[15]);
+ step1[1] = WRAPLOW(step2[1] + step2[14]);
+ step1[2] = WRAPLOW(step2[2] + step2[13]);
+ step1[3] = WRAPLOW(step2[3] + step2[12]);
+ step1[4] = WRAPLOW(step2[4] + step2[11]);
+ step1[5] = WRAPLOW(step2[5] + step2[10]);
+ step1[6] = WRAPLOW(step2[6] + step2[9]);
+ step1[7] = WRAPLOW(step2[7] + step2[8]);
+ step1[8] = WRAPLOW(step2[7] - step2[8]);
+ step1[9] = WRAPLOW(step2[6] - step2[9]);
+ step1[10] = WRAPLOW(step2[5] - step2[10]);
+ step1[11] = WRAPLOW(step2[4] - step2[11]);
+ step1[12] = WRAPLOW(step2[3] - step2[12]);
+ step1[13] = WRAPLOW(step2[2] - step2[13]);
+ step1[14] = WRAPLOW(step2[1] - step2[14]);
+ step1[15] = WRAPLOW(step2[0] - step2[15]);
step1[16] = step2[16];
step1[17] = step2[17];
@@ -1101,58 +1102,58 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
step1[19] = step2[19];
temp1 = (-step2[20] + step2[27]) * cospi_16_64;
temp2 = (step2[20] + step2[27]) * cospi_16_64;
- step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[27] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = (-step2[21] + step2[26]) * cospi_16_64;
temp2 = (step2[21] + step2[26]) * cospi_16_64;
- step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = (-step2[22] + step2[25]) * cospi_16_64;
temp2 = (step2[22] + step2[25]) * cospi_16_64;
- step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[25] = WRAPLOW(dct_const_round_shift(temp2));
temp1 = (-step2[23] + step2[24]) * cospi_16_64;
temp2 = (step2[23] + step2[24]) * cospi_16_64;
- step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ step1[23] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[24] = WRAPLOW(dct_const_round_shift(temp2));
step1[28] = step2[28];
step1[29] = step2[29];
step1[30] = step2[30];
step1[31] = step2[31];
// final stage
- output[0] = WRAPLOW(step1[0] + step1[31], 8);
- output[1] = WRAPLOW(step1[1] + step1[30], 8);
- output[2] = WRAPLOW(step1[2] + step1[29], 8);
- output[3] = WRAPLOW(step1[3] + step1[28], 8);
- output[4] = WRAPLOW(step1[4] + step1[27], 8);
- output[5] = WRAPLOW(step1[5] + step1[26], 8);
- output[6] = WRAPLOW(step1[6] + step1[25], 8);
- output[7] = WRAPLOW(step1[7] + step1[24], 8);
- output[8] = WRAPLOW(step1[8] + step1[23], 8);
- output[9] = WRAPLOW(step1[9] + step1[22], 8);
- output[10] = WRAPLOW(step1[10] + step1[21], 8);
- output[11] = WRAPLOW(step1[11] + step1[20], 8);
- output[12] = WRAPLOW(step1[12] + step1[19], 8);
- output[13] = WRAPLOW(step1[13] + step1[18], 8);
- output[14] = WRAPLOW(step1[14] + step1[17], 8);
- output[15] = WRAPLOW(step1[15] + step1[16], 8);
- output[16] = WRAPLOW(step1[15] - step1[16], 8);
- output[17] = WRAPLOW(step1[14] - step1[17], 8);
- output[18] = WRAPLOW(step1[13] - step1[18], 8);
- output[19] = WRAPLOW(step1[12] - step1[19], 8);
- output[20] = WRAPLOW(step1[11] - step1[20], 8);
- output[21] = WRAPLOW(step1[10] - step1[21], 8);
- output[22] = WRAPLOW(step1[9] - step1[22], 8);
- output[23] = WRAPLOW(step1[8] - step1[23], 8);
- output[24] = WRAPLOW(step1[7] - step1[24], 8);
- output[25] = WRAPLOW(step1[6] - step1[25], 8);
- output[26] = WRAPLOW(step1[5] - step1[26], 8);
- output[27] = WRAPLOW(step1[4] - step1[27], 8);
- output[28] = WRAPLOW(step1[3] - step1[28], 8);
- output[29] = WRAPLOW(step1[2] - step1[29], 8);
- output[30] = WRAPLOW(step1[1] - step1[30], 8);
- output[31] = WRAPLOW(step1[0] - step1[31], 8);
+ output[0] = WRAPLOW(step1[0] + step1[31]);
+ output[1] = WRAPLOW(step1[1] + step1[30]);
+ output[2] = WRAPLOW(step1[2] + step1[29]);
+ output[3] = WRAPLOW(step1[3] + step1[28]);
+ output[4] = WRAPLOW(step1[4] + step1[27]);
+ output[5] = WRAPLOW(step1[5] + step1[26]);
+ output[6] = WRAPLOW(step1[6] + step1[25]);
+ output[7] = WRAPLOW(step1[7] + step1[24]);
+ output[8] = WRAPLOW(step1[8] + step1[23]);
+ output[9] = WRAPLOW(step1[9] + step1[22]);
+ output[10] = WRAPLOW(step1[10] + step1[21]);
+ output[11] = WRAPLOW(step1[11] + step1[20]);
+ output[12] = WRAPLOW(step1[12] + step1[19]);
+ output[13] = WRAPLOW(step1[13] + step1[18]);
+ output[14] = WRAPLOW(step1[14] + step1[17]);
+ output[15] = WRAPLOW(step1[15] + step1[16]);
+ output[16] = WRAPLOW(step1[15] - step1[16]);
+ output[17] = WRAPLOW(step1[14] - step1[17]);
+ output[18] = WRAPLOW(step1[13] - step1[18]);
+ output[19] = WRAPLOW(step1[12] - step1[19]);
+ output[20] = WRAPLOW(step1[11] - step1[20]);
+ output[21] = WRAPLOW(step1[10] - step1[21]);
+ output[22] = WRAPLOW(step1[9] - step1[22]);
+ output[23] = WRAPLOW(step1[8] - step1[23]);
+ output[24] = WRAPLOW(step1[7] - step1[24]);
+ output[25] = WRAPLOW(step1[6] - step1[25]);
+ output[26] = WRAPLOW(step1[5] - step1[26]);
+ output[27] = WRAPLOW(step1[4] - step1[27]);
+ output[28] = WRAPLOW(step1[3] - step1[28]);
+ output[29] = WRAPLOW(step1[2] - step1[29]);
+ output[30] = WRAPLOW(step1[1] - step1[30]);
+ output[31] = WRAPLOW(step1[0] - step1[31]);
}
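
The hunks above drop the second argument from WRAPLOW throughout the 8-bit path: the old form was always invoked as WRAPLOW(x, 8) here, so the bit-depth parameter carried no information and v1.6.0 folds it into the macro. A minimal sketch of what the one-argument form amounts to, assuming the 8-bit path wraps intermediates to 16 bits (8 bits of pixel depth plus 8 bits of headroom); this is an illustration, not the verbatim libvpx macro:

    /* Sketch only: wrap a widened intermediate back to 16 bits, as the
     * old WRAPLOW(x, 8) did.  The (int16_t) conversion wraps on every
     * two's-complement target. */
    #define WRAPLOW_SKETCH(x) ((int32_t)(int16_t)(x))
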
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
@@ -1194,6 +1195,33 @@ void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
}
}
+void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ tran_low_t out[32 * 32] = {0};
+ tran_low_t *outptr = out;
+ int i, j;
+ tran_low_t temp_in[32], temp_out[32];
+
+ // Rows
+ // only upper-left 16x16 has non-zero coeff
+ for (i = 0; i < 16; ++i) {
+ idct32_c(input, outptr);
+ input += 32;
+ outptr += 32;
+ }
+
+ // Columns
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = out[j * 32 + i];
+ idct32_c(temp_in, temp_out);
+ for (j = 0; j < 32; ++j) {
+ dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+ ROUND_POWER_OF_TWO(temp_out[j], 6));
+ }
+ }
+}
+
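
vpx_idct32x32_135_add_c is new in this release: per its own comment, when at most 135 coefficients are non-zero they all fall inside the upper-left 16x16 quadrant, so only the first 16 row transforms need to run before the full 32 column passes. A hypothetical caller showing where the kernel slots in; the eob thresholds (1, 34, 135) are assumptions read off the kernel names, not code quoted from this patch, and the vpx_dsp prototypes are assumed to be in scope:

    static void idct32x32_add_sketch(const tran_low_t *input, uint8_t *dest,
                                     int stride, int eob) {
      if (eob == 1)                                    /* DC-only block */
        vpx_idct32x32_1_add_c(input, dest, stride);
      else if (eob <= 34)                              /* upper-left 8x8 */
        vpx_idct32x32_34_add_c(input, dest, stride);
      else if (eob <= 135)                             /* upper-left 16x16 */
        vpx_idct32x32_135_add_c(input, dest, stride);
      else                                             /* full 32x32 */
        vpx_idct32x32_1024_add_c(input, dest, stride);
    }
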
void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
tran_low_t out[32 * 32] = {0};
@@ -1225,8 +1253,8 @@ void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
- tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
- out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
+ tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+ out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 32; ++j) {
@@ -1260,10 +1288,10 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
c1 = e1 - c1;
a1 -= b1;
d1 += c1;
- op[0] = WRAPLOW(a1, bd);
- op[1] = WRAPLOW(b1, bd);
- op[2] = WRAPLOW(c1, bd);
- op[3] = WRAPLOW(d1, bd);
+ op[0] = HIGHBD_WRAPLOW(a1, bd);
+ op[1] = HIGHBD_WRAPLOW(b1, bd);
+ op[2] = HIGHBD_WRAPLOW(c1, bd);
+ op[3] = HIGHBD_WRAPLOW(d1, bd);
ip += 4;
op += 4;
}
@@ -1281,10 +1309,14 @@ void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
c1 = e1 - c1;
a1 -= b1;
d1 += c1;
- dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
- dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
- dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
- dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);
+ dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0],
+ HIGHBD_WRAPLOW(a1, bd), bd);
+ dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1],
+ HIGHBD_WRAPLOW(b1, bd), bd);
+ dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2],
+ HIGHBD_WRAPLOW(c1, bd), bd);
+ dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3],
+ HIGHBD_WRAPLOW(d1, bd), bd);
ip++;
dest++;
@@ -1304,8 +1336,8 @@ void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
a1 = ip[0] >> UNIT_QUANT_SHIFT;
e1 = a1 >> 1;
a1 -= e1;
- op[0] = WRAPLOW(a1, bd);
- op[1] = op[2] = op[3] = WRAPLOW(e1, bd);
+ op[0] = HIGHBD_WRAPLOW(a1, bd);
+ op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd);
ip = tmp;
for (i = 0; i < 4; i++) {
@@ -1331,18 +1363,18 @@ void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
// stage 1
temp1 = (input[0] + input[2]) * cospi_16_64;
temp2 = (input[0] - input[2]) * cospi_16_64;
- step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
- step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
// stage 2
- output[0] = WRAPLOW(step[0] + step[3], bd);
- output[1] = WRAPLOW(step[1] + step[2], bd);
- output[2] = WRAPLOW(step[1] - step[2], bd);
- output[3] = WRAPLOW(step[0] - step[3], bd);
+ output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd);
+ output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd);
+ output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd);
+ output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd);
}
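
In the high-bit-depth hunks the pattern WRAPLOW(highbd_dct_const_round_shift(x, bd), bd) becomes HIGHBD_WRAPLOW(highbd_dct_const_round_shift(x), bd): the rounding shift is now bit-depth independent, and only the wrap keeps the bd parameter. A sketch of a wrap with an explicit window, assuming the window is bd + 8 bits wide (inferred from the old two-argument form, not quoted from the patch):

    #include <stdint.h>
    typedef int64_t tran_high_t;              /* libvpx's widened type */

    /* Sketch only: reduce x to a signed (bd + 8)-bit value. */
    static tran_high_t highbd_wraplow_sketch(tran_high_t x, int bd) {
      const int width = bd + 8;
      const uint64_t mask = (~(uint64_t)0) >> (64 - width);
      uint64_t u = (uint64_t)x & mask;        /* keep the low `width` bits */
      if (u >> (width - 1)) u |= ~mask;       /* sign-extend the result */
      return (tran_high_t)u;
    }
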
void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1376,11 +1408,11 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
int dest_stride, int bd) {
int i;
tran_high_t a1;
- tran_low_t out = WRAPLOW(
- highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
+ tran_low_t out = HIGHBD_WRAPLOW(
+ highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
+ out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 4);
for (i = 0; i < 4; i++) {
@@ -1402,39 +1434,39 @@ void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
step1[3] = input[6];
temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
- step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
- step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
// stage 2 & stage 3 - even half
vpx_highbd_idct4_c(step1, step1, bd);
// stage 2 - odd half
- step2[4] = WRAPLOW(step1[4] + step1[5], bd);
- step2[5] = WRAPLOW(step1[4] - step1[5], bd);
- step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
- step2[7] = WRAPLOW(step1[6] + step1[7], bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
// stage 3 - odd half
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[7] = step2[7];
// stage 4
- output[0] = WRAPLOW(step1[0] + step1[7], bd);
- output[1] = WRAPLOW(step1[1] + step1[6], bd);
- output[2] = WRAPLOW(step1[2] + step1[5], bd);
- output[3] = WRAPLOW(step1[3] + step1[4], bd);
- output[4] = WRAPLOW(step1[3] - step1[4], bd);
- output[5] = WRAPLOW(step1[2] - step1[5], bd);
- output[6] = WRAPLOW(step1[1] - step1[6], bd);
- output[7] = WRAPLOW(step1[0] - step1[7], bd);
+ output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
}
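
The cospi_N_64 constants used above are Q14 fixed-point cosines, cospi_N_64 = round(16384 * cos(N * pi / 64)). A double-precision mirror of the 4-point kernel (which the idct8 above reuses for its even half via vpx_highbd_idct4_c) makes the butterfly structure easier to read; this is a reference sketch in exact arithmetic, where the 2^14 scaling and the rounding shift cancel:

    #include <math.h>

    static void idct4_float_ref(const double in[4], double out[4]) {
      const double pi  = acos(-1.0);
      const double c16 = cos(pi / 4);         /* cospi_16_64 / 16384 */
      const double c8  = cos(pi / 8);         /* cospi_8_64  / 16384 */
      const double c24 = cos(3 * pi / 8);     /* cospi_24_64 / 16384 */
      const double s0  = (in[0] + in[2]) * c16;
      const double s1  = (in[0] - in[2]) * c16;
      const double s2  = in[1] * c24 - in[3] * c8;
      const double s3  = in[1] * c8  + in[3] * c24;
      out[0] = s0 + s3;
      out[1] = s1 + s2;
      out[2] = s1 - s2;
      out[3] = s0 - s3;
    }
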
void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1468,10 +1500,10 @@ void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i, j;
tran_high_t a1;
- tran_low_t out = WRAPLOW(
- highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
+ tran_low_t out = HIGHBD_WRAPLOW(
+ highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
+ out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 5);
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i)
@@ -1501,7 +1533,7 @@ void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
s4 = sinpi_1_9 * x2;
s5 = sinpi_2_9 * x3;
s6 = sinpi_4_9 * x3;
- s7 = (tran_high_t)(x0 - x2 + x3);
+ s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd);
s0 = s0 + s3 + s5;
s1 = s1 - s4 - s6;
@@ -1512,10 +1544,10 @@ void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
- output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd);
- output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd);
- output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
- output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd);
+ output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd);
+ output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd);
+ output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+ output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd);
}
void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
@@ -1546,14 +1578,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
- x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s4, bd), bd);
- x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s5, bd), bd);
- x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s6, bd), bd);
- x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s7, bd), bd);
- x4 = WRAPLOW(highbd_dct_const_round_shift(s0 - s4, bd), bd);
- x5 = WRAPLOW(highbd_dct_const_round_shift(s1 - s5, bd), bd);
- x6 = WRAPLOW(highbd_dct_const_round_shift(s2 - s6, bd), bd);
- x7 = WRAPLOW(highbd_dct_const_round_shift(s3 - s7, bd), bd);
+ x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd);
+ x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd);
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd);
// stage 2
s0 = x0;
@@ -1565,14 +1597,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
- x0 = WRAPLOW(s0 + s2, bd);
- x1 = WRAPLOW(s1 + s3, bd);
- x2 = WRAPLOW(s0 - s2, bd);
- x3 = WRAPLOW(s1 - s3, bd);
- x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd);
- x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd);
- x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd);
- x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd);
+ x0 = HIGHBD_WRAPLOW(s0 + s2, bd);
+ x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
+ x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
+ x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
// stage 3
s2 = cospi_16_64 * (x2 + x3);
@@ -1580,19 +1612,19 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
s6 = cospi_16_64 * (x6 + x7);
s7 = cospi_16_64 * (x6 - x7);
- x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
- x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd);
- x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd);
- x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd);
-
- output[0] = WRAPLOW(x0, bd);
- output[1] = WRAPLOW(-x4, bd);
- output[2] = WRAPLOW(x6, bd);
- output[3] = WRAPLOW(-x2, bd);
- output[4] = WRAPLOW(x3, bd);
- output[5] = WRAPLOW(-x7, bd);
- output[6] = WRAPLOW(x5, bd);
- output[7] = WRAPLOW(-x1, bd);
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+
+ output[0] = HIGHBD_WRAPLOW(x0, bd);
+ output[1] = HIGHBD_WRAPLOW(-x4, bd);
+ output[2] = HIGHBD_WRAPLOW(x6, bd);
+ output[3] = HIGHBD_WRAPLOW(-x2, bd);
+ output[4] = HIGHBD_WRAPLOW(x3, bd);
+ output[5] = HIGHBD_WRAPLOW(-x7, bd);
+ output[6] = HIGHBD_WRAPLOW(x5, bd);
+ output[7] = HIGHBD_WRAPLOW(-x1, bd);
}
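
vpx_highbd_iadst8_c exists alongside vpx_highbd_idct8_c because VP9's hybrid transforms can apply DCT or ADST independently per direction. An illustrative pairing table; the enum order (DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST) follows VP9's TX_TYPE, but the table itself is a sketch reproduced from memory, not code from this patch, and tran_low_t is assumed declared:

    typedef void (*hbd_txfm1d)(const tran_low_t *in, tran_low_t *out, int bd);

    static const struct { hbd_txfm1d cols, rows; } iht8_sketch[4] = {
      { vpx_highbd_idct8_c,  vpx_highbd_idct8_c  },   /* DCT_DCT   */
      { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },   /* ADST_DCT  */
      { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },   /* DCT_ADST  */
      { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },   /* ADST_ADST */
    };
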
void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1657,23 +1689,23 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
// stage 3
step1[0] = step2[0];
@@ -1683,109 +1715,109 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
-
- step1[8] = WRAPLOW(step2[8] + step2[9], bd);
- step1[9] = WRAPLOW(step2[8] - step2[9], bd);
- step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
- step1[11] = WRAPLOW(step2[10] + step2[11], bd);
- step1[12] = WRAPLOW(step2[12] + step2[13], bd);
- step1[13] = WRAPLOW(step2[12] - step2[13], bd);
- step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
- step1[15] = WRAPLOW(step2[14] + step2[15], bd);
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
- step2[4] = WRAPLOW(step1[4] + step1[5], bd);
- step2[5] = WRAPLOW(step1[4] - step1[5], bd);
- step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
- step2[7] = WRAPLOW(step1[6] + step1[7], bd);
+ step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
step2[8] = step1[8];
step2[15] = step1[15];
temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step2[11] = step1[11];
step2[12] = step1[12];
// stage 5
- step1[0] = WRAPLOW(step2[0] + step2[3], bd);
- step1[1] = WRAPLOW(step2[1] + step2[2], bd);
- step1[2] = WRAPLOW(step2[1] - step2[2], bd);
- step1[3] = WRAPLOW(step2[0] - step2[3], bd);
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[7] = step2[7];
- step1[8] = WRAPLOW(step2[8] + step2[11], bd);
- step1[9] = WRAPLOW(step2[9] + step2[10], bd);
- step1[10] = WRAPLOW(step2[9] - step2[10], bd);
- step1[11] = WRAPLOW(step2[8] - step2[11], bd);
- step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
- step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
- step1[14] = WRAPLOW(step2[13] + step2[14], bd);
- step1[15] = WRAPLOW(step2[12] + step2[15], bd);
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+ step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
// stage 6
- step2[0] = WRAPLOW(step1[0] + step1[7], bd);
- step2[1] = WRAPLOW(step1[1] + step1[6], bd);
- step2[2] = WRAPLOW(step1[2] + step1[5], bd);
- step2[3] = WRAPLOW(step1[3] + step1[4], bd);
- step2[4] = WRAPLOW(step1[3] - step1[4], bd);
- step2[5] = WRAPLOW(step1[2] - step1[5], bd);
- step2[6] = WRAPLOW(step1[1] - step1[6], bd);
- step2[7] = WRAPLOW(step1[0] - step1[7], bd);
+ step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
step2[8] = step1[8];
step2[9] = step1[9];
temp1 = (-step1[10] + step1[13]) * cospi_16_64;
temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = (-step1[11] + step1[12]) * cospi_16_64;
temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step2[14] = step1[14];
step2[15] = step1[15];
// stage 7
- output[0] = WRAPLOW(step2[0] + step2[15], bd);
- output[1] = WRAPLOW(step2[1] + step2[14], bd);
- output[2] = WRAPLOW(step2[2] + step2[13], bd);
- output[3] = WRAPLOW(step2[3] + step2[12], bd);
- output[4] = WRAPLOW(step2[4] + step2[11], bd);
- output[5] = WRAPLOW(step2[5] + step2[10], bd);
- output[6] = WRAPLOW(step2[6] + step2[9], bd);
- output[7] = WRAPLOW(step2[7] + step2[8], bd);
- output[8] = WRAPLOW(step2[7] - step2[8], bd);
- output[9] = WRAPLOW(step2[6] - step2[9], bd);
- output[10] = WRAPLOW(step2[5] - step2[10], bd);
- output[11] = WRAPLOW(step2[4] - step2[11], bd);
- output[12] = WRAPLOW(step2[3] - step2[12], bd);
- output[13] = WRAPLOW(step2[2] - step2[13], bd);
- output[14] = WRAPLOW(step2[1] - step2[14], bd);
- output[15] = WRAPLOW(step2[0] - step2[15], bd);
+ output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+ output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd);
+ output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+ output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd);
+ output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+ output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd);
+ output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+ output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd);
+ output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+ output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd);
+ output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+ output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd);
+ output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+ output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd);
+ output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+ output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
}
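
A worked instance of the rounding path used throughout: cospi_16_64 is 11585 = round(16384 / sqrt(2)), and the const-round-shift adds half an LSB before shifting out the 14 fraction bits (DCT_CONST_BITS). The input values here are arbitrary:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int64_t cospi_16_64 = 11585;             /* round(16384/sqrt(2)) */
      const int64_t a = 100, b = 50;                 /* arbitrary inputs */
      const int64_t temp = (a + b) * cospi_16_64;    /* 1737750 */
      const int64_t out  = (temp + (1 << 13)) >> 14; /* round, divide by 2^14 */
      printf("%lld\n", (long long)out);              /* 106, ~ 150/sqrt(2) */
      return 0;
    }
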
void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1861,22 +1893,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
- x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s8, bd), bd);
- x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s9, bd), bd);
- x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s10, bd), bd);
- x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s11, bd), bd);
- x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s12, bd), bd);
- x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s13, bd), bd);
- x6 = WRAPLOW(highbd_dct_const_round_shift(s6 + s14, bd), bd);
- x7 = WRAPLOW(highbd_dct_const_round_shift(s7 + s15, bd), bd);
- x8 = WRAPLOW(highbd_dct_const_round_shift(s0 - s8, bd), bd);
- x9 = WRAPLOW(highbd_dct_const_round_shift(s1 - s9, bd), bd);
- x10 = WRAPLOW(highbd_dct_const_round_shift(s2 - s10, bd), bd);
- x11 = WRAPLOW(highbd_dct_const_round_shift(s3 - s11, bd), bd);
- x12 = WRAPLOW(highbd_dct_const_round_shift(s4 - s12, bd), bd);
- x13 = WRAPLOW(highbd_dct_const_round_shift(s5 - s13, bd), bd);
- x14 = WRAPLOW(highbd_dct_const_round_shift(s6 - s14, bd), bd);
- x15 = WRAPLOW(highbd_dct_const_round_shift(s7 - s15, bd), bd);
+ x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd);
+ x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd);
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd);
+ x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd);
+ x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd);
+ x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd);
+ x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd);
+ x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd);
+ x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd);
// stage 2
s0 = x0;
@@ -1896,22 +1928,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
- x0 = WRAPLOW(s0 + s4, bd);
- x1 = WRAPLOW(s1 + s5, bd);
- x2 = WRAPLOW(s2 + s6, bd);
- x3 = WRAPLOW(s3 + s7, bd);
- x4 = WRAPLOW(s0 - s4, bd);
- x5 = WRAPLOW(s1 - s5, bd);
- x6 = WRAPLOW(s2 - s6, bd);
- x7 = WRAPLOW(s3 - s7, bd);
- x8 = WRAPLOW(highbd_dct_const_round_shift(s8 + s12, bd), bd);
- x9 = WRAPLOW(highbd_dct_const_round_shift(s9 + s13, bd), bd);
- x10 = WRAPLOW(highbd_dct_const_round_shift(s10 + s14, bd), bd);
- x11 = WRAPLOW(highbd_dct_const_round_shift(s11 + s15, bd), bd);
- x12 = WRAPLOW(highbd_dct_const_round_shift(s8 - s12, bd), bd);
- x13 = WRAPLOW(highbd_dct_const_round_shift(s9 - s13, bd), bd);
- x14 = WRAPLOW(highbd_dct_const_round_shift(s10 - s14, bd), bd);
- x15 = WRAPLOW(highbd_dct_const_round_shift(s11 - s15, bd), bd);
+ x0 = HIGHBD_WRAPLOW(s0 + s4, bd);
+ x1 = HIGHBD_WRAPLOW(s1 + s5, bd);
+ x2 = HIGHBD_WRAPLOW(s2 + s6, bd);
+ x3 = HIGHBD_WRAPLOW(s3 + s7, bd);
+ x4 = HIGHBD_WRAPLOW(s0 - s4, bd);
+ x5 = HIGHBD_WRAPLOW(s1 - s5, bd);
+ x6 = HIGHBD_WRAPLOW(s2 - s6, bd);
+ x7 = HIGHBD_WRAPLOW(s3 - s7, bd);
+ x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd);
+ x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd);
+ x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd);
+ x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd);
+ x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd);
+ x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd);
// stage 3
s0 = x0;
@@ -1931,22 +1963,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
- x0 = WRAPLOW(s0 + s2, bd);
- x1 = WRAPLOW(s1 + s3, bd);
- x2 = WRAPLOW(s0 - s2, bd);
- x3 = WRAPLOW(s1 - s3, bd);
- x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd);
- x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd);
- x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd);
- x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd);
- x8 = WRAPLOW(s8 + s10, bd);
- x9 = WRAPLOW(s9 + s11, bd);
- x10 = WRAPLOW(s8 - s10, bd);
- x11 = WRAPLOW(s9 - s11, bd);
- x12 = WRAPLOW(highbd_dct_const_round_shift(s12 + s14, bd), bd);
- x13 = WRAPLOW(highbd_dct_const_round_shift(s13 + s15, bd), bd);
- x14 = WRAPLOW(highbd_dct_const_round_shift(s12 - s14, bd), bd);
- x15 = WRAPLOW(highbd_dct_const_round_shift(s13 - s15, bd), bd);
+ x0 = HIGHBD_WRAPLOW(s0 + s2, bd);
+ x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
+ x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
+ x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+ x8 = HIGHBD_WRAPLOW(s8 + s10, bd);
+ x9 = HIGHBD_WRAPLOW(s9 + s11, bd);
+ x10 = HIGHBD_WRAPLOW(s8 - s10, bd);
+ x11 = HIGHBD_WRAPLOW(s9 - s11, bd);
+ x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd);
+ x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd);
// stage 4
s2 = (- cospi_16_64) * (x2 + x3);
@@ -1958,31 +1990,31 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
s14 = (- cospi_16_64) * (x14 + x15);
s15 = cospi_16_64 * (x14 - x15);
- x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
- x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd);
- x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd);
- x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd);
- x10 = WRAPLOW(highbd_dct_const_round_shift(s10, bd), bd);
- x11 = WRAPLOW(highbd_dct_const_round_shift(s11, bd), bd);
- x14 = WRAPLOW(highbd_dct_const_round_shift(s14, bd), bd);
- x15 = WRAPLOW(highbd_dct_const_round_shift(s15, bd), bd);
-
- output[0] = WRAPLOW(x0, bd);
- output[1] = WRAPLOW(-x8, bd);
- output[2] = WRAPLOW(x12, bd);
- output[3] = WRAPLOW(-x4, bd);
- output[4] = WRAPLOW(x6, bd);
- output[5] = WRAPLOW(x14, bd);
- output[6] = WRAPLOW(x10, bd);
- output[7] = WRAPLOW(x2, bd);
- output[8] = WRAPLOW(x3, bd);
- output[9] = WRAPLOW(x11, bd);
- output[10] = WRAPLOW(x15, bd);
- output[11] = WRAPLOW(x7, bd);
- output[12] = WRAPLOW(x5, bd);
- output[13] = WRAPLOW(-x13, bd);
- output[14] = WRAPLOW(x9, bd);
- output[15] = WRAPLOW(-x1, bd);
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+ x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd);
+ x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd);
+
+ output[0] = HIGHBD_WRAPLOW(x0, bd);
+ output[1] = HIGHBD_WRAPLOW(-x8, bd);
+ output[2] = HIGHBD_WRAPLOW(x12, bd);
+ output[3] = HIGHBD_WRAPLOW(-x4, bd);
+ output[4] = HIGHBD_WRAPLOW(x6, bd);
+ output[5] = HIGHBD_WRAPLOW(x14, bd);
+ output[6] = HIGHBD_WRAPLOW(x10, bd);
+ output[7] = HIGHBD_WRAPLOW(x2, bd);
+ output[8] = HIGHBD_WRAPLOW(x3, bd);
+ output[9] = HIGHBD_WRAPLOW(x11, bd);
+ output[10] = HIGHBD_WRAPLOW(x15, bd);
+ output[11] = HIGHBD_WRAPLOW(x7, bd);
+ output[12] = HIGHBD_WRAPLOW(x5, bd);
+ output[13] = HIGHBD_WRAPLOW(-x13, bd);
+ output[14] = HIGHBD_WRAPLOW(x9, bd);
+ output[15] = HIGHBD_WRAPLOW(-x1, bd);
}
void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -2017,11 +2049,11 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i, j;
tran_high_t a1;
- tran_low_t out = WRAPLOW(
- highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
+ tran_low_t out = HIGHBD_WRAPLOW(
+ highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
+ out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i)
@@ -2056,43 +2088,43 @@ static void highbd_idct32_c(const tran_low_t *input,
temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
- step1[16] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[31] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
- step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
- step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
- step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
- step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
- step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
- step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
- step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
// stage 2
step2[0] = step1[0];
@@ -2106,40 +2138,40 @@ static void highbd_idct32_c(const tran_low_t *input,
temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
-
- step2[16] = WRAPLOW(step1[16] + step1[17], bd);
- step2[17] = WRAPLOW(step1[16] - step1[17], bd);
- step2[18] = WRAPLOW(-step1[18] + step1[19], bd);
- step2[19] = WRAPLOW(step1[18] + step1[19], bd);
- step2[20] = WRAPLOW(step1[20] + step1[21], bd);
- step2[21] = WRAPLOW(step1[20] - step1[21], bd);
- step2[22] = WRAPLOW(-step1[22] + step1[23], bd);
- step2[23] = WRAPLOW(step1[22] + step1[23], bd);
- step2[24] = WRAPLOW(step1[24] + step1[25], bd);
- step2[25] = WRAPLOW(step1[24] - step1[25], bd);
- step2[26] = WRAPLOW(-step1[26] + step1[27], bd);
- step2[27] = WRAPLOW(step1[26] + step1[27], bd);
- step2[28] = WRAPLOW(step1[28] + step1[29], bd);
- step2[29] = WRAPLOW(step1[28] - step1[29], bd);
- step2[30] = WRAPLOW(-step1[30] + step1[31], bd);
- step2[31] = WRAPLOW(step1[30] + step1[31], bd);
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd);
+ step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd);
+ step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd);
+ step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd);
+ step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd);
+ step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd);
+ step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd);
+ step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd);
+ step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd);
+ step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd);
+ step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd);
+ step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd);
+ step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd);
+ step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd);
+ step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd);
+ step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd);
// stage 3
step1[0] = step2[0];
@@ -2149,42 +2181,42 @@ static void highbd_idct32_c(const tran_low_t *input,
temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
-
- step1[8] = WRAPLOW(step2[8] + step2[9], bd);
- step1[9] = WRAPLOW(step2[8] - step2[9], bd);
- step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
- step1[11] = WRAPLOW(step2[10] + step2[11], bd);
- step1[12] = WRAPLOW(step2[12] + step2[13], bd);
- step1[13] = WRAPLOW(step2[12] - step2[13], bd);
- step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
- step1[15] = WRAPLOW(step2[14] + step2[15], bd);
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
step1[16] = step2[16];
step1[31] = step2[31];
temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
- step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
- step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[19] = step2[19];
step1[20] = step2[20];
temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
- step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
- step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[23] = step2[23];
step1[24] = step2[24];
step1[27] = step2[27];
@@ -2193,87 +2225,87 @@ static void highbd_idct32_c(const tran_low_t *input,
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
- step2[4] = WRAPLOW(step1[4] + step1[5], bd);
- step2[5] = WRAPLOW(step1[4] - step1[5], bd);
- step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
- step2[7] = WRAPLOW(step1[6] + step1[7], bd);
+ step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
step2[8] = step1[8];
step2[15] = step1[15];
temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step2[11] = step1[11];
step2[12] = step1[12];
- step2[16] = WRAPLOW(step1[16] + step1[19], bd);
- step2[17] = WRAPLOW(step1[17] + step1[18], bd);
- step2[18] = WRAPLOW(step1[17] - step1[18], bd);
- step2[19] = WRAPLOW(step1[16] - step1[19], bd);
- step2[20] = WRAPLOW(-step1[20] + step1[23], bd);
- step2[21] = WRAPLOW(-step1[21] + step1[22], bd);
- step2[22] = WRAPLOW(step1[21] + step1[22], bd);
- step2[23] = WRAPLOW(step1[20] + step1[23], bd);
-
- step2[24] = WRAPLOW(step1[24] + step1[27], bd);
- step2[25] = WRAPLOW(step1[25] + step1[26], bd);
- step2[26] = WRAPLOW(step1[25] - step1[26], bd);
- step2[27] = WRAPLOW(step1[24] - step1[27], bd);
- step2[28] = WRAPLOW(-step1[28] + step1[31], bd);
- step2[29] = WRAPLOW(-step1[29] + step1[30], bd);
- step2[30] = WRAPLOW(step1[29] + step1[30], bd);
- step2[31] = WRAPLOW(step1[28] + step1[31], bd);
+ step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd);
+ step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd);
+ step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd);
+ step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd);
+ step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd);
+ step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd);
+ step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd);
+ step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd);
+
+ step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd);
+ step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd);
+ step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd);
+ step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd);
+ step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd);
+ step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd);
+ step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd);
+ step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd);
// stage 5
- step1[0] = WRAPLOW(step2[0] + step2[3], bd);
- step1[1] = WRAPLOW(step2[1] + step2[2], bd);
- step1[2] = WRAPLOW(step2[1] - step2[2], bd);
- step1[3] = WRAPLOW(step2[0] - step2[3], bd);
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[7] = step2[7];
- step1[8] = WRAPLOW(step2[8] + step2[11], bd);
- step1[9] = WRAPLOW(step2[9] + step2[10], bd);
- step1[10] = WRAPLOW(step2[9] - step2[10], bd);
- step1[11] = WRAPLOW(step2[8] - step2[11], bd);
- step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
- step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
- step1[14] = WRAPLOW(step2[13] + step2[14], bd);
- step1[15] = WRAPLOW(step2[12] + step2[15], bd);
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+ step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
step1[16] = step2[16];
step1[17] = step2[17];
temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
- step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
- step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
- step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
- step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[22] = step2[22];
step1[23] = step2[23];
step1[24] = step2[24];
@@ -2282,62 +2314,62 @@ static void highbd_idct32_c(const tran_low_t *input,
step1[31] = step2[31];
// stage 6
- step2[0] = WRAPLOW(step1[0] + step1[7], bd);
- step2[1] = WRAPLOW(step1[1] + step1[6], bd);
- step2[2] = WRAPLOW(step1[2] + step1[5], bd);
- step2[3] = WRAPLOW(step1[3] + step1[4], bd);
- step2[4] = WRAPLOW(step1[3] - step1[4], bd);
- step2[5] = WRAPLOW(step1[2] - step1[5], bd);
- step2[6] = WRAPLOW(step1[1] - step1[6], bd);
- step2[7] = WRAPLOW(step1[0] - step1[7], bd);
+ step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
step2[8] = step1[8];
step2[9] = step1[9];
temp1 = (-step1[10] + step1[13]) * cospi_16_64;
temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = (-step1[11] + step1[12]) * cospi_16_64;
temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step2[14] = step1[14];
step2[15] = step1[15];
- step2[16] = WRAPLOW(step1[16] + step1[23], bd);
- step2[17] = WRAPLOW(step1[17] + step1[22], bd);
- step2[18] = WRAPLOW(step1[18] + step1[21], bd);
- step2[19] = WRAPLOW(step1[19] + step1[20], bd);
- step2[20] = WRAPLOW(step1[19] - step1[20], bd);
- step2[21] = WRAPLOW(step1[18] - step1[21], bd);
- step2[22] = WRAPLOW(step1[17] - step1[22], bd);
- step2[23] = WRAPLOW(step1[16] - step1[23], bd);
-
- step2[24] = WRAPLOW(-step1[24] + step1[31], bd);
- step2[25] = WRAPLOW(-step1[25] + step1[30], bd);
- step2[26] = WRAPLOW(-step1[26] + step1[29], bd);
- step2[27] = WRAPLOW(-step1[27] + step1[28], bd);
- step2[28] = WRAPLOW(step1[27] + step1[28], bd);
- step2[29] = WRAPLOW(step1[26] + step1[29], bd);
- step2[30] = WRAPLOW(step1[25] + step1[30], bd);
- step2[31] = WRAPLOW(step1[24] + step1[31], bd);
+ step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd);
+ step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd);
+ step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd);
+ step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd);
+ step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd);
+ step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd);
+ step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd);
+ step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd);
+
+ step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd);
+ step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd);
+ step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd);
+ step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd);
+ step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd);
+ step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd);
+ step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd);
+ step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd);
// stage 7
- step1[0] = WRAPLOW(step2[0] + step2[15], bd);
- step1[1] = WRAPLOW(step2[1] + step2[14], bd);
- step1[2] = WRAPLOW(step2[2] + step2[13], bd);
- step1[3] = WRAPLOW(step2[3] + step2[12], bd);
- step1[4] = WRAPLOW(step2[4] + step2[11], bd);
- step1[5] = WRAPLOW(step2[5] + step2[10], bd);
- step1[6] = WRAPLOW(step2[6] + step2[9], bd);
- step1[7] = WRAPLOW(step2[7] + step2[8], bd);
- step1[8] = WRAPLOW(step2[7] - step2[8], bd);
- step1[9] = WRAPLOW(step2[6] - step2[9], bd);
- step1[10] = WRAPLOW(step2[5] - step2[10], bd);
- step1[11] = WRAPLOW(step2[4] - step2[11], bd);
- step1[12] = WRAPLOW(step2[3] - step2[12], bd);
- step1[13] = WRAPLOW(step2[2] - step2[13], bd);
- step1[14] = WRAPLOW(step2[1] - step2[14], bd);
- step1[15] = WRAPLOW(step2[0] - step2[15], bd);
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd);
+ step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+ step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd);
+ step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+ step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd);
+ step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
step1[16] = step2[16];
step1[17] = step2[17];
@@ -2345,58 +2377,58 @@ static void highbd_idct32_c(const tran_low_t *input,
step1[19] = step2[19];
temp1 = (-step2[20] + step2[27]) * cospi_16_64;
temp2 = (step2[20] + step2[27]) * cospi_16_64;
- step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = (-step2[21] + step2[26]) * cospi_16_64;
temp2 = (step2[21] + step2[26]) * cospi_16_64;
- step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = (-step2[22] + step2[25]) * cospi_16_64;
temp2 = (step2[22] + step2[25]) * cospi_16_64;
- step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
temp1 = (-step2[23] + step2[24]) * cospi_16_64;
temp2 = (step2[23] + step2[24]) * cospi_16_64;
- step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
- step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
+ step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
step1[28] = step2[28];
step1[29] = step2[29];
step1[30] = step2[30];
step1[31] = step2[31];
// final stage
- output[0] = WRAPLOW(step1[0] + step1[31], bd);
- output[1] = WRAPLOW(step1[1] + step1[30], bd);
- output[2] = WRAPLOW(step1[2] + step1[29], bd);
- output[3] = WRAPLOW(step1[3] + step1[28], bd);
- output[4] = WRAPLOW(step1[4] + step1[27], bd);
- output[5] = WRAPLOW(step1[5] + step1[26], bd);
- output[6] = WRAPLOW(step1[6] + step1[25], bd);
- output[7] = WRAPLOW(step1[7] + step1[24], bd);
- output[8] = WRAPLOW(step1[8] + step1[23], bd);
- output[9] = WRAPLOW(step1[9] + step1[22], bd);
- output[10] = WRAPLOW(step1[10] + step1[21], bd);
- output[11] = WRAPLOW(step1[11] + step1[20], bd);
- output[12] = WRAPLOW(step1[12] + step1[19], bd);
- output[13] = WRAPLOW(step1[13] + step1[18], bd);
- output[14] = WRAPLOW(step1[14] + step1[17], bd);
- output[15] = WRAPLOW(step1[15] + step1[16], bd);
- output[16] = WRAPLOW(step1[15] - step1[16], bd);
- output[17] = WRAPLOW(step1[14] - step1[17], bd);
- output[18] = WRAPLOW(step1[13] - step1[18], bd);
- output[19] = WRAPLOW(step1[12] - step1[19], bd);
- output[20] = WRAPLOW(step1[11] - step1[20], bd);
- output[21] = WRAPLOW(step1[10] - step1[21], bd);
- output[22] = WRAPLOW(step1[9] - step1[22], bd);
- output[23] = WRAPLOW(step1[8] - step1[23], bd);
- output[24] = WRAPLOW(step1[7] - step1[24], bd);
- output[25] = WRAPLOW(step1[6] - step1[25], bd);
- output[26] = WRAPLOW(step1[5] - step1[26], bd);
- output[27] = WRAPLOW(step1[4] - step1[27], bd);
- output[28] = WRAPLOW(step1[3] - step1[28], bd);
- output[29] = WRAPLOW(step1[2] - step1[29], bd);
- output[30] = WRAPLOW(step1[1] - step1[30], bd);
- output[31] = WRAPLOW(step1[0] - step1[31], bd);
+ output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd);
+ output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd);
+ output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd);
+ output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd);
+ output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd);
+ output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd);
+ output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd);
+ output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd);
+ output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd);
+ output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd);
+ output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd);
+ output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd);
+ output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd);
+ output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd);
+ output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd);
+ output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd);
+ output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd);
+ output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd);
+ output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd);
+ output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd);
+ output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd);
+ output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd);
+ output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd);
+ output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd);
+ output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd);
+ output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd);
+ output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd);
+ output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd);
+ output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd);
+ output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd);
+ output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd);
+ output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd);
}
void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -2472,9 +2504,9 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
int a1;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- tran_low_t out = WRAPLOW(
- highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
- out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
+ tran_low_t out = HIGHBD_WRAPLOW(
+ highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+ out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 32; ++j) {
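The inv_txfm.c change is mechanical: range checking moves out of highbd_dct_const_round_shift, which now only rounds, and into the new HIGHBD_WRAPLOW macro, so each butterfly output is wrapped exactly once. For reference, a minimal standalone sketch of the rounding step, assuming the fixed-point constants from vpx_dsp/txfm_common.h (DCT_CONST_BITS == 14, cospi_16_64 == 11585):

    #include <stdint.h>
    #include <stdio.h>

    #define DCT_CONST_BITS 14
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

    typedef int64_t tran_high_t;

    /* Round-to-nearest removal of the 14 fractional fixed-point bits. */
    static tran_high_t dct_round_shift(tran_high_t x) {
      return ROUND_POWER_OF_TWO(x, DCT_CONST_BITS);
    }

    int main(void) {
      const tran_high_t cospi_16_64 = 11585; /* round(cos(pi/4) * 2^14) */
      const tran_high_t in0 = 100, in1 = 60;
      /* One stage-4 butterfly of the 32-point idct above: */
      printf("%lld %lld\n",
             (long long)dct_round_shift((in0 + in1) * cospi_16_64),
             (long long)dct_round_shift((in0 - in1) * cospi_16_64));
      return 0;
    }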
diff --git a/libvpx/vpx_dsp/inv_txfm.h b/libvpx/vpx_dsp/inv_txfm.h
index 23588139e..9cfe1be3a 100644
--- a/libvpx/vpx_dsp/inv_txfm.h
+++ b/libvpx/vpx_dsp/inv_txfm.h
@@ -21,7 +21,7 @@
extern "C" {
#endif
-static INLINE tran_low_t check_range(tran_high_t input) {
+static INLINE tran_high_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid VP9 input streams, intermediate stage coefficients should always
// stay within the range of a signed 16 bit integer. Coefficients can go out
@@ -32,17 +32,17 @@ static INLINE tran_low_t check_range(tran_high_t input) {
assert(INT16_MIN <= input);
assert(input <= INT16_MAX);
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
- return (tran_low_t)input;
+ return input;
}
-static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
+static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
- return check_range(rv);
+ return (tran_high_t)rv;
}
#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE tran_low_t highbd_check_range(tran_high_t input,
- int bd) {
+static INLINE tran_high_t highbd_check_range(tran_high_t input,
+ int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid highbitdepth VP9 streams, intermediate stage coefficients will
// stay within the ranges:
@@ -56,13 +56,12 @@ static INLINE tran_low_t highbd_check_range(tran_high_t input,
(void) int_min;
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
(void) bd;
- return (tran_low_t)input;
+ return input;
}
-static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
- int bd) {
+static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
- return highbd_check_range(rv, bd);
+ return (tran_high_t)rv;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -83,9 +82,20 @@ static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
// bd of 10 uses trans_low with 18bits, need to remove 14bits
// bd of 12 uses trans_low with 20bits, need to remove 12bits
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
-#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
-#else
-#define WRAPLOW(x, bd) ((int32_t)(x))
+
+#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16)
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_WRAPLOW(x, bd) \
+ ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd))
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#else // CONFIG_EMULATE_HARDWARE
+
+#define WRAPLOW(x) ((int32_t)check_range(x))
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_WRAPLOW(x, bd) \
+ ((int32_t)highbd_check_range((x), bd))
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_EMULATE_HARDWARE
void idct4_c(const tran_low_t *input, tran_low_t *output);
@@ -107,14 +117,14 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
int bd) {
- trans = WRAPLOW(trans, bd);
- return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
+ trans = HIGHBD_WRAPLOW(trans, bd);
+ return clip_pixel_highbd(dest + (int)trans, bd);
}
#endif
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
- trans = WRAPLOW(trans, 8);
- return clip_pixel(WRAPLOW(dest + trans, 8));
+ trans = WRAPLOW(trans);
+ return clip_pixel(dest + (int)trans);
}
#ifdef __cplusplus
} // extern "C"
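The WRAPLOW/HIGHBD_WRAPLOW split above is the core of this header change: under CONFIG_EMULATE_HARDWARE the macros truncate intermediates to 16 bits (16 + bd - 8 bits in the high-bit-depth case) with sign extension, mimicking fixed-width hardware registers; otherwise they only range-check. A standalone sketch of the emulated wrap, with the left shift routed through uint32_t to sidestep the signed-shift overflow the raw macro tolerates:

    #include <stdint.h>
    #include <stdio.h>

    /* Keep only the low (16 + bd - 8) bits, sign-extended. */
    static int32_t wraplow_emulated(int32_t x, int bd) {
      const int shift = 24 - bd; /* 16 at bd=8, 14 at bd=10, 12 at bd=12 */
      return (int32_t)((uint32_t)x << shift) >> shift;
    }

    int main(void) {
      printf("%d\n", wraplow_emulated(40000, 8));  /* 16-bit wrap: -25536 */
      printf("%d\n", wraplow_emulated(40000, 10)); /* fits in 18 bits: 40000 */
      return 0;
    }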
diff --git a/libvpx/vpx_dsp/loopfilter.c b/libvpx/vpx_dsp/loopfilter.c
index 66f4d9576..645a1ab95 100644
--- a/libvpx/vpx_dsp/loopfilter.c
+++ b/libvpx/vpx_dsp/loopfilter.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
@@ -119,12 +120,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh, int count) {
+ const uint8_t *thresh) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
@@ -138,18 +139,17 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
+ const uint8_t *limit, const uint8_t *thresh) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
@@ -163,9 +163,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1);
+ vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
@@ -190,13 +189,12 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
}
void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
+ const uint8_t *limit, const uint8_t *thresh) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -213,16 +211,15 @@ void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
+ const uint8_t *limit, const uint8_t *thresh) {
int i;
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
@@ -238,9 +235,8 @@ void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1);
+ vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
static INLINE void filter16(int8_t mask, uint8_t thresh,
@@ -294,9 +290,9 @@ static INLINE void filter16(int8_t mask, uint8_t thresh,
}
}
-void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count) {
+static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -320,6 +316,16 @@ void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
}
}
+void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
+}
+
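These two wrappers replace the count argument that vpx_lpf_horizontal_16 used to take: the filtered width is now encoded in the symbol itself (edge_8 handles one 8-pixel edge, edge_16 two adjacent ones), which lets the RTCD tables bind a separate SIMD kernel to each width. A minimal sketch of the pattern, with illustrative names:

    #include <stdio.h>

    /* The static worker keeps the old count parameter; thin public entry
     * points pin it to 1 or 2, one exported symbol per width. */
    static void edge_worker(const char *name, int count) {
      int i, columns = 0;
      for (i = 0; i < 8 * count; ++i) ++columns; /* one pixel column per step */
      printf("%s: %d columns\n", name, columns);
    }

    void edge_8(void) { edge_worker("edge_8", 1); }
    void edge_16(void) { edge_worker("edge_16", 2); }

    int main(void) {
      edge_8();  /* one 8-pixel edge */
      edge_16(); /* two adjacent 8-pixel edges */
      return 0;
    }

The same worker-plus-wrappers refactor recurs below for the MSA and DSPR2 loop filters and their high-bit-depth variants.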
static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
@@ -450,12 +456,12 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh, int count, int bd) {
+ const uint8_t *thresh, int bd) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint16_t p3 = s[-4 * p];
const uint16_t p2 = s[-3 * p];
const uint16_t p1 = s[-2 * p];
@@ -479,18 +485,18 @@ void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd);
}
void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
+ int bd) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = highbd_filter_mask(*limit, *blimit,
@@ -508,9 +514,9 @@ void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1, bd);
+ thresh1, bd);
}
static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
@@ -536,12 +542,12 @@ static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
+ int bd) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -564,16 +570,16 @@ void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd);
}
void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
+ int bd) {
int i;
- for (i = 0; i < 8 * count; ++i) {
+ for (i = 0; i < 8; ++i) {
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = highbd_filter_mask(*limit, *blimit,
@@ -596,9 +602,9 @@ void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1, bd);
+ thresh1, bd);
}
static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
@@ -664,9 +670,11 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
}
}
-void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count, int bd) {
+static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count, int bd) {
int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -698,6 +706,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
}
}
+void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
+}
+
+void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
+}
+
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
diff --git a/libvpx/vpx_dsp/mips/add_noise_msa.c b/libvpx/vpx_dsp/mips/add_noise_msa.c
new file mode 100644
index 000000000..366770c0d
--- /dev/null
+++ b/libvpx/vpx_dsp/mips/add_noise_msa.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "./macros_msa.h"
+
+void vpx_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
+ char blackclamp[16], char whiteclamp[16],
+ char bothclamp[16], uint32_t width,
+ uint32_t height, int32_t pitch) {
+ uint32_t i, j;
+
+ for (i = 0; i < height / 2; ++i) {
+ uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
+ int8_t *ref0_ptr = (int8_t *)(noise + (rand() & 0xff));
+ uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
+ int8_t *ref1_ptr = (int8_t *)(noise + (rand() & 0xff));
+ for (j = width / 16; j--;) {
+ v16i8 temp00_s, temp01_s;
+ v16u8 temp00, temp01, black_clamp, white_clamp;
+ v16u8 pos0, ref0, pos1, ref1;
+ v16i8 const127 = __msa_ldi_b(127);
+
+ pos0 = LD_UB(pos0_ptr);
+ ref0 = LD_UB(ref0_ptr);
+ pos1 = LD_UB(pos1_ptr);
+ ref1 = LD_UB(ref1_ptr);
+ black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
+ white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
+ temp00 = (pos0 < black_clamp);
+ pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
+ temp01 = (pos1 < black_clamp);
+ pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
+ XORI_B2_128_UB(pos0, pos1);
+ temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
+ temp00 = (v16u8)(temp00_s < pos0);
+ pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
+ temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
+ temp01 = (temp01_s < pos1);
+ pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
+ XORI_B2_128_UB(pos0, pos1);
+ pos0 += ref0;
+ ST_UB(pos0, pos0_ptr);
+ pos1 += ref1;
+ ST_UB(pos1, pos1_ptr);
+ pos0_ptr += 16;
+ pos1_ptr += 16;
+ ref0_ptr += 16;
+ ref1_ptr += 16;
+ }
+ }
+}
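The new MSA kernel vectorizes the post-processing noise pass: pixels are clamped so that adding a signed noise byte cannot leave [0, 255], then two rows of 16 pixels per iteration get noise added from a randomly chosen offset into the noise table. A hypothetical scalar outline of the same intent (illustrative names and clamp margins, not the library's scalar vpx_plane_add_noise_c verbatim):

    #include <stdint.h>
    #include <stdlib.h>

    /* Assumes the noise buffer is at least width + 255 bytes long. */
    static void add_noise_sketch(uint8_t *start, const int8_t *noise,
                                 uint8_t black, uint8_t white, uint32_t width,
                                 uint32_t height, int32_t pitch) {
      uint32_t i, j;
      for (i = 0; i < height; ++i) {
        uint8_t *pos = start + i * pitch;
        const int8_t *ref = noise + (rand() & 0xff); /* random noise phase */
        for (j = 0; j < width; ++j) {
          int v = pos[j];
          if (v < black) v = black;             /* room for negative noise */
          if (v > 255 - white) v = 255 - white; /* room for positive noise */
          pos[j] = (uint8_t)(v + ref[j]);
        }
      }
    }

    int main(void) {
      uint8_t plane[8 * 4] = { 0 };
      int8_t noise_buf[8 + 255] = { 0 };
      add_noise_sketch(plane, noise_buf, 16, 16, 8, 4, 8);
      return 0;
    }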
diff --git a/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c b/libvpx/vpx_dsp/mips/avg_msa.c
index 611adb1a2..52a24ed37 100644
--- a/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c
+++ b/libvpx/vpx_dsp/mips/avg_msa.c
@@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
-uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
+uint32_t vpx_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
uint32_t sum_out;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
@@ -33,7 +33,7 @@ uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
return sum_out;
}
-uint32_t vp9_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {
+uint32_t vpx_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {
uint32_t sum_out;
uint32_t src0, src1, src2, src3;
v16u8 vec = { 0 };
diff --git a/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c b/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c
index 2115a348c..f29c14b3d 100644
--- a/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c
+++ b/libvpx/vpx_dsp/mips/fwd_dct32x32_msa.c
@@ -933,23 +933,21 @@ void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
}
void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
- out[1] = 0;
-
- out[0] = LD_HADD(input, stride);
- out[0] += LD_HADD(input + 8, stride);
- out[0] += LD_HADD(input + 16, stride);
- out[0] += LD_HADD(input + 24, stride);
- out[0] += LD_HADD(input + 32 * 8, stride);
- out[0] += LD_HADD(input + 32 * 8 + 8, stride);
- out[0] += LD_HADD(input + 32 * 8 + 16, stride);
- out[0] += LD_HADD(input + 32 * 8 + 24, stride);
- out[0] += LD_HADD(input + 32 * 16, stride);
- out[0] += LD_HADD(input + 32 * 16 + 8, stride);
- out[0] += LD_HADD(input + 32 * 16 + 16, stride);
- out[0] += LD_HADD(input + 32 * 16 + 24, stride);
- out[0] += LD_HADD(input + 32 * 24, stride);
- out[0] += LD_HADD(input + 32 * 24 + 8, stride);
- out[0] += LD_HADD(input + 32 * 24 + 16, stride);
- out[0] += LD_HADD(input + 32 * 24 + 24, stride);
- out[0] >>= 3;
+ int sum = LD_HADD(input, stride);
+ sum += LD_HADD(input + 8, stride);
+ sum += LD_HADD(input + 16, stride);
+ sum += LD_HADD(input + 24, stride);
+ sum += LD_HADD(input + 32 * 8, stride);
+ sum += LD_HADD(input + 32 * 8 + 8, stride);
+ sum += LD_HADD(input + 32 * 8 + 16, stride);
+ sum += LD_HADD(input + 32 * 8 + 24, stride);
+ sum += LD_HADD(input + 32 * 16, stride);
+ sum += LD_HADD(input + 32 * 16 + 8, stride);
+ sum += LD_HADD(input + 32 * 16 + 16, stride);
+ sum += LD_HADD(input + 32 * 16 + 24, stride);
+ sum += LD_HADD(input + 32 * 24, stride);
+ sum += LD_HADD(input + 32 * 24 + 8, stride);
+ sum += LD_HADD(input + 32 * 24 + 16, stride);
+ sum += LD_HADD(input + 32 * 24 + 24, stride);
+ out[0] = (int16_t)(sum >> 3);
}
diff --git a/libvpx/vpx_dsp/mips/fwd_txfm_msa.c b/libvpx/vpx_dsp/mips/fwd_txfm_msa.c
index f66dd5fce..0dd141f41 100644
--- a/libvpx/vpx_dsp/mips/fwd_txfm_msa.c
+++ b/libvpx/vpx_dsp/mips/fwd_txfm_msa.c
@@ -237,11 +237,9 @@ void vpx_fdct16x16_msa(const int16_t *input, int16_t *output,
}
void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
- out[1] = 0;
-
- out[0] = LD_HADD(input, stride);
- out[0] += LD_HADD(input + 8, stride);
- out[0] += LD_HADD(input + 16 * 8, stride);
- out[0] += LD_HADD(input + 16 * 8 + 8, stride);
- out[0] >>= 1;
+ int sum = LD_HADD(input, stride);
+ sum += LD_HADD(input + 8, stride);
+ sum += LD_HADD(input + 16 * 8, stride);
+ sum += LD_HADD(input + 16 * 8 + 8, stride);
+ out[0] = (int16_t)(sum >> 1);
}
diff --git a/libvpx/vpx_dsp/mips/loopfilter_16_msa.c b/libvpx/vpx_dsp/mips/loopfilter_16_msa.c
index b7c9f7bd0..a6c581d72 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/libvpx/vpx_dsp/mips/loopfilter_16_msa.c
@@ -423,11 +423,11 @@ void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
}
}
-void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
+static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
if (1 == count) {
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
uint64_t dword0, dword1;
@@ -648,6 +648,20 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
}
}
+void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
+}
+
static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
uint8_t *output, int32_t out_pitch) {
v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
diff --git a/libvpx/vpx_dsp/mips/loopfilter_4_msa.c b/libvpx/vpx_dsp/mips/loopfilter_4_msa.c
index daf5f38bf..936347031 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/libvpx/vpx_dsp/mips/loopfilter_4_msa.c
@@ -13,14 +13,11 @@
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
+ const uint8_t *thresh_ptr) {
uint64_t p1_d, p0_d, q0_d, q1_d;
v16u8 mask, hev, flat, thresh, b_limit, limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
- (void)count;
-
/* load vector elements */
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
@@ -74,14 +71,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
+ const uint8_t *thresh_ptr) {
v16u8 mask, hev, flat, limit, thresh, b_limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v8i16 vec0, vec1, vec2, vec3;
- (void)count;
-
LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
thresh = (v16u8)__msa_fill_b(*thresh_ptr);
diff --git a/libvpx/vpx_dsp/mips/loopfilter_8_msa.c b/libvpx/vpx_dsp/mips/loopfilter_8_msa.c
index 00b6db550..5b22bd002 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_8_msa.c
+++ b/libvpx/vpx_dsp/mips/loopfilter_8_msa.c
@@ -13,8 +13,7 @@
void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
+ const uint8_t *thresh_ptr) {
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
v16u8 mask, hev, flat, thresh, b_limit, limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
@@ -23,8 +22,6 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
v16i8 zero = { 0 };
- (void)count;
-
/* load vector elements */
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
@@ -161,8 +158,7 @@ void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
+ const uint8_t *thresh_ptr) {
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 p1_out, p0_out, q0_out, q1_out;
v16u8 flat, mask, hev, thresh, b_limit, limit;
@@ -171,8 +167,6 @@ void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
v16u8 zero = { 0 };
v8i16 vec0, vec1, vec2, vec3, vec4;
- (void)count;
-
/* load vector elements */
LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
diff --git a/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c b/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c
index 99a96d89b..8414b9ed5 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c
+++ b/libvpx/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
uint8_t i;
uint32_t mask;
uint32_t hev;
@@ -117,8 +116,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
uint8_t i;
uint32_t mask, hev;
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
@@ -313,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
@@ -324,8 +322,8 @@ void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
@@ -335,8 +333,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
@@ -346,9 +344,8 @@ void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
- 1);
+ vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
diff --git a/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c b/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c
index 4138f5697..dd0545eed 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c
+++ b/libvpx/vpx_dsp/mips/loopfilter_mb_dspr2.c
@@ -23,8 +23,7 @@ void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
uint32_t mask;
uint32_t hev, flat;
uint8_t i;
@@ -322,8 +321,7 @@ void vpx_lpf_vertical_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+ const uint8_t *thresh) {
uint8_t i;
uint32_t mask, hev, flat;
uint8_t *s1, *s2, *s3, *s4;
diff --git a/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c b/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
index 8a4865073..85e167ca0 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ b/libvpx/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -19,12 +19,12 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+static void mb_lpf_horizontal_edge(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint32_t mask;
uint32_t hev, flat, flat2;
uint8_t i;
@@ -791,4 +791,18 @@ void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
s = s + 4;
}
}
+
+void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
+}
#endif // #if HAVE_DSPR2
diff --git a/libvpx/vpx_dsp/psnrhvs.c b/libvpx/vpx_dsp/psnrhvs.c
index 300170579..0ffa1b251 100644
--- a/libvpx/vpx_dsp/psnrhvs.c
+++ b/libvpx/vpx_dsp/psnrhvs.c
@@ -200,6 +200,8 @@ static double calc_psnrhvs(const unsigned char *_src, int _systride,
}
}
}
+ if (pixels <= 0)
+ return 0;
ret /= pixels;
return ret;
}
diff --git a/libvpx/vpx_dsp/quantize.c b/libvpx/vpx_dsp/quantize.c
index e4e741a90..80fcd66b0 100644
--- a/libvpx/vpx_dsp/quantize.c
+++ b/libvpx/vpx_dsp/quantize.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
@@ -52,7 +53,7 @@ void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr,
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
+ const int abs_qcoeff = (int)((tmp * quant) >> 16);
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
if (abs_qcoeff)
@@ -108,7 +109,7 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
+ const int abs_qcoeff = (int)((tmp * quant) >> 15);
qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
if (abs_qcoeff)
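Switching abs_qcoeff from uint32_t to int keeps the sign-restoration step that follows it, (x ^ s) - s, entirely in signed arithmetic. For reference, a standalone demonstration of that two's-complement idiom as used in these quantizers:

    #include <stdio.h>

    int main(void) {
      const int coeff = -73;
      const int sign = coeff >> 31;                /* -1 if negative, else 0 */
      const int abs_coeff = (coeff ^ sign) - sign; /* 73 */
      const int abs_q = abs_coeff / 4;             /* stand-in quantizer step */
      const int qcoeff = (abs_q ^ sign) - sign;    /* sign restored: -18 */
      printf("%d %d\n", abs_coeff, qcoeff);
      return 0;
    }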
diff --git a/libvpx/vpx_dsp/sad.c b/libvpx/vpx_dsp/sad.c
index c0c3ff996..f1f951f14 100644
--- a/libvpx/vpx_dsp/sad.c
+++ b/libvpx/vpx_dsp/sad.c
@@ -33,47 +33,6 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride,
return sad;
}
-// TODO(johannkoenig): this moved to vpx_dsp, should be able to clean this up.
-/* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred.
- * The function averages every corresponding element of the buffers and stores
- * the value in a third buffer, comp_pred.
- * pred and comp_pred are assumed to have stride = width
- * In the usage below comp_pred is a local array.
- */
-static INLINE void avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, const uint8_t *ref, int ref_stride) {
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- const int tmp = pred[j] + ref[j];
- comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE void highbd_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
- int width, int height, const uint8_t *ref8,
- int ref_stride) {
- int i, j;
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- const int tmp = pred[j] + ref[j];
- comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
#define sadMxN(m, n) \
unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride) { \
@@ -83,7 +42,7 @@ unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride, \
const uint8_t *second_pred) { \
uint8_t comp_pred[m * n]; \
- avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
+ vpx_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \
return sad(src, src_stride, comp_pred, m, m, n); \
}
@@ -221,7 +180,7 @@ unsigned int vpx_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \
int ref_stride, \
const uint8_t *second_pred) { \
uint16_t comp_pred[m * n]; \
- highbd_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
+ vpx_highbd_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \
return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
}
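
The sad.c hunks delete the file-local avg_pred/highbd_avg_pred copies and call the shared vpx_comp_avg_pred_c helpers instead, resolving the TODO above. Behavior is unchanged: the helper stores the rounded average of two predictions into comp_pred, which the _avg SAD wrappers then compare against the source block. A sketch of the shared helper's contract (pred and comp_pred are packed with stride == width, as the removed comment noted):

#include <stdint.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Rounded average of two predictors; ref uses its own stride. */
static void comp_avg_pred_sketch(uint8_t *comp_pred, const uint8_t *pred,
                                 int width, int height, const uint8_t *ref,
                                 int ref_stride) {
  int i, j;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j)
      comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + ref[j], 1);
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
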
diff --git a/libvpx/vpx_dsp/variance.c b/libvpx/vpx_dsp/variance.c
index e8bddb0a0..d960c5435 100644
--- a/libvpx/vpx_dsp/variance.c
+++ b/libvpx/vpx_dsp/variance.c
@@ -275,7 +275,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
- int w, int h, uint64_t *sse, uint64_t *sum) {
+ int w, int h, uint64_t *sse, int64_t *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -298,7 +298,7 @@ static void highbd_8_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
*sse = (uint32_t)sse_long;
*sum = (int)sum_long;
@@ -308,7 +308,7 @@ static void highbd_10_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
*sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
@@ -318,7 +318,7 @@ static void highbd_12_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, uint32_t *sse, int *sum) {
uint64_t sse_long = 0;
- uint64_t sum_long = 0;
+ int64_t sum_long = 0;
highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
*sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
@@ -341,8 +341,10 @@ uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, \
int b_stride, \
uint32_t *sse) { \
int sum; \
+ int64_t var; \
highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
\
uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
@@ -351,8 +353,10 @@ uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
int b_stride, \
uint32_t *sse) { \
int sum; \
+ int64_t var; \
highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
+ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
#define HIGHBD_GET_VAR(S) \
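
Two related fixes in variance.c: highbd_variance64 now accumulates the (signed) pixel-difference sum in an int64_t rather than a uint64_t, and the 10/12-bit variance macros clamp the result at zero. The clamp matters because sse and sum are rounded by ROUND_POWER_OF_TWO before the subtraction, so sse - sum*sum/(W*H) can land marginally below zero even though true variance never can. A minimal sketch of the clamped computation:

#include <stdint.h>

/* Variance of a W x H block from its (rounded) sse and sum; clamped at
 * zero because rounding can push the difference slightly negative. */
static uint32_t variance_sketch(uint32_t sse, int sum, int w, int h) {
  const int64_t var = (int64_t)sse - (((int64_t)sum * sum) / (w * h));
  return var >= 0 ? (uint32_t)var : 0;
}
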
diff --git a/libvpx/vpx_dsp/variance.h b/libvpx/vpx_dsp/variance.h
index cd0fd9878..c18d9b48f 100644
--- a/libvpx/vpx_dsp/variance.h
+++ b/libvpx/vpx_dsp/variance.h
@@ -74,7 +74,7 @@ typedef struct variance_vtable {
} vp8_variance_fn_ptr_t;
#endif // CONFIG_VP8
-#if CONFIG_VP9 || CONFIG_VP10
+#if CONFIG_VP9
typedef struct vp9_variance_vtable {
vpx_sad_fn_t sdf;
vpx_sad_avg_fn_t sdaf;
@@ -85,7 +85,7 @@ typedef struct vp9_variance_vtable {
vpx_sad_multi_fn_t sdx8f;
vpx_sad_multi_d_fn_t sdx4df;
} vp9_variance_fn_ptr_t;
-#endif // CONFIG_VP9 || CONFIG_VP10
+#endif // CONFIG_VP9
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vpx_dsp/vpx_dsp.mk b/libvpx/vpx_dsp/vpx_dsp.mk
index 9620eaa03..84b529136 100644
--- a/libvpx/vpx_dsp/vpx_dsp.mk
+++ b/libvpx/vpx_dsp/vpx_dsp.mk
@@ -52,6 +52,12 @@ DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
endif # CONFIG_USE_X86INC
endif # CONFIG_VP9_HIGHBITDEPTH
+ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
+DSP_SRCS-yes += add_noise.c
+DSP_SRCS-$(HAVE_MSA) += mips/add_noise_msa.c
+DSP_SRCS-$(HAVE_SSE2) += x86/add_noise_sse2.asm
+endif # CONFIG_POSTPROC
+
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c
@@ -128,7 +134,6 @@ DSP_SRCS-yes += loopfilter.c
DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/loopfilter_sse2.c
DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c
-DSP_SRCS-$(HAVE_MMX) += x86/loopfilter_mmx.asm
DSP_SRCS-$(HAVE_NEON) += arm/loopfilter_neon.c
ifeq ($(HAVE_NEON_ASM),yes)
@@ -164,7 +169,7 @@ DSP_SRCS-yes += txfm_common.h
DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h
DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h
# forward transform
-ifneq ($(filter yes,$(CONFIG_VP9_ENCODER) $(CONFIG_VP10_ENCODER)),)
+ifeq ($(CONFIG_VP9_ENCODER),yes)
DSP_SRCS-yes += fwd_txfm.c
DSP_SRCS-yes += fwd_txfm.h
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h
@@ -182,10 +187,10 @@ DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h
DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c
DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c
-endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+endif # CONFIG_VP9_ENCODER
# inverse transform
-ifneq ($(filter yes,$(CONFIG_VP9) $(CONFIG_VP10)),)
+ifeq ($(CONFIG_VP9),yes)
DSP_SRCS-yes += inv_txfm.h
DSP_SRCS-yes += inv_txfm.c
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h
@@ -235,10 +240,10 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/itrans16_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_cols_dspr2.c
endif # CONFIG_VP9_HIGHBITDEPTH
-endif # CONFIG_VP9 || CONFIG_VP10
+endif # CONFIG_VP9
# quantization
-ifneq ($(filter yes, $(CONFIG_VP9_ENCODER) $(CONFIG_VP10_ENCODER)),)
+ifeq ($(CONFIG_VP9_ENCODER),yes)
DSP_SRCS-yes += quantize.c
DSP_SRCS-yes += quantize.h
@@ -252,7 +257,20 @@ DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
endif
endif
-endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+
+# avg
+DSP_SRCS-yes += avg.c
+DSP_SRCS-$(HAVE_SSE2) += x86/avg_intrin_sse2.c
+DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
+DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
+DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c
+ifeq ($(ARCH_X86_64),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm
+endif
+endif
+
+endif # CONFIG_VP9_ENCODER
ifeq ($(CONFIG_ENCODERS),yes)
DSP_SRCS-yes += sad.c
@@ -266,7 +284,6 @@ DSP_SRCS-$(HAVE_NEON) += arm/subtract_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c
DSP_SRCS-$(HAVE_MSA) += mips/subtract_msa.c
-DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm
DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm
DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm
@@ -304,8 +321,6 @@ DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c
DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c
-DSP_SRCS-$(HAVE_MMX) += x86/variance_mmx.c
-DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm
DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3
DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_sse2.c
diff --git a/libvpx/vpx_dsp/vpx_dsp_common.h b/libvpx/vpx_dsp/vpx_dsp_common.h
index a9e180e79..a1d0a51ef 100644
--- a/libvpx/vpx_dsp/vpx_dsp_common.h
+++ b/libvpx/vpx_dsp/vpx_dsp_common.h
@@ -8,12 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VPX_DSP_COMMON_H_
-#define VPX_DSP_COMMON_H_
+#ifndef VPX_DSP_VPX_DSP_COMMON_H_
+#define VPX_DSP_VPX_DSP_COMMON_H_
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
@@ -67,4 +66,4 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
} // extern "C"
#endif
-#endif // VPX_DSP_COMMON_H_
+#endif // VPX_DSP_VPX_DSP_COMMON_H_
diff --git a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
index b369b0548..37239a195 100644
--- a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -55,13 +55,13 @@ if ($opts{arch} eq "x86_64") {
#
add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
+specialize qw/vpx_d207_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_4x4/;
add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
+specialize qw/vpx_d45_predictor_4x4 neon/, "$sse2_x86inc";
add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_4x4/;
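
vpx_dsp_rtcd_defs.pl drives the run-time CPU detection (RTCD) generator: add_proto declares a function's C signature, and specialize lists the optimized versions that may replace the C fallback when the host CPU supports them. The edits in this file swap the 4x4 predictor specializations from the removed SSE/SSSE3 kernels to SSE2 ones. A hedged C sketch of the kind of dispatch the generator emits; the flag constant is illustrative, and the generated header differs by platform:

#include <stddef.h>
#include <stdint.h>

void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t stride,
                                const uint8_t *above, const uint8_t *left);

void (*vpx_d45_predictor_4x4)(uint8_t *, ptrdiff_t, const uint8_t *,
                              const uint8_t *) = vpx_d45_predictor_4x4_c;

/* Run once at startup with the detected CPU feature flags. */
static void rtcd_setup_sketch(int flags, int has_sse2_flag) {
  vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c;
  if (flags & has_sse2_flag)
    vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_sse2;
}
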
@@ -76,7 +76,7 @@ add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d63f_predictor_4x4/;
add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
+specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_he_predictor_4x4/;
@@ -91,25 +91,25 @@ add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
+specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_ve_predictor_4x4/;
add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
+specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
+specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
+specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
+specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
+specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse2_x86inc";
add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
@@ -118,7 +118,7 @@ add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, c
specialize qw/vpx_d207e_predictor_8x8/;
add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
+specialize qw/vpx_d45_predictor_8x8 neon/, "$sse2_x86inc";
add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_8x8/;
@@ -130,7 +130,7 @@ add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d63e_predictor_8x8/;
add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_8x8/;
@@ -142,22 +142,22 @@ add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc";
+specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
+specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
+specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
+specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
+specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
@@ -178,7 +178,7 @@ add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vpx_d63e_predictor_16x16/;
add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
+specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_16x16/;
@@ -226,7 +226,7 @@ add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vpx_d63e_predictor_32x32/;
add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
+specialize qw/vpx_h_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_32x32/;
@@ -241,7 +241,7 @@ add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, con
specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
+specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
@@ -288,13 +288,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_d153_predictor_4x4/;
add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_4x4/;
@@ -387,7 +387,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
@@ -435,10 +435,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
+ specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
+ specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_dc_top_predictor_32x32/;
@@ -535,32 +535,36 @@ add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
-add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
-add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
+add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
-add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
-$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
+add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
+$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon;
-add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
+$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon;
+
+add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
-add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
+add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
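
The loop-filter hunks make two interface changes: the unused count parameter is dropped from the 4- and 8-tap prototypes, and vpx_lpf_horizontal_16 splits into vpx_lpf_horizontal_edge_8 and vpx_lpf_horizontal_edge_16, filtering one and two 8-pixel blocks across a horizontal edge (the widths the old count argument selected). A compatibility shim, sketched under that assumption:

#include <stdint.h>

void vpx_lpf_horizontal_edge_8(uint8_t *s, int pitch, const uint8_t *blimit,
                               const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_16(uint8_t *s, int pitch, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh);

/* Old-signature wrapper: count selected the filtered width before. */
static void lpf_horizontal_16_compat(uint8_t *s, int pitch,
                                     const uint8_t *blimit,
                                     const uint8_t *limit,
                                     const uint8_t *thresh, int count) {
  if (count == 2)
    vpx_lpf_horizontal_edge_16(s, pitch, blimit, limit, thresh);
  else
    vpx_lpf_horizontal_edge_8(s, pitch, blimit, limit, thresh);
}
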
@@ -572,28 +576,31 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
@@ -607,7 +614,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Forward transform
#
-if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vpx_fdct4x4 sse2/;
@@ -687,11 +694,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vpx_fdct32x32_1 sse2 msa/;
} # CONFIG_VP9_HIGHBITDEPTH
-} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+} # CONFIG_VP9_ENCODER
#
# Inverse transform
-if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
+if (vpx_config("CONFIG_VP9") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
@@ -699,7 +706,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_iwht4x4_1_add/;
add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_iwht4x4_16_add/;
+ specialize qw/vpx_iwht4x4_16_add/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_1_add/;
@@ -754,12 +761,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1024_add/;
+ add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_135_add/;
+
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_34_add/;
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1_add/;
-
+
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_16_add/;
@@ -782,10 +792,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct4x4_1_add sse2/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_64_add sse2/;
+ specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_12_add sse2/;
+ specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct8x8_1_add sse2/;
@@ -800,10 +810,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct16x16_1_add sse2/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1024_add sse2/;
+ specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc";
+ # Need to add 135 eob idct32x32 implementations.
+ $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_34_add sse2/;
+ specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1_add sse2/;
@@ -853,6 +868,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1024_add/;
+ add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_135_add/;
+
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_34_add/;
@@ -890,12 +908,20 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/;
+ specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ # Need to add 135 eob idct32x32 implementations.
+ $vpx_idct32x32_135_add_sse2=vpx_idct32x32_1024_add_sse2;
+ $vpx_idct32x32_135_add_neon=vpx_idct32x32_1024_add_neon;
+ $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2;
+ $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_34_add sse2 neon_asm dspr2 msa/;
+ specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
# Need to add 34 eob idct32x32 neon implementation.
- $vpx_idct32x32_34_add_neon_asm=vpx_idct32x32_1024_add_neon;
+ $vpx_idct32x32_34_add_neon=vpx_idct32x32_1024_add_neon;
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/;
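
The new vpx_idct32x32_135_add entry sits between the existing 34- and 1024-coefficient inverse transforms: decoders pick the cheapest variant that covers the block's end-of-block (eob) position, and the perl assignments alias the 135 variant to the full 1024 kernel per architecture until dedicated implementations land (as the "Need to add" comments note). A sketch of eob-based selection, with thresholds inferred from the variant names:

#include <stdint.h>

typedef int32_t tran_low_t;

void vpx_idct32x32_1_add(const tran_low_t *in, uint8_t *dst, int stride);
void vpx_idct32x32_34_add(const tran_low_t *in, uint8_t *dst, int stride);
void vpx_idct32x32_135_add(const tran_low_t *in, uint8_t *dst, int stride);
void vpx_idct32x32_1024_add(const tran_low_t *in, uint8_t *dst, int stride);

/* eob is the index of the last nonzero coefficient plus one. */
static void idct32x32_add_sketch(const tran_low_t *input, uint8_t *dest,
                                 int stride, int eob) {
  if (eob == 1)
    vpx_idct32x32_1_add(input, dest, stride);
  else if (eob <= 34)
    vpx_idct32x32_34_add(input, dest, stride);
  else if (eob <= 135)
    vpx_idct32x32_135_add(input, dest, stride);
  else
    vpx_idct32x32_1024_add(input, dest, stride);
}
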
@@ -907,12 +933,12 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_iwht4x4_16_add msa/, "$sse2_x86inc";
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_VP9_HIGHBITDEPTH
-} # CONFIG_VP9 || CONFIG_VP10
+} # CONFIG_VP9
#
# Quantization
#
-if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
@@ -926,7 +952,7 @@ if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCO
add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
} # CONFIG_VP9_HIGHBITDEPTH
-} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+} # CONFIG_VP9_ENCODER
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
#
@@ -957,29 +983,58 @@ add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride
specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x8 msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 mmx neon msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc";
#
# Avg
#
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
+ add_proto qw/unsigned int vpx_avg_8x8/, "const uint8_t *, int p";
+ specialize qw/vpx_avg_8x8 sse2 neon msa/;
+
+ add_proto qw/unsigned int vpx_avg_4x4/, "const uint8_t *, int p";
+ specialize qw/vpx_avg_4x4 sse2 neon msa/;
+
+ add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ specialize qw/vpx_minmax_8x8 sse2 neon/;
+
+ add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
+ specialize qw/vpx_hadamard_8x8 sse2 neon/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vpx_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
+ specialize qw/vpx_hadamard_16x16 sse2 neon/;
+
+ add_proto qw/int vpx_satd/, "const int16_t *coeff, int length";
+ specialize qw/vpx_satd sse2 neon/;
+
+ add_proto qw/void vpx_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height";
+ specialize qw/vpx_int_pro_row sse2 neon/;
+
+ add_proto qw/int16_t vpx_int_pro_col/, "const uint8_t *ref, const int width";
+ specialize qw/vpx_int_pro_col sse2 neon/;
+
+ add_proto qw/int vpx_vector_var/, "const int16_t *ref, const int16_t *src, const int bwl";
+ specialize qw/vpx_vector_var neon sse2/;
+} # CONFIG_VP9_ENCODER
+
add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad64x64_avg avx2 msa/, "$sse2_x86inc";
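
The block above promotes the former vp9-only analysis helpers (block average, min/max, Hadamard transform, SATD, integral projections, vector variance) to vpx_dsp entry points gated on CONFIG_VP9_ENCODER. Scalar reference forms for two of them, matching the rounding visible in the SSE2 code later in this patch:

#include <stdint.h>
#include <stdlib.h>

/* Rounded mean of an 8x8 block: (sum + 32) >> 6. */
static unsigned int avg_8x8_sketch(const uint8_t *s, int p) {
  int i, j, sum = 0;
  for (i = 0; i < 8; ++i, s += p)
    for (j = 0; j < 8; ++j) sum += s[j];
  return (sum + 32) >> 6;
}

/* Sum of absolute transformed differences over length coefficients. */
static int satd_sketch(const int16_t *coeff, int length) {
  int i, satd = 0;
  for (i = 0; i < length; ++i) satd += abs(coeff[i]);
  return satd;
}
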
@@ -1014,10 +1069,10 @@ add_proto qw/unsigned int vpx_sad8x4_avg/, "const uint8_t *src_ptr, int src_stri
specialize qw/vpx_sad8x4_avg msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x8_avg msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8_avg msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad4x4_avg msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4_avg msa/, "$sse2_x86inc";
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
@@ -1109,10 +1164,10 @@ add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vpx_sad8x4x4d msa/, "$sse2_x86inc";
add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x8x4d msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x8x4d msa/, "$sse2_x86inc";
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
+specialize qw/vpx_sad4x4x4d msa/, "$sse2_x86inc";
#
# Structured Similarity (SSIM)
@@ -1177,6 +1232,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Avg
#
+ add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p";
+ specialize qw/vpx_highbd_avg_8x8/;
+ add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p";
+ specialize qw/vpx_highbd_avg_4x4/;
+ add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ specialize qw/vpx_highbd_minmax_8x8/;
+
add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_highbd_sad64x64_avg/, "$sse2_x86inc";
@@ -1345,16 +1407,16 @@ add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int sourc
specialize qw/vpx_variance16x32 sse2 msa/;
add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/;
+ specialize qw/vpx_variance16x16 sse2 avx2 media neon msa/;
add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x8 mmx sse2 neon msa/;
+ specialize qw/vpx_variance16x8 sse2 neon msa/;
add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x16 mmx sse2 neon msa/;
+ specialize qw/vpx_variance8x16 sse2 neon msa/;
add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x8 mmx sse2 media neon msa/;
+ specialize qw/vpx_variance8x8 sse2 media neon msa/;
add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vpx_variance8x4 sse2 msa/;
@@ -1363,7 +1425,7 @@ add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_
specialize qw/vpx_variance4x8 sse2 msa/;
add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x4 mmx sse2 msa/;
+ specialize qw/vpx_variance4x4 sse2 msa/;
#
# Specialty Variance
@@ -1372,10 +1434,10 @@ add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride,
specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get8x8var mmx sse2 neon msa/;
+ specialize qw/vpx_get8x8var sse2 neon msa/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon msa/;
+ specialize qw/vpx_mse16x16 sse2 avx2 media neon msa/;
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vpx_mse16x8 sse2 msa/;
@@ -1387,7 +1449,7 @@ add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stri
specialize qw/vpx_mse8x8 sse2 msa/;
add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
- specialize qw/vpx_get_mb_ss mmx sse2 msa/;
+ specialize qw/vpx_get_mb_ss sse2 msa/;
add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
specialize qw/vpx_get4x4sse_cs neon msa/;
@@ -1416,25 +1478,25 @@ add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int
specialize qw/vpx_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x16 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance16x8 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x16 mmx msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance8x8 mmx media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
specialize qw/vpx_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
@@ -1470,22 +1532,22 @@ add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, i
specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
#
# Specialty Subpixel
#
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
+ specialize qw/vpx_variance_halfpixvar16x16_h sse2 media/;
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
+ specialize qw/vpx_variance_halfpixvar16x16_v sse2 media/;
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
+ specialize qw/vpx_variance_halfpixvar16x16_hv sse2 media/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
@@ -1845,6 +1907,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/uint32_t vpx_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
} # CONFIG_VP9_HIGHBITDEPTH
+
+#
+# Post Processing
+#
+if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+ add_proto qw/void vpx_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
+ specialize qw/vpx_plane_add_noise sse2 msa/;
+}
+
} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
1;
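
The new vpx_plane_add_noise hook (specialized for SSE2 and MSA above) adds dither noise to a plane, first clamping each pixel into [blackclamp, 255 - whiteclamp] so the added noise cannot overflow an 8-bit sample. A scalar sketch with the clamps as plain ints (the real interface passes 16-byte arrays so the SIMD versions can load them directly); the noise buffer is assumed to be at least width + 255 bytes:

#include <stdint.h>
#include <stdlib.h>

static void plane_add_noise_sketch(uint8_t *start, const int8_t *noise,
                                   int blackclamp, int whiteclamp,
                                   unsigned int width, unsigned int height,
                                   int pitch) {
  unsigned int i, j;
  for (i = 0; i < height; ++i) {
    uint8_t *pos = start + (size_t)i * pitch;
    const int8_t *ref = noise + (rand() & 0xff);  /* random row phase */
    for (j = 0; j < width; ++j) {
      int v = pos[j];
      if (v < blackclamp) v = blackclamp;
      if (v > 255 - whiteclamp) v = 255 - whiteclamp;
      pos[j] = (uint8_t)(v + ref[j]);
    }
  }
}
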
diff --git a/libvpx/vpx_dsp/x86/add_noise_sse2.asm b/libvpx/vpx_dsp/x86/add_noise_sse2.asm
new file mode 100644
index 000000000..ff61b19ba
--- /dev/null
+++ b/libvpx/vpx_dsp/x86/add_noise_sse2.asm
@@ -0,0 +1,83 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vpx_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
+; unsigned char blackclamp[16],
+; unsigned char whiteclamp[16],
+; unsigned char bothclamp[16],
+; unsigned int width, unsigned int height,
+; int pitch)
+global sym(vpx_plane_add_noise_sse2) PRIVATE
+sym(vpx_plane_add_noise_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog
+
+ ; get the clamps in registers
+ mov rdx, arg(2) ; blackclamp
+ movdqu xmm3, [rdx]
+ mov rdx, arg(3) ; whiteclamp
+ movdqu xmm4, [rdx]
+ mov rdx, arg(4) ; bothclamp
+ movdqu xmm5, [rdx]
+
+.addnoise_loop:
+ call sym(LIBVPX_RAND) WRT_PLT
+ mov rcx, arg(1) ;noise
+ and rax, 0xff
+ add rcx, rax
+
+ mov rdi, rcx
+ movsxd rcx, dword arg(5) ;[Width]
+ mov rsi, arg(0) ;Pos
+ xor rax,rax
+
+.addnoise_nextset:
+ movdqu xmm1,[rsi+rax] ; get the source
+
+ psubusb xmm1, xmm3 ; subtract black clamp
+ paddusb xmm1, xmm5 ; add both clamp
+ psubusb xmm1, xmm4 ; subtract whiteclamp
+
+ movdqu xmm2,[rdi+rax] ; get the noise for this line
+ paddb xmm1,xmm2 ; add it in
+ movdqu [rsi+rax],xmm1 ; store the result
+
+ add rax,16 ; move to the next line
+
+ cmp rax, rcx
+ jl .addnoise_nextset
+
+ movsxd rax, dword arg(7) ; Pitch
+ add arg(0), rax ; Start += Pitch
+ sub dword arg(6), 1 ; Height -= 1
+ jg .addnoise_loop
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+rd42:
+ times 8 dw 0x04
+four8s:
+ times 4 dd 8
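
The three saturating byte ops in the loop above form a branchless clamp: psubusb by blackclamp floors the pixel (shifting it down toward zero), paddusb by bothclamp (black + white) saturates large values at 255, and the final psubusb by whiteclamp lowers the ceiling to 255 - whiteclamp. The same sequence with SSE2 intrinsics:

#include <emmintrin.h>

/* Clamp 16 pixels to [black, 255 - white] without branches;
 * both must hold black + white per byte. */
static __m128i clamp_pixels_sketch(__m128i px, __m128i black,
                                   __m128i white, __m128i both) {
  px = _mm_subs_epu8(px, black);   /* floor at black (values shift down) */
  px = _mm_adds_epu8(px, both);    /* saturate the high end at 255 */
  return _mm_subs_epu8(px, white); /* ceiling becomes 255 - white */
}
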
diff --git a/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/libvpx/vpx_dsp/x86/avg_intrin_sse2.c
index 4531d794a..f9af6cf97 100644
--- a/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/libvpx/vpx_dsp/x86/avg_intrin_sse2.c
@@ -10,10 +10,10 @@
#include <emmintrin.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
-void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
+void vpx_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
int *min, int *max) {
__m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff;
u0 = _mm_setzero_si128();
@@ -91,7 +91,7 @@ void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
*min = _mm_extract_epi16(minabsdiff, 0);
}
-unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
+unsigned int vpx_avg_8x8_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0;
unsigned int avg = 0;
u0 = _mm_setzero_si128();
@@ -118,7 +118,7 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
return (avg + 32) >> 6;
}
-unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) {
+unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0;
unsigned int avg = 0;
u0 = _mm_setzero_si128();
@@ -212,7 +212,7 @@ static void hadamard_col8_sse2(__m128i *in, int iter) {
}
}
-void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
+void vpx_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
__m128i src[8];
src[0] = _mm_load_si128((const __m128i *)src_diff);
@@ -244,13 +244,13 @@ void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
_mm_store_si128((__m128i *)coeff, src[7]);
}
-void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
+void vpx_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8;
- vp9_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64);
+ vpx_hadamard_8x8_sse2(src_ptr, src_stride, coeff + idx * 64);
}
for (idx = 0; idx < 64; idx += 8) {
@@ -283,34 +283,33 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
}
}
-int16_t vp9_satd_sse2(const int16_t *coeff, int length) {
+int vpx_satd_sse2(const int16_t *coeff, int length) {
int i;
- __m128i sum = _mm_load_si128((const __m128i *)coeff);
- __m128i sign = _mm_srai_epi16(sum, 15);
- __m128i val = _mm_xor_si128(sum, sign);
- sum = _mm_sub_epi16(val, sign);
- coeff += 8;
-
- for (i = 8; i < length; i += 8) {
- __m128i src_line = _mm_load_si128((const __m128i *)coeff);
- sign = _mm_srai_epi16(src_line, 15);
- val = _mm_xor_si128(src_line, sign);
- val = _mm_sub_epi16(val, sign);
- sum = _mm_add_epi16(sum, val);
+ const __m128i zero = _mm_setzero_si128();
+ __m128i accum = zero;
+
+ for (i = 0; i < length; i += 8) {
+ const __m128i src_line = _mm_load_si128((const __m128i *)coeff);
+ const __m128i inv = _mm_sub_epi16(zero, src_line);
+ const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line)
+ const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);
+ const __m128i abs_hi = _mm_unpackhi_epi16(abs, zero);
+ const __m128i sum = _mm_add_epi32(abs_lo, abs_hi);
+ accum = _mm_add_epi32(accum, sum);
coeff += 8;
}
- val = _mm_srli_si128(sum, 8);
- sum = _mm_add_epi16(sum, val);
- val = _mm_srli_epi64(sum, 32);
- sum = _mm_add_epi16(sum, val);
- val = _mm_srli_epi32(sum, 16);
- sum = _mm_add_epi16(sum, val);
+ { // cascading summation of accum
+ __m128i hi = _mm_srli_si128(accum, 8);
+ accum = _mm_add_epi32(accum, hi);
+ hi = _mm_srli_epi64(accum, 32);
+ accum = _mm_add_epi32(accum, hi);
+ }
- return _mm_extract_epi16(sum, 0);
+ return _mm_cvtsi128_si32(accum);
}
-void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
+void vpx_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
const int ref_stride, const int height) {
int idx;
__m128i zero = _mm_setzero_si128();
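
The vpx_satd_sse2 rewrite above fixes an accumulator overflow: the old code summed absolute values in 16-bit lanes and returned int16_t, which can wrap once a block holds enough large coefficients (a 32x32 block has 1024 of them); the new version computes abs(x) as max(x, -x), widens each group to 32-bit lanes before accumulating, and returns int. The per-iteration step as an intrinsics sketch:

#include <emmintrin.h>
#include <stdint.h>

/* Accumulate |coeff[0..7]| into the 32-bit lanes of accum. */
static __m128i satd_step_sketch(__m128i accum, const int16_t *coeff) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i v = _mm_load_si128((const __m128i *)coeff);
  const __m128i abs16 = _mm_max_epi16(v, _mm_sub_epi16(zero, v));
  /* lanes are <= 32767, so zero-extending to 32 bits is exact */
  const __m128i lo = _mm_unpacklo_epi16(abs16, zero);
  const __m128i hi = _mm_unpackhi_epi16(abs16, zero);
  return _mm_add_epi32(accum, _mm_add_epi32(lo, hi));
}
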
@@ -359,7 +358,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
_mm_storeu_si128((__m128i *)hbuf, s1);
}
-int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
+int16_t vpx_int_pro_col_sse2(uint8_t const *ref, const int width) {
__m128i zero = _mm_setzero_si128();
__m128i src_line = _mm_load_si128((const __m128i *)ref);
__m128i s0 = _mm_sad_epu8(src_line, zero);
@@ -379,7 +378,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {
return _mm_extract_epi16(s0, 0);
}
-int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src,
+int vpx_vector_var_sse2(int16_t const *ref, int16_t const *src,
const int bwl) {
int idx;
int width = 4 << bwl;
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm
index 74c52df19..26412e8e4 100644
--- a/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
+++ b/libvpx/vpx_dsp/x86/avg_ssse3_x86_64.asm
@@ -8,11 +8,11 @@
; be found in the AUTHORS file in the root of the source tree.
;
-%define private_prefix vp9
+%define private_prefix vpx
%include "third_party/x86inc/x86inc.asm"
-; This file provides SSSE3 version of the forward transformation. Part
+; This file provides SSSE3 version of the hadamard transformation. Part
; of the macro definitions are originally derived from the ffmpeg project.
; The current version applies to x86 64-bit only.
diff --git a/libvpx/vpx_dsp/x86/convolve.h b/libvpx/vpx_dsp/x86/convolve.h
index b6fbfcf92..7e43eb7c7 100644
--- a/libvpx/vpx_dsp/x86/convolve.h
+++ b/libvpx/vpx_dsp/x86/convolve.h
@@ -33,7 +33,7 @@ typedef void filter8_1dfunction (
int w, int h) { \
assert(filter[3] != 128); \
assert(step_q4 == 16); \
- if (filter[0] || filter[1] || filter[2]) { \
+ if (filter[0] | filter[1] | filter[2]) { \
while (w >= 16) { \
vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \
src_stride, \
@@ -45,27 +45,20 @@ typedef void filter8_1dfunction (
dst += 16; \
w -= 16; \
} \
- while (w >= 8) { \
+ if (w == 8) { \
vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \
src_stride, \
dst, \
dst_stride, \
h, \
filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
+ } else if (w == 4) { \
vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \
src_stride, \
dst, \
dst_stride, \
h, \
filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
} \
} else { \
while (w >= 16) { \
@@ -79,27 +72,20 @@ typedef void filter8_1dfunction (
dst += 16; \
w -= 16; \
} \
- while (w >= 8) { \
+ if (w == 8) { \
vpx_filter_block1d8_##dir##2_##avg##opt(src, \
src_stride, \
dst, \
dst_stride, \
h, \
filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
+ } else if (w == 4) { \
vpx_filter_block1d4_##dir##2_##avg##opt(src, \
src_stride, \
dst, \
dst_stride, \
h, \
filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
} \
} \
}
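
Two simplifications in this macro: the tap test filter[0] | filter[1] | filter[2] folds three short-circuit branches into one bitwise OR (the values are only tested against zero), and the residual-width loops collapse to if (w == 8) / else if (w == 4), valid because w enters as a power of two no larger than 64, so after the 16-wide loop only 8, 4 or 0 columns can remain. The control flow, sketched without the macro plumbing:

#include <assert.h>
#include <stdint.h>

typedef void filter_fn(const uint8_t *src, int src_stride, uint8_t *dst,
                       int dst_stride, int h);

static void convolve_dispatch_sketch(const uint8_t *src, int src_stride,
                                     uint8_t *dst, int dst_stride, int w,
                                     int h, filter_fn *f16, filter_fn *f8,
                                     filter_fn *f4) {
  assert(w == 4 || w == 8 || w == 16 || w == 32 || w == 64);
  while (w >= 16) {  /* 64- and 32-wide blocks run this to completion */
    f16(src, src_stride, dst, dst_stride, h);
    src += 16;
    dst += 16;
    w -= 16;
  }
  if (w == 8)
    f8(src, src_stride, dst, dst_stride, h);
  else if (w == 4)
    f4(src, src_stride, dst, dst_stride, h);
}
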
@@ -116,8 +102,7 @@ void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
assert(h <= 64); \
assert(x_step_q4 == 16); \
assert(y_step_q4 == 16); \
- if (filter_x[0] || filter_x[1] || filter_x[2]|| \
- filter_y[0] || filter_y[1] || filter_y[2]) { \
+ if (filter_x[0] | filter_x[1] | filter_x[2]) { \
DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
filter_x, x_step_q4, filter_y, y_step_q4, \
@@ -161,7 +146,7 @@ typedef void highbd_filter8_1dfunction (
if (step_q4 == 16 && filter[3] != 128) { \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- if (filter[0] || filter[1] || filter[2]) { \
+ if (filter[0] | filter[1] | filter[2]) { \
while (w >= 16) { \
vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
src_stride, \
@@ -253,8 +238,7 @@ void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
assert(w <= 64); \
assert(h <= 64); \
if (x_step_q4 == 16 && y_step_q4 == 16) { \
- if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
- filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
+ if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
CONVERT_TO_BYTEPTR(fdata2), 64, \
diff --git a/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h b/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h
index 4df39dff8..951af3a62 100644
--- a/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h
+++ b/libvpx/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h
@@ -10,6 +10,7 @@
#include <immintrin.h> // AVX2
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/txfm_common.h"
#define pair256_set_epi16(a, b) \
diff --git a/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c b/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c
index bca72e874..3e4f49bd9 100644
--- a/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c
+++ b/libvpx/vpx_dsp/x86/fwd_txfm_sse2.c
@@ -11,6 +11,7 @@
#include <emmintrin.h> // SSE2
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"
@@ -40,7 +41,7 @@ void vpx_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
in1 = _mm_add_epi32(tmp, in0);
in0 = _mm_slli_epi32(in1, 1);
- store_output(&in0, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in0);
}
void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
@@ -80,7 +81,7 @@ void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
in0 = _mm_srli_si128(sum, 8);
in1 = _mm_add_epi32(sum, in0);
- store_output(&in1, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}
void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
@@ -91,40 +92,39 @@ void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
int i;
for (i = 0; i < 2; ++i) {
- input += 8 * i;
- in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
- in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
- in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
- in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in0 = _mm_load_si128((const __m128i *)(input + 0 * stride + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 0 * stride + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 1 * stride + 0));
+ in3 = _mm_load_si128((const __m128i *)(input + 1 * stride + 8));
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
- in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
- in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
- in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
- in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ in0 = _mm_load_si128((const __m128i *)(input + 2 * stride + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 2 * stride + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 3 * stride + 0));
+ in3 = _mm_load_si128((const __m128i *)(input + 3 * stride + 8));
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
- in0 = _mm_load_si128((const __m128i *)(input + 8 * stride));
- in1 = _mm_load_si128((const __m128i *)(input + 9 * stride));
- in2 = _mm_load_si128((const __m128i *)(input + 10 * stride));
- in3 = _mm_load_si128((const __m128i *)(input + 11 * stride));
+ in0 = _mm_load_si128((const __m128i *)(input + 4 * stride + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 4 * stride + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 5 * stride + 0));
+ in3 = _mm_load_si128((const __m128i *)(input + 5 * stride + 8));
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
u1 = _mm_add_epi16(in2, in3);
sum = _mm_add_epi16(sum, u0);
- in0 = _mm_load_si128((const __m128i *)(input + 12 * stride));
- in1 = _mm_load_si128((const __m128i *)(input + 13 * stride));
- in2 = _mm_load_si128((const __m128i *)(input + 14 * stride));
- in3 = _mm_load_si128((const __m128i *)(input + 15 * stride));
+ in0 = _mm_load_si128((const __m128i *)(input + 6 * stride + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 6 * stride + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 7 * stride + 0));
+ in3 = _mm_load_si128((const __m128i *)(input + 7 * stride + 8));
sum = _mm_add_epi16(sum, u1);
u0 = _mm_add_epi16(in0, in1);
@@ -132,6 +132,7 @@ void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
sum = _mm_add_epi16(sum, u0);
sum = _mm_add_epi16(sum, u1);
+ input += 8 * stride;
}
u0 = _mm_setzero_si128();
@@ -149,7 +150,7 @@ void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
in1 = _mm_add_epi32(sum, in0);
in1 = _mm_srai_epi32(in1, 1);
- store_output(&in1, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}
void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
@@ -221,7 +222,7 @@ void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
in1 = _mm_add_epi32(sum, in0);
in1 = _mm_srai_epi32(in1, 3);
- store_output(&in1, output);
+ output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}
#define DCT_HIGH_BIT_DEPTH 0
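
The _1 forward transforms produce only the DC coefficient, so the vector store_output() is replaced with a scalar write to output[0], and the 16x16 sum loop now consumes eight full rows (both 8-lane halves) per iteration before advancing input by 8 * stride. A hedged scalar equivalent, with the per-size scaling read off the shifts above (the typedef and helper name are illustrative):

#include <stdint.h>

typedef int32_t tran_low_t;  /* int16_t in non-high-bitdepth builds */

/* Sum every input sample, then apply the block-size-dependent scaling
 * visible above: << 1 for 4x4, none for 8x8, >> 1 for 16x16 and >> 3
 * for 32x32. Only output[0] is produced by the _1 variants. */
static tran_low_t fdct_nxn_1_ref(const int16_t *input, int stride, int n) {
  int r, c, sum = 0;
  for (r = 0; r < n; ++r)
    for (c = 0; c < n; ++c)
      sum += input[r * stride + c];
  if (n == 4) return (tran_low_t)(sum << 1);
  if (n == 8) return (tran_low_t)sum;
  if (n == 16) return (tran_low_t)(sum >> 1);
  return (tran_low_t)(sum >> 3);  /* n == 32 */
}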
diff --git a/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c
index 5782155bf..4a8fb6df7 100644
--- a/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c
+++ b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
+
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
@@ -40,7 +42,9 @@ uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src,
&xsum0, &xxsum0);
*sse = xxsum0;
- return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));
+ assert(xsum0 <= 255 * 16 * 16);
+ assert(xsum0 >= -255 * 16 * 16);
+ return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8));
}
uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src,
@@ -54,7 +58,9 @@ uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src,
&xsum0, &xxsum0);
*sse = xxsum0;
- return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));
+ assert(xsum0 <= 255 * 16 * 16);
+ assert(xsum0 >= -255 * 16 * 16);
+ return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8));
}
@@ -70,5 +76,7 @@ uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src,
&xsum0, &xxsum0);
*sse = xxsum0;
- return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));
+ assert(xsum0 <= 255 * 16 * 16);
+ assert(xsum0 >= -255 * 16 * 16);
+ return (xxsum0 - ((uint32_t)((int64_t)xsum0 * xsum0) >> 8));
}
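
The widened multiply here is defensive rather than a behavior change: |xsum0| is bounded by 255 * 16 * 16 = 65280, so its square (at most 4,261,478,400) fits in 32 unsigned bits, but forming it through int64_t keeps the negative-sum case explicit instead of relying on unsigned wrap-around, and the new asserts document the bound. A hedged sketch of the tail computation:

#include <assert.h>
#include <stdint.h>

/* sse - sum^2 / 256 for a 16x16 block; the square is formed in 64 bits
 * and only then truncated, which is lossless given the asserts. */
static uint32_t variance16x16_tail(int sum, uint32_t sse) {
  assert(sum <= 255 * 16 * 16);
  assert(sum >= -255 * 16 * 16);
  return sse - ((uint32_t)((int64_t)sum * sum) >> 8);
}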
diff --git a/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm b/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm
index b12d29c0a..c61b62104 100644
--- a/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm
+++ b/libvpx/vpx_dsp/x86/highbd_intrapred_sse2.asm
@@ -17,24 +17,20 @@ pw_16: times 4 dd 16
pw_32: times 4 dd 32
SECTION .text
-INIT_MMX sse
+INIT_XMM sse2
cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq
movq m0, [aboveq]
movq m2, [leftq]
- DEFINE_ARGS dst, stride, one
- mov oned, 0x0001
- pxor m1, m1
- movd m3, oned
- pshufw m3, m3, 0x0
paddw m0, m2
- pmaddwd m0, m3
- packssdw m0, m1
- pmaddwd m0, m3
+ pshuflw m1, m0, 0xe
+ paddw m0, m1
+ pshuflw m1, m0, 0x1
+ paddw m0, m1
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq*2], m0
lea dstq, [dstq+strideq*4]
@@ -122,30 +118,29 @@ cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
-%if ARCH_X86_64
INIT_XMM sse2
-cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
+cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
GET_GOT goffsetq
- pxor m1, m1
mova m0, [aboveq]
mova m2, [aboveq+16]
mova m3, [aboveq+32]
mova m4, [aboveq+48]
- mova m5, [leftq]
- mova m6, [leftq+16]
- mova m7, [leftq+32]
- mova m8, [leftq+48]
+ paddw m0, m2
+ paddw m3, m4
+ mova m2, [leftq]
+ mova m4, [leftq+16]
+ mova m5, [leftq+32]
+ mova m6, [leftq+48]
+ paddw m2, m4
+ paddw m5, m6
+ paddw m0, m3
+ paddw m2, m5
+ pxor m1, m1
+ paddw m0, m2
DEFINE_ARGS dst, stride, stride3, lines4
lea stride3q, [strideq*3]
mov lines4d, 8
- paddw m0, m2
- paddw m0, m3
- paddw m0, m4
- paddw m0, m5
- paddw m0, m6
- paddw m0, m7
- paddw m0, m8
movhlps m2, m0
paddw m0, m2
punpcklwd m0, m1
@@ -181,9 +176,8 @@ cglobal highbd_dc_predictor_32x32, 4, 5, 9, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
-%endif
-INIT_MMX sse
+INIT_XMM sse2
cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
movq m0, [aboveq]
movq [dstq ], m0
@@ -261,43 +255,44 @@ cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
jnz .loop
REP_RET
-INIT_MMX sse
-cglobal highbd_tm_predictor_4x4, 5, 6, 5, dst, stride, above, left, bps, one
+INIT_XMM sse2
+cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bps
movd m1, [aboveq-2]
movq m0, [aboveq]
- pshufw m1, m1, 0x0
+ pshuflw m1, m1, 0x0
+ movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4
+ movlhps m1, m1 ; tl tl tl tl tl tl tl tl
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- movd m3, oned
+ pcmpeqw m3, m3
movd m4, bpsd
- pshufw m3, m3, 0x0
- DEFINE_ARGS dst, stride, line, left
- mov lineq, -2
- mova m2, m3
+ psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl
psllw m3, m4
- add leftq, 8
- psubw m3, m2 ; max possible value
- pxor m4, m4 ; min possible value
- psubw m0, m1
-.loop:
- movq m1, [leftq+lineq*4]
- movq m2, [leftq+lineq*4+2]
- pshufw m1, m1, 0x0
- pshufw m2, m2, 0x0
- paddw m1, m0
+ pcmpeqw m2, m2
+ pxor m4, m4 ; min possible value
+ pxor m3, m2 ; max possible value
+ mova m1, [leftq]
+ pshuflw m2, m1, 0x0
+ pshuflw m5, m1, 0x55
+ movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2
paddw m2, m0
;Clamp to the bit-depth
- pminsw m1, m3
pminsw m2, m3
- pmaxsw m1, m4
pmaxsw m2, m4
;Store the values
- movq [dstq ], m1
- movq [dstq+strideq*2], m2
+ movq [dstq ], m2
+ movhpd [dstq+strideq*2], m2
lea dstq, [dstq+strideq*4]
- inc lineq
- jnz .loop
- REP_RET
+ pshuflw m2, m1, 0xaa
+ pshuflw m5, m1, 0xff
+ movlhps m2, m5
+ paddw m2, m0
+ ;Clamp to the bit-depth
+ pminsw m2, m3
+ pmaxsw m2, m4
+ ;Store the values
+ movq [dstq ], m2
+ movhpd [dstq+strideq*2], m2
+ RET
INIT_XMM sse2
cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
@@ -343,63 +338,55 @@ cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bps, one
jnz .loop
REP_RET
-%if ARCH_X86_64
INIT_XMM sse2
-cglobal highbd_tm_predictor_16x16, 5, 6, 9, dst, stride, above, left, bps, one
+cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bps
movd m2, [aboveq-2]
mova m0, [aboveq]
mova m1, [aboveq+16]
pshuflw m2, m2, 0x0
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- pxor m7, m7
- pxor m8, m8
- pinsrw m7, oned, 0
- pinsrw m8, bpsd, 0
- pshuflw m7, m7, 0x0
+ pcmpeqw m3, m3
+ movd m4, bpsd
+ punpcklqdq m2, m2
+ psllw m3, m4
+ pcmpeqw m5, m5
+ pxor m4, m4 ; min possible value
+ pxor m3, m5 ; max possible value
DEFINE_ARGS dst, stride, line, left
- punpcklqdq m7, m7
mov lineq, -8
- mova m5, m7
- punpcklqdq m2, m2
- psllw m7, m8
- add leftq, 32
- psubw m7, m5 ; max possible value
- pxor m8, m8 ; min possible value
psubw m0, m2
psubw m1, m2
.loop:
- movd m2, [leftq+lineq*4]
- movd m3, [leftq+lineq*4+2]
- pshuflw m2, m2, 0x0
- pshuflw m3, m3, 0x0
- punpcklqdq m2, m2
- punpcklqdq m3, m3
- paddw m4, m2, m0
- paddw m5, m3, m0
+ movd m7, [leftq]
+ pshuflw m5, m7, 0x0
+ pshuflw m2, m7, 0x55
+ punpcklqdq m5, m5 ; l1 l1 l1 l1 l1 l1 l1 l1
+ punpcklqdq m2, m2 ; l2 l2 l2 l2 l2 l2 l2 l2
+ paddw m6, m5, m0 ; t1-tl+l1 to t4-tl+l1
+ paddw m5, m1 ; t5-tl+l1 to t8-tl+l1
+ pminsw m6, m3
+ pminsw m5, m3
+ pmaxsw m6, m4 ; Clamp to the bit-depth
+ pmaxsw m5, m4
+ mova [dstq ], m6
+ mova [dstq +16], m5
+ paddw m6, m2, m0
paddw m2, m1
- paddw m3, m1
- ;Clamp to the bit-depth
- pminsw m4, m7
- pminsw m5, m7
- pminsw m2, m7
- pminsw m3, m7
- pmaxsw m4, m8
- pmaxsw m5, m8
- pmaxsw m2, m8
- pmaxsw m3, m8
- ;Store the values
- mova [dstq ], m4
- mova [dstq+strideq*2 ], m5
- mova [dstq +16], m2
- mova [dstq+strideq*2+16], m3
+ pminsw m6, m3
+ pminsw m2, m3
+ pmaxsw m6, m4
+ pmaxsw m2, m4
+ mova [dstq+strideq*2 ], m6
+ mova [dstq+strideq*2+16], m2
lea dstq, [dstq+strideq*4]
inc lineq
+ lea leftq, [leftq+4]
+
jnz .loop
REP_RET
INIT_XMM sse2
-cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
+cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bps
movd m0, [aboveq-2]
mova m1, [aboveq]
mova m2, [aboveq+16]
@@ -407,70 +394,60 @@ cglobal highbd_tm_predictor_32x32, 5, 6, 12, dst, stride, above, left, bps, one
mova m4, [aboveq+48]
pshuflw m0, m0, 0x0
; Get the values to compute the maximum value at this bit depth
- mov oned, 1
- pxor m10, m10
- pxor m11, m11
- pinsrw m10, oned, 0
- pinsrw m11, bpsd, 0
- pshuflw m10, m10, 0x0
+ pcmpeqw m5, m5
+ movd m6, bpsd
+ psllw m5, m6
+ pcmpeqw m7, m7
+ pxor m6, m6 ; min possible value
+ pxor m5, m7 ; max possible value
+ punpcklqdq m0, m0
DEFINE_ARGS dst, stride, line, left
- punpcklqdq m10, m10
mov lineq, -16
- mova m5, m10
- punpcklqdq m0, m0
- psllw m10, m11
- add leftq, 64
- psubw m10, m5 ; max possible value
- pxor m11, m11 ; min possible value
psubw m1, m0
psubw m2, m0
psubw m3, m0
psubw m4, m0
.loop:
- movd m5, [leftq+lineq*4]
- movd m6, [leftq+lineq*4+2]
- pshuflw m5, m5, 0x0
- pshuflw m6, m6, 0x0
- punpcklqdq m5, m5
- punpcklqdq m6, m6
- paddw m7, m5, m1
- paddw m8, m5, m2
- paddw m9, m5, m3
- paddw m5, m4
- ;Clamp these values to the bit-depth
- pminsw m7, m10
- pminsw m8, m10
- pminsw m9, m10
- pminsw m5, m10
- pmaxsw m7, m11
- pmaxsw m8, m11
- pmaxsw m9, m11
- pmaxsw m5, m11
- ;Store these values
- mova [dstq ], m7
- mova [dstq +16], m8
- mova [dstq +32], m9
- mova [dstq +48], m5
- paddw m7, m6, m1
- paddw m8, m6, m2
- paddw m9, m6, m3
- paddw m6, m4
- ;Clamp these values to the bit-depth
- pminsw m7, m10
- pminsw m8, m10
- pminsw m9, m10
- pminsw m6, m10
- pmaxsw m7, m11
- pmaxsw m8, m11
- pmaxsw m9, m11
- pmaxsw m6, m11
- ;Store these values
- mova [dstq+strideq*2 ], m7
- mova [dstq+strideq*2+16], m8
- mova [dstq+strideq*2+32], m9
- mova [dstq+strideq*2+48], m6
+ movd m7, [leftq]
+ pshuflw m7, m7, 0x0
+ punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1
+ paddw m0, m7, m1
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq ], m0
+ paddw m0, m7, m2
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq +16], m0
+ paddw m0, m7, m3
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq +32], m0
+ paddw m0, m7, m4
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq +48], m0
+ movd m7, [leftq+2]
+ pshuflw m7, m7, 0x0
+ punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2
+ paddw m0, m7, m1
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2 ], m0
+ paddw m0, m7, m2
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2+16], m0
+ paddw m0, m7, m3
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2+32], m0
+ paddw m0, m7, m4
+ pminsw m0, m5
+ pmaxsw m0, m6
+ mova [dstq+strideq*2+48], m0
lea dstq, [dstq+strideq*4]
+ lea leftq, [leftq+4]
inc lineq
jnz .loop
REP_RET
-%endif
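
The TM (TrueMotion) predictors above now run on plain SSE2 registers, dropping the MMX paths and the x86-64-only gating for the 16x16 and 32x32 sizes. A hedged scalar reference of what every variant computes, matching the psubw/paddw/pminsw/pmaxsw sequence in the asm:

#include <stddef.h>
#include <stdint.h>

/* above[-1] is the top-left pixel; the asm keeps above[c] - top_left in
 * registers and adds each left[r] to it, clamping to
 * [0, (1 << bd) - 1] exactly as the pminsw/pmaxsw pairs do. */
static void highbd_tm_predictor_ref(uint16_t *dst, ptrdiff_t stride, int n,
                                    const uint16_t *above,
                                    const uint16_t *left, int bd) {
  const int max = (1 << bd) - 1;
  const int top_left = above[-1];
  int r, c;
  for (r = 0; r < n; ++r) {
    for (c = 0; c < n; ++c) {
      const int v = left[r] + above[c] - top_left;
      dst[c] = (uint16_t)(v < 0 ? 0 : (v > max ? max : v));
    }
    dst += stride;
  }
}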
diff --git a/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c b/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c
index c4fd5e1a0..72e42adc9 100644
--- a/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c
+++ b/libvpx/vpx_dsp/x86/highbd_loopfilter_sse2.c
@@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
// TODO(debargha, peter): Break up large functions into smaller ones
// in this file.
-static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s,
- int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh,
- int bd) {
+void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
+ const uint8_t *_blimit,
+ const uint8_t *_limit,
+ const uint8_t *_thresh, int bd) {
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi16(1);
__m128i blimit, limit, thresh;
@@ -496,34 +494,19 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s,
_mm_store_si128((__m128i *)(s - 0 * p), q0);
}
-static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s,
- int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh,
- int bd) {
- highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
- highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh,
- bd);
-}
-
-// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh,
- int count, int bd) {
- if (count == 1)
- highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
- else
- highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
+void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
+ const uint8_t *_blimit,
+ const uint8_t *_limit,
+ const uint8_t *_thresh, int bd) {
+ vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
+ vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
}
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
- int count, int bd) {
+ int bd) {
DECLARE_ALIGNED(16, uint16_t, flat_op2[16]);
DECLARE_ALIGNED(16, uint16_t, flat_op1[16]);
DECLARE_ALIGNED(16, uint16_t, flat_op0[16]);
@@ -556,8 +539,6 @@ void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
__m128i work_a;
__m128i filter1, filter2;
- (void)count;
-
if (bd == 8) {
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
@@ -764,16 +745,15 @@ void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
- 1, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
}
void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
- int count, int bd) {
+ int bd) {
const __m128i zero = _mm_set1_epi16(0);
__m128i blimit, limit, thresh;
__m128i mask, hev, flat;
@@ -813,8 +793,6 @@ void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
__m128i work_a;
__m128i filter1, filter2;
- (void)count;
-
if (bd == 8) {
blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
@@ -944,9 +922,8 @@ void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
- bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
}
static INLINE void highbd_transpose(uint16_t *src[], int in_p,
@@ -1058,11 +1035,10 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
- int count, int bd) {
+ int bd) {
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
uint16_t *src[1];
uint16_t *dst[1];
- (void)count;
// Transpose 8x8
src[0] = s - 4;
@@ -1071,8 +1047,7 @@ void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
- bd);
+ vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1112,11 +1087,10 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
- int count, int bd) {
+ int bd) {
DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
uint16_t *src[1];
uint16_t *dst[1];
- (void)count;
// Transpose 8x8
src[0] = s - 4;
@@ -1125,8 +1099,7 @@ void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
- bd);
+ vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1181,8 +1154,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
highbd_transpose(src, p, dst, 8, 2);
// Loop filtering
- highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit,
- thresh, bd);
+ vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit,
+ thresh, bd);
src[0] = t_dst;
src[1] = t_dst + 8 * 8;
dst[0] = s - 8;
@@ -1205,8 +1178,8 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
// Loop filtering
- highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
- thresh, bd);
+ vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
+ thresh, bd);
// Transpose back
highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
diff --git a/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 93df92a9e..30ee81b68 100644
--- a/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -79,20 +79,13 @@ SECTION .text
%macro INC_SRC_BY_SRC_STRIDE 0
%if ARCH_X86=1 && CONFIG_PIC=1
- lea srcq, [srcq + src_stridemp*2]
+ add srcq, src_stridemp
+ add srcq, src_stridemp
%else
lea srcq, [srcq + src_strideq*2]
%endif
%endmacro
-%macro INC_SRC_BY_SRC_2STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
- lea srcq, [srcq + src_stridemp*4]
-%else
- lea srcq, [srcq + src_strideq*4]
-%endif
-%endmacro
-
%macro SUBPEL_VARIANCE 1-2 0 ; W
%define bilin_filter_m bilin_filter_m_sse2
%define filter_idx_shift 5
@@ -123,8 +116,10 @@ SECTION .text
%define sec_str sec_stridemp
; Store bilin_filter and pw_8 location in stack
- GET_GOT eax
- add esp, 4 ; restore esp
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -140,8 +135,10 @@ SECTION .text
%define block_height heightd
; Store bilin_filter and pw_8 location in stack
- GET_GOT eax
- add esp, 4 ; restore esp
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -980,8 +977,9 @@ SECTION .text
.x_other_y_other_loop:
movu m2, [srcq]
movu m4, [srcq+2]
- movu m3, [srcq+src_strideq*2]
- movu m5, [srcq+src_strideq*2+2]
+ INC_SRC_BY_SRC_STRIDE
+ movu m3, [srcq]
+ movu m5, [srcq+2]
pmullw m2, filter_x_a
pmullw m4, filter_x_b
paddw m2, filter_rnd
@@ -1014,7 +1012,7 @@ SECTION .text
SUM_SSE m0, m2, m4, m3, m6, m7
mova m0, m5
- INC_SRC_BY_SRC_2STRIDE
+ INC_SRC_BY_SRC_STRIDE
lea dstq, [dstq + dst_strideq * 4]
%if %2 == 1 ; avg
add secq, sec_str
diff --git a/libvpx/vpx_dsp/x86/highbd_variance_sse2.c b/libvpx/vpx_dsp/x86/highbd_variance_sse2.c
index b45331caa..14d029c9a 100644
--- a/libvpx/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/libvpx/vpx_dsp/x86/highbd_variance_sse2.c
@@ -243,20 +243,24 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
}
#if CONFIG_USE_X86INC
+// The 2 unused parameters are placeholders for PIC-enabled builds.
+// These are declarations for functions defined in
+// highbd_subpel_variance_impl_sse2.asm
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
int x_offset, int y_offset, \
const uint16_t *dst, \
ptrdiff_t dst_stride, \
- int height, unsigned int *sse);
-#define DECLS(opt1, opt2) \
- DECL(8, opt1); \
- DECL(16, opt1)
-
-DECLS(sse2, sse);
-// TODO(johannkoenig): enable the ssse3 or delete
-// DECLS(ssse3, ssse3);
+ int height, \
+ unsigned int *sse, \
+ void *unused0, void *unused);
+#define DECLS(opt) \
+ DECL(8, opt); \
+ DECL(16, opt)
+
+DECLS(sse2);
+
#undef DECLS
#undef DECL
@@ -274,7 +278,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
x_offset, y_offset, \
dst, dst_stride, h, \
- &sse); \
+ &sse, NULL, NULL); \
if (w > wf) { \
unsigned int sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
@@ -282,19 +286,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
x_offset, y_offset, \
dst + 16, \
dst_stride, \
- h, &sse2); \
+ h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
- h, &sse2); \
+ h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48, src_stride, x_offset, y_offset, \
- dst + 48, dst_stride, h, &sse2); \
+ dst + 48, dst_stride, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -312,7 +317,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
x_offset, y_offset, \
dst, dst_stride, \
- h, &sse); \
+ h, &sse, NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
@@ -320,20 +325,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
x_offset, y_offset, \
dst + 16, \
dst_stride, \
- h, &sse2); \
+ h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
- h, &sse2); \
+ h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
x_offset, y_offset, \
dst + 48, dst_stride, \
- h, &sse2); \
+ h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -359,27 +365,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 16 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 32 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, \
x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
- dst_stride, height, &sse2); \
+ dst_stride, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
}\
@@ -391,25 +397,26 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}
-#define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (int64_t)); \
-FN(16, 8, 16, 4, 3, opt1, (int64_t)); \
-FN(8, 16, 8, 3, 4, opt1, (int64_t)); \
-FN(8, 8, 8, 3, 3, opt1, (int64_t)); \
-FN(8, 4, 8, 3, 2, opt1, (int64_t));
+#define FNS(opt) \
+FN(64, 64, 16, 6, 6, opt, (int64_t)); \
+FN(64, 32, 16, 6, 5, opt, (int64_t)); \
+FN(32, 64, 16, 5, 6, opt, (int64_t)); \
+FN(32, 32, 16, 5, 5, opt, (int64_t)); \
+FN(32, 16, 16, 5, 4, opt, (int64_t)); \
+FN(16, 32, 16, 4, 5, opt, (int64_t)); \
+FN(16, 16, 16, 4, 4, opt, (int64_t)); \
+FN(16, 8, 16, 4, 3, opt, (int64_t)); \
+FN(8, 16, 8, 3, 4, opt, (int64_t)); \
+FN(8, 8, 8, 3, 3, opt, (int64_t)); \
+FN(8, 4, 8, 3, 2, opt, (int64_t));
-FNS(sse2, sse);
+FNS(sse2);
#undef FNS
#undef FN
+// The 2 unused parameters are placeholders for PIC-enabled builds.
#define DECL(w, opt) \
int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
@@ -419,7 +426,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
const uint16_t *sec, \
ptrdiff_t sec_stride, \
int height, \
- unsigned int *sse);
+ unsigned int *sse, \
+ void *unused0, void *unused);
#define DECLS(opt1) \
DECL(16, opt1) \
DECL(8, opt1)
@@ -439,23 +447,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src, src_stride, x_offset, \
- y_offset, dst, dst_stride, sec, w, h, &sse); \
+ y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16, src_stride, x_offset, y_offset, \
- dst + 16, dst_stride, sec + 16, w, h, &sse2); \
+ dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 32, src_stride, x_offset, y_offset, \
- dst + 32, dst_stride, sec + 32, w, h, &sse2); \
+ dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48, src_stride, x_offset, y_offset, \
- dst + 48, dst_stride, sec + 48, w, h, &sse2); \
+ dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -475,14 +483,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src, src_stride, x_offset, \
y_offset, dst, dst_stride, \
- sec, w, h, &sse); \
+ sec, w, h, &sse, NULL, NULL); \
if (w > wf) { \
uint32_t sse2; \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 16, src_stride, \
x_offset, y_offset, \
dst + 16, dst_stride, \
- sec + 16, w, h, &sse2); \
+ sec + 16, w, h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
if (w > wf * 2) { \
@@ -490,14 +499,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 32, src_stride, \
x_offset, y_offset, \
dst + 32, dst_stride, \
- sec + 32, w, h, &sse2); \
+ sec + 32, w, h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48, src_stride, \
x_offset, y_offset, \
dst + 48, dst_stride, \
- sec + 48, w, h, &sse2); \
+ sec + 48, w, h, &sse2, \
+ NULL, NULL); \
se += se2; \
sse += sse2; \
} \
@@ -525,7 +536,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + (start_row * src_stride), src_stride, x_offset, \
y_offset, dst + (start_row * dst_stride), dst_stride, \
- sec + (start_row * w), w, height, &sse2); \
+ sec + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf) { \
@@ -533,7 +544,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 16 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 16 + (start_row * dst_stride), dst_stride, \
- sec + 16 + (start_row * w), w, height, &sse2); \
+ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
if (w > wf * 2) { \
@@ -541,14 +552,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
src + 32 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 32 + (start_row * dst_stride), dst_stride, \
- sec + 32 + (start_row * w), w, height, &sse2); \
+ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
src + 48 + (start_row * src_stride), src_stride, \
x_offset, y_offset, \
dst + 48 + (start_row * dst_stride), dst_stride, \
- sec + 48 + (start_row * w), w, height, &sse2); \
+ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
se += se2; \
long_sse += sse2; \
} \
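
Spelled out, DECL(8, sse2) above now declares the following prototype (a hand expansion of the macro; the two trailing pointers are the placeholders every call site fills with NULL, NULL):

#include <stddef.h>
#include <stdint.h>

int vpx_highbd_sub_pixel_variance8xh_sse2(const uint16_t *src,
                                          ptrdiff_t src_stride,
                                          int x_offset, int y_offset,
                                          const uint16_t *dst,
                                          ptrdiff_t dst_stride,
                                          int height, unsigned int *sse,
                                          void *unused0, void *unused);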
diff --git a/libvpx/vpx_dsp/x86/intrapred_sse2.asm b/libvpx/vpx_dsp/x86/intrapred_sse2.asm
index 22b573188..cd6a6ae98 100644
--- a/libvpx/vpx_dsp/x86/intrapred_sse2.asm
+++ b/libvpx/vpx_dsp/x86/intrapred_sse2.asm
@@ -11,6 +11,7 @@
%include "third_party/x86inc/x86inc.asm"
SECTION_RODATA
+pb_1: times 16 db 1
pw_4: times 8 dw 4
pw_8: times 8 dw 8
pw_16: times 8 dw 16
@@ -23,17 +24,127 @@ pw2_32: times 8 dw 16
SECTION .text
-INIT_MMX sse
-cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+; ------------------------------------------
+; input: x, y, z, result
+;
+; trick from Pascal
+; (x+2y+z+2)>>2 can be calculated as:
+; result = avg(x,z)
+; result -= xor(x,z) & 1
+; result = avg(result,y)
+; ------------------------------------------
+%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4
+ pavgb %4, %1, %3
+ pxor %3, %1
+ pand %3, [GLOBAL(pb_1)]
+ psubb %4, %3
+ pavgb %4, %2
+%endmacro
+
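
The averaging identity in the comment above can be checked exhaustively in scalar code; each step maps one-to-one onto the macro's pavgb/pxor/pand/psubb/pavgb sequence:

/* avg() is the rounding byte average computed by pavgb. */
static unsigned avg(unsigned a, unsigned b) { return (a + b + 1) >> 1; }

static unsigned trick(unsigned x, unsigned y, unsigned z) {
  unsigned r = avg(x, z);  /* (x + z + 1) >> 1                 (pavgb) */
  r -= (x ^ z) & 1;        /* remove the rounding bias: (x + z) >> 1   */
  return avg(r, y);        /* (((x + z) >> 1) + y + 1) >> 1    (pavgb) */
}

int main(void) {
  unsigned x, y, z;
  for (x = 0; x < 256; ++x)
    for (y = 0; y < 256; ++y)
      for (z = 0; z < 256; ++z)
        if (trick(x, y, z) != ((x + 2 * y + z + 2) >> 2)) return 1;
  return 0;  /* the identity holds for every byte triple */
}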
+INIT_XMM sse2
+cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset
GET_GOT goffsetq
- pxor m1, m1
+ movq m0, [aboveq]
+ DEFINE_ARGS dst, stride, temp
+ psrldq m1, m0, 1
+ psrldq m2, m0, 2
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3
+
+ ; store 4 lines
+ movd [dstq ], m3
+ psrlq m3, 8
+ movd [dstq+strideq ], m3
+ lea dstq, [dstq+strideq*2]
+ psrlq m3, 8
+ movd [dstq ], m3
+ psrlq m3, 8
+ movd [dstq+strideq ], m3
+ psrlq m0, 56
+ movd tempq, m0
+ mov [dstq+strideq+3], tempb
+
+ RESTORE_GOT
+ RET
+
+INIT_XMM sse2
+cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset
+ GET_GOT goffsetq
+
+ movu m1, [aboveq]
+ pslldq m0, m1, 1
+ psrldq m2, m1, 1
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3
+ punpckhbw m0, m0 ; 7 7
+ punpcklwd m0, m0 ; 7 7 7 7
+ punpckldq m0, m0 ; 7 7 7 7 7 7 7 7
+ punpcklqdq m3, m0 ; -1 0 1 2 3 4 5 6 7 7 7 7 7 7 7 7
+
+ ; store 4 lines
+ psrldq m3, 1
+ movq [dstq ], m3
+ psrldq m3, 1
+ movq [dstq+strideq ], m3
+ psrldq m3, 1
+ movq [dstq+strideq*2], m3
+ psrldq m3, 1
+ movq [dstq+stride3q ], m3
+ lea dstq, [dstq+strideq*4]
+
+ ; store next 4 lines
+ psrldq m3, 1
+ movq [dstq ], m3
+ psrldq m3, 1
+ movq [dstq+strideq ], m3
+ psrldq m3, 1
+ movq [dstq+strideq*2], m3
+ psrldq m3, 1
+ movq [dstq+stride3q ], m3
+
+ RESTORE_GOT
+ RET
+
+INIT_XMM sse2
+cglobal d207_predictor_4x4, 4, 4, 5, dst, stride, unused, left, goffset
+ GET_GOT goffsetq
+
+ movd m0, [leftq] ; abcd [byte]
+ punpcklbw m4, m0, m0 ; aabb ccdd
+ punpcklwd m4, m4 ; aaaa bbbb cccc dddd
+ psrldq m4, 12 ; dddd
+ punpckldq m0, m4 ; abcd dddd
+ psrldq m1, m0, 1 ; bcdd
+ psrldq m2, m0, 2 ; cddd
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; a2bc b2cd c3d d
+ pavgb m1, m0 ; ab, bc, cd, d [byte]
+
+ punpcklbw m1, m3 ; ab, a2bc, bc, b2cd, cd, c3d, d, d
+ movd [dstq ], m1
+ psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d
+ movd [dstq+strideq], m1
+
+ lea dstq, [dstq+strideq*2]
+ psrlq m1, 16 ; cd, c3d, d, d
+ movd [dstq ], m1
+ movd [dstq+strideq], m4 ; d, d, d, d
+ RESTORE_GOT
+ RET
+
+INIT_XMM sse2
+cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
+ GET_GOT goffsetq
+
+ movd m2, [leftq]
movd m0, [aboveq]
- punpckldq m0, [leftq]
+ pxor m1, m1
+ punpckldq m0, m2
psadbw m0, m1
paddw m0, [GLOBAL(pw_4)]
psraw m0, 3
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
packuswb m0, m0
movd [dstq ], m0
movd [dstq+strideq], m0
@@ -44,8 +155,9 @@ cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
-cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+INIT_XMM sse2
+cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset
+ movifnidn leftq, leftmp
GET_GOT goffsetq
pxor m1, m1
@@ -53,7 +165,7 @@ cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
psadbw m0, m1
paddw m0, [GLOBAL(pw2_4)]
psraw m0, 2
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
packuswb m0, m0
movd [dstq ], m0
movd [dstq+strideq], m0
@@ -64,8 +176,8 @@ cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
-cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+INIT_XMM sse2
+cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
@@ -73,7 +185,7 @@ cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
psadbw m0, m1
paddw m0, [GLOBAL(pw2_4)]
psraw m0, 2
- pshufw m0, m0, 0x0
+ pshuflw m0, m0, 0x0
packuswb m0, m0
movd [dstq ], m0
movd [dstq+strideq], m0
@@ -84,7 +196,7 @@ cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
+INIT_XMM sse2
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
@@ -98,8 +210,8 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
paddw m0, m2
paddw m0, [GLOBAL(pw_8)]
psraw m0, 4
- pshufw m0, m0, 0x0
- packuswb m0, m0
+ punpcklbw m0, m0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq ], m0
movq [dstq+strideq*2], m0
@@ -113,8 +225,8 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
-cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+INIT_XMM sse2
+cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
@@ -124,8 +236,8 @@ cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
psadbw m0, m1
paddw m0, [GLOBAL(pw2_8)]
psraw m0, 3
- pshufw m0, m0, 0x0
- packuswb m0, m0
+ punpcklbw m0, m0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq ], m0
movq [dstq+strideq*2], m0
@@ -139,8 +251,9 @@ cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
-cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+INIT_XMM sse2
+cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset
+ movifnidn leftq, leftmp
GET_GOT goffsetq
pxor m1, m1
@@ -150,8 +263,8 @@ cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
psadbw m0, m1
paddw m0, [GLOBAL(pw2_8)]
psraw m0, 3
- pshufw m0, m0, 0x0
- packuswb m0, m0
+ punpcklbw m0, m0
+ pshuflw m0, m0, 0x0
movq [dstq ], m0
movq [dstq+strideq ], m0
movq [dstq+strideq*2], m0
@@ -165,8 +278,8 @@ cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
-cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
+INIT_XMM sse2
+cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset
GET_GOT goffsetq
DEFINE_ARGS dst, stride, stride3
@@ -179,8 +292,8 @@ cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
-cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+INIT_XMM sse2
+cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset
GET_GOT goffsetq
DEFINE_ARGS dst, stride, stride3
@@ -236,14 +349,11 @@ cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
- pxor m2, m2
mova m0, [aboveq]
DEFINE_ARGS dst, stride, stride3, lines4
lea stride3q, [strideq*3]
mov lines4d, 4
psadbw m0, m1
- psadbw m2, m1
- paddw m0, m2
movhlps m2, m0
paddw m0, m2
paddw m0, [GLOBAL(pw2_16)]
@@ -268,14 +378,11 @@ cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
pxor m1, m1
- pxor m2, m2
mova m0, [leftq]
DEFINE_ARGS dst, stride, stride3, lines4
lea stride3q, [strideq*3]
mov lines4d, 4
psadbw m0, m1
- psadbw m2, m1
- paddw m0, m2
movhlps m2, m0
paddw m0, m2
paddw m0, [GLOBAL(pw2_16)]
@@ -452,7 +559,7 @@ cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX sse
+INIT_XMM sse2
cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
movd m0, [aboveq]
movd [dstq ], m0
@@ -462,7 +569,7 @@ cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
movd [dstq+strideq], m0
RET
-INIT_MMX sse
+INIT_XMM sse2
cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
movq m0, [aboveq]
DEFINE_ARGS dst, stride, stride3
@@ -515,108 +622,196 @@ cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
jnz .loop
REP_RET
-INIT_MMX sse
-cglobal tm_predictor_4x4, 4, 4, 4, dst, stride, above, left
- pxor m1, m1
- movd m2, [aboveq-1]
- movd m0, [aboveq]
- punpcklbw m2, m1
- punpcklbw m0, m1
- pshufw m2, m2, 0x0
- DEFINE_ARGS dst, stride, line, left
+INIT_XMM sse2
+cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left
+ movifnidn leftq, leftmp
+ movd m0, [leftq]
+ punpcklbw m0, m0
+ punpcklbw m0, m0
+ pshufd m1, m0, 0x1
+ movd [dstq ], m0
+ movd [dstq+strideq], m1
+ pshufd m2, m0, 0x2
+ lea dstq, [dstq+strideq*2]
+ pshufd m3, m0, 0x3
+ movd [dstq ], m2
+ movd [dstq+strideq], m3
+ RET
+
+INIT_XMM sse2
+cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left
+ movifnidn leftq, leftmp
mov lineq, -2
- add leftq, 4
- psubw m0, m2
+ DEFINE_ARGS dst, stride, line, left, stride3
+ lea stride3q, [strideq*3]
+ movq m0, [leftq ]
+ punpcklbw m0, m0 ; l1 l1 l2 l2 ... l8 l8
.loop:
- movd m2, [leftq+lineq*2]
- movd m3, [leftq+lineq*2+1]
+ pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1
+ pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2
+ movq [dstq ], m1
+ movq [dstq+strideq], m2
+ pshuflw m1, m0, 0xaa
+ pshuflw m2, m0, 0xff
+ movq [dstq+strideq*2], m1
+ movq [dstq+stride3q ], m2
+ pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8
+ inc lineq
+ lea dstq, [dstq+strideq*4]
+ jnz .loop
+ REP_RET
+
+INIT_XMM sse2
+cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left
+ movifnidn leftq, leftmp
+ mov lineq, -4
+ DEFINE_ARGS dst, stride, line, left, stride3
+ lea stride3q, [strideq*3]
+.loop:
+ movd m0, [leftq]
+ punpcklbw m0, m0
+ punpcklbw m0, m0 ; l1 to l4 each repeated 4 times
+ pshufd m1, m0, 0x0 ; l1 repeated 16 times
+ pshufd m2, m0, 0x55 ; l2 repeated 16 times
+ mova [dstq ], m1
+ mova [dstq+strideq ], m2
+ pshufd m1, m0, 0xaa
+ pshufd m2, m0, 0xff
+ mova [dstq+strideq*2], m1
+ mova [dstq+stride3q ], m2
+ inc lineq
+ lea leftq, [leftq+4 ]
+ lea dstq, [dstq+strideq*4]
+ jnz .loop
+ REP_RET
+
+INIT_XMM sse2
+cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left
+ movifnidn leftq, leftmp
+ mov lineq, -8
+ DEFINE_ARGS dst, stride, line, left, stride3
+ lea stride3q, [strideq*3]
+.loop:
+ movd m0, [leftq]
+ punpcklbw m0, m0
+ punpcklbw m0, m0 ; l1 to l4 each repeated 4 times
+ pshufd m1, m0, 0x0 ; l1 repeated 16 times
+ pshufd m2, m0, 0x55 ; l2 repeated 16 times
+ mova [dstq ], m1
+ mova [dstq+16 ], m1
+ mova [dstq+strideq ], m2
+ mova [dstq+strideq+16 ], m2
+ pshufd m1, m0, 0xaa
+ pshufd m2, m0, 0xff
+ mova [dstq+strideq*2 ], m1
+ mova [dstq+strideq*2+16], m1
+ mova [dstq+stride3q ], m2
+ mova [dstq+stride3q+16 ], m2
+ inc lineq
+ lea leftq, [leftq+4 ]
+ lea dstq, [dstq+strideq*4]
+ jnz .loop
+ REP_RET
+
+INIT_XMM sse2
+cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left
+ pxor m1, m1
+ movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x
+ punpcklbw m0, m1
+ pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word]
+ psrldq m0, 2
+ psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl [word]
+ movd m2, [leftq]
punpcklbw m2, m1
- punpcklbw m3, m1
- pshufw m2, m2, 0x0
- pshufw m3, m3, 0x0
- paddw m2, m0
+ pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word]
+ pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word]
+ paddw m4, m0
paddw m3, m0
- packuswb m2, m2
+ packuswb m4, m4
packuswb m3, m3
- movd [dstq ], m2
+ movd [dstq ], m4
movd [dstq+strideq], m3
lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
+ pshuflw m4, m2, 0xaa
+ pshuflw m3, m2, 0xff
+ paddw m4, m0
+ paddw m3, m0
+ packuswb m4, m4
+ packuswb m3, m3
+ movd [dstq ], m4
+ movd [dstq+strideq], m3
+ RET
INIT_XMM sse2
-cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left
+cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left
pxor m1, m1
movd m2, [aboveq-1]
movq m0, [aboveq]
punpcklbw m2, m1
- punpcklbw m0, m1
- pshuflw m2, m2, 0x0
+ punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word]
+ pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word]
DEFINE_ARGS dst, stride, line, left
mov lineq, -4
- punpcklqdq m2, m2
- add leftq, 8
- psubw m0, m2
-.loop:
- movd m2, [leftq+lineq*2]
- movd m3, [leftq+lineq*2+1]
- punpcklbw m2, m1
- punpcklbw m3, m1
- pshuflw m2, m2, 0x0
- pshuflw m3, m3, 0x0
- punpcklqdq m2, m2
- punpcklqdq m3, m3
- paddw m2, m0
+ punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word]
+ psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word]
+ movq m2, [leftq]
+ punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word]
+.loop:
+ pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word]
+ pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word]
+ punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word]
+ punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word]
+ paddw m4, m0
paddw m3, m0
- packuswb m2, m3
- movq [dstq ], m2
- movhps [dstq+strideq], m2
+ packuswb m4, m3
+ movq [dstq ], m4
+ movhps [dstq+strideq], m4
lea dstq, [dstq+strideq*2]
+ psrldq m2, 4
inc lineq
jnz .loop
REP_RET
INIT_XMM sse2
-cglobal tm_predictor_16x16, 4, 4, 7, dst, stride, above, left
+cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left
pxor m1, m1
- movd m2, [aboveq-1]
- mova m0, [aboveq]
- punpcklbw m2, m1
+ mova m2, [aboveq-16]
+ mova m0, [aboveq] ; t1 t2 ... t16 [byte]
+ punpckhbw m2, m1 ; [127:112] tl [word]
punpckhbw m4, m0, m1
- punpcklbw m0, m1
- pshuflw m2, m2, 0x0
- DEFINE_ARGS dst, stride, line, left
+ punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word]
+ DEFINE_ARGS dst, stride, line, left, stride8
mov lineq, -8
- punpcklqdq m2, m2
- add leftq, 16
+ pshufhw m2, m2, 0xff
+ mova m3, [leftq] ; l1 l2 ... l16 [byte]
+ punpckhqdq m2, m2 ; tl repeated 8 times [word]
psubw m0, m2
- psubw m4, m2
+ psubw m4, m2 ; m0:m4 t1-tl t2-tl ... t16-tl [word]
+ punpckhbw m5, m3, m1
+ punpcklbw m3, m1 ; m3:m5 l1 l2 ... l16 [word]
+ lea stride8q, [strideq*8]
.loop:
- movd m2, [leftq+lineq*2]
- movd m3, [leftq+lineq*2+1]
- punpcklbw m2, m1
- punpcklbw m3, m1
- pshuflw m2, m2, 0x0
- pshuflw m3, m3, 0x0
- punpcklqdq m2, m2
- punpcklqdq m3, m3
- paddw m5, m2, m0
- paddw m6, m3, m0
- paddw m2, m4
- paddw m3, m4
- packuswb m5, m2
- packuswb m6, m3
- mova [dstq ], m5
- mova [dstq+strideq], m6
- lea dstq, [dstq+strideq*2]
+ pshuflw m6, m3, 0x0
+ pshuflw m7, m5, 0x0
+ punpcklqdq m6, m6 ; l1 repeated 8 times [word]
+ punpcklqdq m7, m7 ; l9 repeated 8 times [word]
+ paddw m1, m6, m0
+ paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,16] [word]
+ psrldq m5, 2
+ packuswb m1, m6
+ mova [dstq ], m1
+ paddw m1, m7, m0
+ paddw m7, m4 ; m1:m7 ti-tl+l9 [i=1,16] [word]
+ psrldq m3, 2
+ packuswb m1, m7
+ mova [dstq+stride8q], m1
inc lineq
+ lea dstq, [dstq+strideq]
jnz .loop
REP_RET
-%if ARCH_X86_64
INIT_XMM sse2
-cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
+cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left
pxor m1, m1
movd m2, [aboveq-1]
mova m0, [aboveq]
@@ -637,31 +832,29 @@ cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
psubw m5, m2
.loop:
movd m2, [leftq+lineq*2]
- movd m6, [leftq+lineq*2+1]
+ pxor m1, m1
punpcklbw m2, m1
- punpcklbw m6, m1
+ pshuflw m7, m2, 0x55
pshuflw m2, m2, 0x0
- pshuflw m6, m6, 0x0
punpcklqdq m2, m2
- punpcklqdq m6, m6
- paddw m7, m2, m0
- paddw m8, m2, m3
- paddw m9, m2, m4
- paddw m2, m5
- packuswb m7, m8
- packuswb m9, m2
- paddw m2, m6, m0
- paddw m8, m6, m3
- mova [dstq ], m7
- paddw m7, m6, m4
- paddw m6, m5
- mova [dstq +16], m9
- packuswb m2, m8
- packuswb m7, m6
- mova [dstq+strideq ], m2
- mova [dstq+strideq+16], m7
+ punpcklqdq m7, m7
+ paddw m6, m2, m3
+ paddw m1, m2, m0
+ packuswb m1, m6
+ mova [dstq ], m1
+ paddw m6, m2, m5
+ paddw m1, m2, m4
+ packuswb m1, m6
+ mova [dstq+16 ], m1
+ paddw m6, m7, m3
+ paddw m1, m7, m0
+ packuswb m1, m6
+ mova [dstq+strideq ], m1
+ paddw m6, m7, m5
+ paddw m1, m7, m4
+ packuswb m1, m6
+ mova [dstq+strideq+16], m1
lea dstq, [dstq+strideq*2]
inc lineq
jnz .loop
REP_RET
-%endif
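
The new h_predictor_NxN routines added above replace the SSSE3 pshufb versions removed from intrapred_ssse3.asm below; each one fills row r with left[r]. A scalar reference:

#include <stddef.h>
#include <string.h>

/* Row r of an h-predicted block is left[r] broadcast across the row;
 * the asm does the broadcast with punpcklbw/pshufd or pshuflw. */
static void h_predictor_ref(unsigned char *dst, ptrdiff_t stride, int n,
                            const unsigned char *left) {
  int r;
  for (r = 0; r < n; ++r) {
    memset(dst, left[r], n);
    dst += stride;
  }
}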
diff --git a/libvpx/vpx_dsp/x86/intrapred_ssse3.asm b/libvpx/vpx_dsp/x86/intrapred_ssse3.asm
index 88df9b2d1..5e0139fa8 100644
--- a/libvpx/vpx_dsp/x86/intrapred_ssse3.asm
+++ b/libvpx/vpx_dsp/x86/intrapred_ssse3.asm
@@ -13,7 +13,6 @@
SECTION_RODATA
pb_1: times 16 db 1
-sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
@@ -28,151 +27,9 @@ sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0
sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0
sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
sh_bfedcba9876543210: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-sh_b1233: db 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-sh_b2333: db 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
SECTION .text
-INIT_MMX ssse3
-cglobal h_predictor_4x4, 2, 4, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- add leftq, 4
- mov lineq, -2
- pxor m0, m0
-.loop:
- movd m1, [leftq+lineq*2 ]
- movd m2, [leftq+lineq*2+1]
- pshufb m1, m0
- pshufb m2, m0
- movd [dstq ], m1
- movd [dstq+strideq], m2
- lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
-
-INIT_MMX ssse3
-cglobal h_predictor_8x8, 2, 4, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- add leftq, 8
- mov lineq, -4
- pxor m0, m0
-.loop:
- movd m1, [leftq+lineq*2 ]
- movd m2, [leftq+lineq*2+1]
- pshufb m1, m0
- pshufb m2, m0
- movq [dstq ], m1
- movq [dstq+strideq], m2
- lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
-
-INIT_XMM ssse3
-cglobal h_predictor_16x16, 2, 4, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- add leftq, 16
- mov lineq, -8
- pxor m0, m0
-.loop:
- movd m1, [leftq+lineq*2 ]
- movd m2, [leftq+lineq*2+1]
- pshufb m1, m0
- pshufb m2, m0
- mova [dstq ], m1
- mova [dstq+strideq], m2
- lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
-
-INIT_XMM ssse3
-cglobal h_predictor_32x32, 2, 4, 3, dst, stride, line, left
- movifnidn leftq, leftmp
- add leftq, 32
- mov lineq, -16
- pxor m0, m0
-.loop:
- movd m1, [leftq+lineq*2 ]
- movd m2, [leftq+lineq*2+1]
- pshufb m1, m0
- pshufb m2, m0
- mova [dstq ], m1
- mova [dstq +16], m1
- mova [dstq+strideq ], m2
- mova [dstq+strideq+16], m2
- lea dstq, [dstq+strideq*2]
- inc lineq
- jnz .loop
- REP_RET
-
-INIT_MMX ssse3
-cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset
- GET_GOT goffsetq
-
- movq m0, [aboveq]
- pshufb m2, m0, [GLOBAL(sh_b23456777)]
- pshufb m1, m0, [GLOBAL(sh_b01234577)]
- pshufb m0, [GLOBAL(sh_b12345677)]
- pavgb m3, m2, m1
- pxor m2, m1
- pand m2, [GLOBAL(pb_1)]
- psubb m3, m2
- pavgb m0, m3
-
- ; store 4 lines
- movd [dstq ], m0
- psrlq m0, 8
- movd [dstq+strideq], m0
- lea dstq, [dstq+strideq*2]
- psrlq m0, 8
- movd [dstq ], m0
- psrlq m0, 8
- movd [dstq+strideq], m0
-
- RESTORE_GOT
- RET
-
-INIT_MMX ssse3
-cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset
- GET_GOT goffsetq
-
- movq m0, [aboveq]
- mova m1, [GLOBAL(sh_b12345677)]
- DEFINE_ARGS dst, stride, stride3
- lea stride3q, [strideq*3]
- pshufb m2, m0, [GLOBAL(sh_b23456777)]
- pavgb m3, m2, m0
- pxor m2, m0
- pshufb m0, m1
- pand m2, [GLOBAL(pb_1)]
- psubb m3, m2
- pavgb m0, m3
-
- ; store 4 lines
- movq [dstq ], m0
- pshufb m0, m1
- movq [dstq+strideq ], m0
- pshufb m0, m1
- movq [dstq+strideq*2], m0
- pshufb m0, m1
- movq [dstq+stride3q ], m0
- pshufb m0, m1
- lea dstq, [dstq+strideq*4]
-
- ; store next 4 lines
- movq [dstq ], m0
- pshufb m0, m1
- movq [dstq+strideq ], m0
- pshufb m0, m1
- movq [dstq+strideq*2], m0
- pshufb m0, m1
- movq [dstq+stride3q ], m0
-
- RESTORE_GOT
- RET
-
INIT_XMM ssse3
cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset
GET_GOT goffsetq
@@ -789,28 +646,6 @@ cglobal d153_predictor_32x32, 4, 5, 8, dst, stride, above, left, goffset
RESTORE_GOT
RET
-INIT_MMX ssse3
-cglobal d207_predictor_4x4, 4, 5, 4, dst, stride, unused, left, goffset
- GET_GOT goffsetq
- movd m0, [leftq] ; abcd [byte]
- pshufb m1, m0, [GLOBAL(sh_b1233)] ; bcdd [byte]
- pshufb m3, m0, [GLOBAL(sh_b2333)] ; cddd
-
- X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m3, m2
- pavgb m1, m0 ; ab, bc, cd, d [byte]
-
- punpcklbw m1, m2 ; ab, a2bc, bc, b2cd, cd, c3d, d, d
- movd [dstq ], m1
- psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d
- movd [dstq+strideq], m1
- lea dstq, [dstq+strideq*2]
- psrlq m1, 16 ; cd, c3d, d, d
- movd [dstq ], m1
- pshufw m1, m1, q1111 ; d, d, d, d
- movd [dstq+strideq], m1
- RESTORE_GOT
- RET
-
INIT_XMM ssse3
cglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset
GET_GOT goffsetq
diff --git a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
index ae907fd0b..df5068c62 100644
--- a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
@@ -158,8 +158,8 @@ void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
int a;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 4);
dc_value = _mm_set1_epi16(a);
@@ -527,8 +527,8 @@ void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
int a;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 5);
dc_value = _mm_set1_epi16(a);
@@ -1305,30 +1305,16 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
int a, i;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
dc_value = _mm_set1_epi16(a);
- for (i = 0; i < 2; ++i) {
- RECON_AND_STORE(dest + 0 * stride, dc_value);
- RECON_AND_STORE(dest + 1 * stride, dc_value);
- RECON_AND_STORE(dest + 2 * stride, dc_value);
- RECON_AND_STORE(dest + 3 * stride, dc_value);
- RECON_AND_STORE(dest + 4 * stride, dc_value);
- RECON_AND_STORE(dest + 5 * stride, dc_value);
- RECON_AND_STORE(dest + 6 * stride, dc_value);
- RECON_AND_STORE(dest + 7 * stride, dc_value);
- RECON_AND_STORE(dest + 8 * stride, dc_value);
- RECON_AND_STORE(dest + 9 * stride, dc_value);
- RECON_AND_STORE(dest + 10 * stride, dc_value);
- RECON_AND_STORE(dest + 11 * stride, dc_value);
- RECON_AND_STORE(dest + 12 * stride, dc_value);
- RECON_AND_STORE(dest + 13 * stride, dc_value);
- RECON_AND_STORE(dest + 14 * stride, dc_value);
- RECON_AND_STORE(dest + 15 * stride, dc_value);
- dest += 8;
+ for (i = 0; i < 16; ++i) {
+ RECON_AND_STORE(dest + 0, dc_value);
+ RECON_AND_STORE(dest + 8, dc_value);
+ dest += stride;
}
}
@@ -3476,8 +3462,8 @@ void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest,
const __m128i zero = _mm_setzero_si128();
int a, j;
- a = dct_const_round_shift(input[0] * cospi_16_64);
- a = dct_const_round_shift(a * cospi_16_64);
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
a = ROUND_POWER_OF_TWO(a, 6);
dc_value = _mm_set1_epi16(a);
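
All four DC-only IDCT paths gain explicit (int) casts on dct_const_round_shift(), and the 16x16 store loop now walks 16 rows with two 8-pixel stores each instead of two column passes. A hedged scalar sketch of the 16x16 path; the constants follow the usual libvpx definitions and agree with pw_11585x2 and pd_8192 in the asm below:

#include <stdint.h>

#define DCT_CONST_BITS 14                 /* matches the pd_8192 rounding */
static const int cospi_16_64 = 11585;     /* pw_11585x2 / 2 */

static int dct_const_round_shift(int64_t in) {
  return (int)((in + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS);
}

static uint8_t clip_pixel(int v) {
  return (uint8_t)(v > 255 ? 255 : (v < 0 ? 0 : v));
}

/* The DC term is scaled twice by cospi_16_64 with rounding, rounded by
 * the per-size shift (6 for 16x16), then added to every output pixel
 * with clamping, as RECON_AND_STORE does with packus saturation. */
static void idct16x16_1_add_ref(int16_t dc, uint8_t *dest, int stride) {
  int a = dct_const_round_shift((int64_t)dc * cospi_16_64);
  int r, c;
  a = dct_const_round_shift((int64_t)a * cospi_16_64);
  a = (a + 32) >> 6;  /* ROUND_POWER_OF_TWO(a, 6) */
  for (r = 0; r < 16; ++r) {
    for (c = 0; c < 16; ++c) dest[c] = clip_pixel(dest[c] + a);
    dest += stride;
  }
}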
diff --git a/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm b/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
index 68e7fa40c..20baf820f 100644
--- a/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
+++ b/libvpx/vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm
@@ -17,18 +17,70 @@
SECTION_RODATA
pw_11585x2: times 8 dw 23170
+
+pw_m2404x2: times 8 dw -2404*2
+pw_m4756x2: times 8 dw -4756*2
+pw_m5520x2: times 8 dw -5520*2
+pw_m8423x2: times 8 dw -8423*2
+pw_m9102x2: times 8 dw -9102*2
+pw_m10394x2: times 8 dw -10394*2
+pw_m11003x2: times 8 dw -11003*2
+
+pw_16364x2: times 8 dw 16364*2
+pw_16305x2: times 8 dw 16305*2
+pw_16207x2: times 8 dw 16207*2
+pw_16069x2: times 8 dw 16069*2
+pw_15893x2: times 8 dw 15893*2
+pw_15679x2: times 8 dw 15679*2
+pw_15426x2: times 8 dw 15426*2
+pw_15137x2: times 8 dw 15137*2
+pw_14811x2: times 8 dw 14811*2
+pw_14449x2: times 8 dw 14449*2
+pw_14053x2: times 8 dw 14053*2
+pw_13623x2: times 8 dw 13623*2
+pw_13160x2: times 8 dw 13160*2
+pw_12665x2: times 8 dw 12665*2
+pw_12140x2: times 8 dw 12140*2
+pw__9760x2: times 8 dw 9760*2
+pw__7723x2: times 8 dw 7723*2
+pw__7005x2: times 8 dw 7005*2
+pw__6270x2: times 8 dw 6270*2
+pw__3981x2: times 8 dw 3981*2
+pw__3196x2: times 8 dw 3196*2
+pw__1606x2: times 8 dw 1606*2
+pw___804x2: times 8 dw 804*2
+
pd_8192: times 4 dd 8192
+pw_32: times 8 dw 32
pw_16: times 8 dw 16
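
The pw_*x2 rows above store each coefficient doubled because pmulhrsw computes (((a * b) >> 14) + 1) >> 1 per 16-bit lane; with b == 2*coef that equals the rounded (a * coef) >> DCT_CONST_BITS scaling the pmaddwd path obtains via pd_8192. A scalar model of one pmulhrsw lane (a sketch):

    #include <stdint.h>

    /* Per-lane behavior of the SSSE3 pmulhrsw instruction. */
    static int16_t mulhrs(int16_t a, int16_t b) {
      return (int16_t)(((((int32_t)a * b) >> 14) + 1) >> 1);
    }
    /* e.g. mulhrs(x, 11585 * 2) == (x * 11585 + 8192) >> 14 for in-range x */
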
%macro TRANSFORM_COEFFS 2
pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
pw_m%2_%1: dw -%2, %1, -%2, %1, -%2, %1, -%2, %1
+pw_m%1_m%2: dw -%1, -%2, -%1, -%2, -%1, -%2, -%1, -%2
%endmacro
TRANSFORM_COEFFS 6270, 15137
TRANSFORM_COEFFS 3196, 16069
TRANSFORM_COEFFS 13623, 9102
+; constants for 32x32_34
+TRANSFORM_COEFFS 804, 16364
+TRANSFORM_COEFFS 15426, 5520
+TRANSFORM_COEFFS 3981, 15893
+TRANSFORM_COEFFS 16207, 2404
+TRANSFORM_COEFFS 1606, 16305
+TRANSFORM_COEFFS 15679, 4756
+TRANSFORM_COEFFS 11585, 11585
+
+; constants for 32x32_1024
+TRANSFORM_COEFFS 12140, 11003
+TRANSFORM_COEFFS 7005, 14811
+TRANSFORM_COEFFS 14053, 8423
+TRANSFORM_COEFFS 9760, 13160
+TRANSFORM_COEFFS 12665, 10394
+TRANSFORM_COEFFS 7723, 14449
+
%macro PAIR_PP_COEFFS 2
dpw_%1_%2: dw %1, %1, %1, %1, %2, %2, %2, %2
%endmacro
@@ -80,6 +132,15 @@ SECTION .text
packssdw m%2, m%6
%endmacro
+%macro BUTTERFLY_4Xmm 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+ punpckhwd m%6, m%2, m%1
+ MUL_ADD_2X %7, %6, %6, %5, [pw_m%4_%3], [pw_m%3_m%4]
+ punpcklwd m%2, m%1
+ MUL_ADD_2X %1, %2, %2, %5, [pw_m%4_%3], [pw_m%3_m%4]
+ packssdw m%1, m%7
+ packssdw m%2, m%6
+%endmacro
+
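
BUTTERFLY_4Xmm is the negated-output companion of the existing BUTTERFLY_4X: it consumes the pw_m%1_m%2 row that TRANSFORM_COEFFS now also emits (e.g. TRANSFORM_COEFFS 3196, 16069 additionally defines pw_m3196_m16069), so one of its two dot products carries (-c1, -c2). A scalar model of the 14-bit fixed-point rotation both macros vectorize (a sketch only: the lane interleave/pack and the exact output-to-operand mapping follow the asm, not this illustration):

    #include <stdint.h>

    /* One coefficient-pair rotation, rounded by pd_8192 (== 1 << 13). */
    static void butterfly(int32_t x, int32_t y, int32_t c1, int32_t c2,
                          int32_t *o1, int32_t *o2) {
      *o1 = (x * c1 - y * c2 + 8192) >> 14;
      *o2 = (x * c2 + y * c1 + 8192) >> 14;
    }
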
; matrix transpose
%macro INTERLEAVE_2X 4
punpckh%1 m%4, m%2, m%3
@@ -159,7 +220,24 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova m12, [pw_11585x2]
lea r3, [2 * strideq]
-
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova m0, [inputq + 0]
+ packssdw m0, [inputq + 16]
+ mova m1, [inputq + 32]
+ packssdw m1, [inputq + 48]
+ mova m2, [inputq + 64]
+ packssdw m2, [inputq + 80]
+ mova m3, [inputq + 96]
+ packssdw m3, [inputq + 112]
+ mova m4, [inputq + 128]
+ packssdw m4, [inputq + 144]
+ mova m5, [inputq + 160]
+ packssdw m5, [inputq + 176]
+ mova m6, [inputq + 192]
+ packssdw m6, [inputq + 208]
+ mova m7, [inputq + 224]
+ packssdw m7, [inputq + 240]
+%else
mova m0, [inputq + 0]
mova m1, [inputq + 16]
mova m2, [inputq + 32]
@@ -168,7 +246,7 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
mova m5, [inputq + 80]
mova m6, [inputq + 96]
mova m7, [inputq + 112]
-
+%endif
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
IDCT8_1D
TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
@@ -193,10 +271,21 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
lea r3, [2 * strideq]
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova m0, [inputq + 0]
+ packssdw m0, [inputq + 16]
+ mova m1, [inputq + 32]
+ packssdw m1, [inputq + 48]
+ mova m2, [inputq + 64]
+ packssdw m2, [inputq + 80]
+ mova m3, [inputq + 96]
+ packssdw m3, [inputq + 112]
+%else
mova m0, [inputq + 0]
mova m1, [inputq + 16]
mova m2, [inputq + 32]
mova m3, [inputq + 48]
+%endif
punpcklwd m0, m1
punpcklwd m2, m3
@@ -298,4 +387,1407 @@ cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
RET
+%define idx0 16 * 0
+%define idx1 16 * 1
+%define idx2 16 * 2
+%define idx3 16 * 3
+%define idx4 16 * 4
+%define idx5 16 * 5
+%define idx6 16 * 6
+%define idx7 16 * 7
+%define idx8 16 * 0
+%define idx9 16 * 1
+%define idx10 16 * 2
+%define idx11 16 * 3
+%define idx12 16 * 4
+%define idx13 16 * 5
+%define idx14 16 * 6
+%define idx15 16 * 7
+%define idx16 16 * 0
+%define idx17 16 * 1
+%define idx18 16 * 2
+%define idx19 16 * 3
+%define idx20 16 * 4
+%define idx21 16 * 5
+%define idx22 16 * 6
+%define idx23 16 * 7
+%define idx24 16 * 0
+%define idx25 16 * 1
+%define idx26 16 * 2
+%define idx27 16 * 3
+%define idx28 16 * 4
+%define idx29 16 * 5
+%define idx30 16 * 6
+%define idx31 16 * 7
+
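
These idx defines alias deliberately: rows 8-15, 16-23, and 24-31 reuse byte offsets 16*0..16*7, with the per-group base supplied by the %1..%4 arguments of the IDCT32X32_* macros below (16*{0,32,64,96} in pass one, 16*{0,8,16,24} in pass two). The effective scratch offset, as a sketch:

    /* Byte offset of 8-lane output row n (0..31) inside the scratch
       area, given the four group bases passed as %1..%4. */
    static int row_offset(const int base[4], int n) {
      return base[n / 8] + 16 * (n % 8);
    }
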
+; FROM idct32x32_add_neon.asm
+;
+; Instead of computing the transform stage by stage, the code loads a set
+; of input values and carries them through as many stages as possible, to
+; minimize the storing/loading of intermediate results. To fit within
+; registers, the
+; final coefficients are cut into four blocks:
+; BLOCK A: 16-19,28-31
+; BLOCK B: 20-23,24-27
+; BLOCK C: 8-11,12-15
+; BLOCK D: 0-3,4-7
+; Blocks A and C are straight calculation through the various stages. In
+; block B, further calculations are performed using the results from
+; block A. In block D, further calculations are performed using the results
+; from block C and then the final calculations are done using results from
+; block A and B which have been combined at the end of block B.
+;
+
+%macro IDCT32X32_34 4
+ ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m11, m1
+ pmulhrsw m1, [pw___804x2] ; stp1_16
+ mova [r4 + 0], m0
+ pmulhrsw m11, [pw_16364x2] ; stp2_31
+ mova [r4 + 16 * 2], m2
+ mova m12, m7
+ pmulhrsw m7, [pw_15426x2] ; stp1_28
+ mova [r4 + 16 * 4], m4
+ pmulhrsw m12, [pw_m5520x2] ; stp2_19
+ mova [r4 + 16 * 6], m6
+
+ ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m2, m1 ; stp1_16
+ mova m0, m11 ; stp1_31
+ mova m4, m7 ; stp1_28
+ mova m15, m12 ; stp1_19
+
+ ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30
+ BUTTERFLY_4Xmm 4, 15, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18
+
+ ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 1, 12, 9 ; stp2_16, stp2_19
+ SUM_SUB 0, 15, 9 ; stp2_17, stp2_18
+ SUM_SUB 11, 7, 9 ; stp2_31, stp2_28
+ SUM_SUB 2, 4, 9 ; stp2_30, stp2_29
+
+ ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 4, 15, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29
+ BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28
+
+ ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m6, m5
+ pmulhrsw m5, [pw__3981x2] ; stp1_20
+ mova [stp + %4 + idx28], m12
+ mova [stp + %4 + idx29], m15
+ pmulhrsw m6, [pw_15893x2] ; stp2_27
+ mova [stp + %4 + idx30], m2
+ mova m2, m3
+ pmulhrsw m3, [pw_m2404x2] ; stp1_23
+ mova [stp + %4 + idx31], m11
+ pmulhrsw m2, [pw_16207x2] ; stp2_24
+
+ ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m13, m5 ; stp1_20
+ mova m14, m6 ; stp1_27
+ mova m15, m3 ; stp1_23
+ mova m11, m2 ; stp1_24
+
+ ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26
+ BUTTERFLY_4Xmm 11, 15, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22
+
+ ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 3, 5, 9 ; stp2_23, stp2_20
+ SUM_SUB 15, 14, 9 ; stp2_22, stp2_21
+ SUM_SUB 2, 6, 9 ; stp2_24, stp2_27
+ SUM_SUB 11, 13, 9 ; stp2_25, stp2_26
+
+ ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20
+ BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21
+
+ ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 1, 3, 9 ; stp2_16, stp2_23
+ SUM_SUB 0, 15, 9 ; stp2_17, stp2_22
+ SUM_SUB 4, 14, 9 ; stp2_18, stp2_21
+ SUM_SUB 7, 5, 9 ; stp2_19, stp2_20
+ mova [stp + %3 + idx16], m1
+ mova [stp + %3 + idx17], m0
+ mova [stp + %3 + idx18], m4
+ mova [stp + %3 + idx19], m7
+
+ mova m4, [stp + %4 + idx28]
+ mova m7, [stp + %4 + idx29]
+ mova m10, [stp + %4 + idx30]
+ mova m12, [stp + %4 + idx31]
+ SUM_SUB 4, 6, 9 ; stp2_28, stp2_27
+ SUM_SUB 7, 13, 9 ; stp2_29, stp2_26
+ SUM_SUB 10, 11, 9 ; stp2_30, stp2_25
+ SUM_SUB 12, 2, 9 ; stp2_31, stp2_24
+ mova [stp + %4 + idx28], m4
+ mova [stp + %4 + idx29], m7
+ mova [stp + %4 + idx30], m10
+ mova [stp + %4 + idx31], m12
+
+ ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 6, 5, 9
+ pmulhrsw m6, m10 ; stp1_27
+ pmulhrsw m5, m10 ; stp1_20
+ SUM_SUB 13, 14, 9
+ pmulhrsw m13, m10 ; stp1_26
+ pmulhrsw m14, m10 ; stp1_21
+ SUM_SUB 11, 15, 9
+ pmulhrsw m11, m10 ; stp1_25
+ pmulhrsw m15, m10 ; stp1_22
+ SUM_SUB 2, 3, 9
+ pmulhrsw m2, m10 ; stp1_24
+ pmulhrsw m3, m10 ; stp1_23
+%else
+ BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27
+ SWAP 6, 5
+ BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26
+ SWAP 13, 14
+ BUTTERFLY_4X 11, 15, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25
+ SWAP 11, 15
+ BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24
+ SWAP 2, 3
+%endif
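
The disabled %if 0 path (which recurs throughout this file) would scale with two pmulhrsw per pair, but SUM_SUB adds the int16 lanes first and the sum can exceed the 16-bit range on real streams, as the comment notes; the BUTTERFLY_4X path instead widens to 32 bits via pmaddwd before the 14-bit rounding shift. A scalar illustration of the failure mode:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      int16_t a = 23000, b = 15000;
      int16_t wrapped = (int16_t)(a + b);  /* SUM_SUB result: wraps to -27536 */
      int32_t widened = (int32_t)a + b;    /* pmaddwd keeps full precision */
      printf("%d vs %d\n", wrapped, widened);  /* -27536 vs 38000 */
      return 0;
    }
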
+
+ mova [stp + %4 + idx24], m2
+ mova [stp + %4 + idx25], m11
+ mova [stp + %4 + idx26], m13
+ mova [stp + %4 + idx27], m6
+
+ ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m0, [rsp + transposed_in + 16 * 2]
+ mova m6, [rsp + transposed_in + 16 * 6]
+
+ mova m1, m0
+ pmulhrsw m0, [pw__1606x2] ; stp1_8
+ mova [stp + %3 + idx20], m5
+ mova [stp + %3 + idx21], m14
+ pmulhrsw m1, [pw_16305x2] ; stp2_15
+ mova [stp + %3 + idx22], m15
+ mova m7, m6
+ pmulhrsw m7, [pw_m4756x2] ; stp2_11
+ mova [stp + %3 + idx23], m3
+ pmulhrsw m6, [pw_15679x2] ; stp1_12
+
+ ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m3, m0 ; stp1_8
+ mova m2, m1 ; stp1_15
+
+ ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14
+ mova m4, m7 ; stp1_11
+ mova m5, m6 ; stp1_12
+ BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10
+
+ ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 7, 9 ; stp1_8, stp1_11
+ SUM_SUB 2, 4, 9 ; stp1_9, stp1_10
+ SUM_SUB 1, 6, 9 ; stp1_15, stp1_12
+ SUM_SUB 3, 5, 9 ; stp1_14, stp1_13
+
+ ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 5, 4, 9
+ pmulhrsw m5, m10 ; stp1_13
+ pmulhrsw m4, m10 ; stp1_10
+ SUM_SUB 6, 7, 9
+ pmulhrsw m6, m10 ; stp1_12
+ pmulhrsw m7, m10 ; stp1_11
+%else
+ BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13
+ SWAP 5, 4
+ BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12
+ SWAP 6, 7
+%endif
+
+ ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova [stp + %2 + idx8], m0
+ mova [stp + %2 + idx9], m2
+ mova [stp + %2 + idx10], m4
+ mova [stp + %2 + idx11], m7
+
+ ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m11, [rsp + transposed_in + 16 * 4]
+ mova m12, m11
+ pmulhrsw m11, [pw__3196x2] ; stp1_4
+ pmulhrsw m12, [pw_16069x2] ; stp1_7
+
+ ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m0, [rsp + transposed_in + 16 * 0]
+ mova m10, [pw_11585x2]
+ pmulhrsw m0, m10 ; stp1_1
+
+ mova m14, m11 ; stp1_4
+ mova m13, m12 ; stp1_7
+
+ ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ SUM_SUB 13, 14, 9
+ pmulhrsw m13, m10 ; stp1_6
+ pmulhrsw m14, m10 ; stp1_5
+%else
+ BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6
+ SWAP 13, 14
+%endif
+ mova m7, m0 ; stp1_0 = stp1_1
+ mova m4, m0 ; stp1_1
+ mova m2, m7 ; stp1_0
+
+ ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 12, 9 ; stp1_0, stp1_7
+ SUM_SUB 7, 13, 9 ; stp1_1, stp1_6
+ SUM_SUB 2, 14, 9 ; stp1_2, stp1_5
+ SUM_SUB 4, 11, 9 ; stp1_3, stp1_4
+
+ ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 1, 9 ; stp1_0, stp1_15
+ SUM_SUB 7, 3, 9 ; stp1_1, stp1_14
+ SUM_SUB 2, 5, 9 ; stp1_2, stp1_13
+ SUM_SUB 4, 6, 9 ; stp1_3, stp1_12
+
+ ; 0-3, 28-31 final stage
+ mova m15, [stp + %4 + idx30]
+ mova m10, [stp + %4 + idx31]
+ SUM_SUB 0, 10, 9 ; stp1_0, stp1_31
+ SUM_SUB 7, 15, 9 ; stp1_1, stp1_30
+ mova [stp + %1 + idx0], m0
+ mova [stp + %1 + idx1], m7
+ mova [stp + %4 + idx30], m15
+ mova [stp + %4 + idx31], m10
+ mova m7, [stp + %4 + idx28]
+ mova m0, [stp + %4 + idx29]
+ SUM_SUB 2, 0, 9 ; stp1_2, stp1_29
+ SUM_SUB 4, 7, 9 ; stp1_3, stp1_28
+ mova [stp + %1 + idx2], m2
+ mova [stp + %1 + idx3], m4
+ mova [stp + %4 + idx28], m7
+ mova [stp + %4 + idx29], m0
+
+ ; 12-15, 16-19 final stage
+ mova m0, [stp + %3 + idx16]
+ mova m7, [stp + %3 + idx17]
+ mova m2, [stp + %3 + idx18]
+ mova m4, [stp + %3 + idx19]
+ SUM_SUB 1, 0, 9 ; stp1_15, stp1_16
+ SUM_SUB 3, 7, 9 ; stp1_14, stp1_17
+ SUM_SUB 5, 2, 9 ; stp1_13, stp1_18
+ SUM_SUB 6, 4, 9 ; stp1_12, stp1_19
+ mova [stp + %2 + idx12], m6
+ mova [stp + %2 + idx13], m5
+ mova [stp + %2 + idx14], m3
+ mova [stp + %2 + idx15], m1
+ mova [stp + %3 + idx16], m0
+ mova [stp + %3 + idx17], m7
+ mova [stp + %3 + idx18], m2
+ mova [stp + %3 + idx19], m4
+
+ mova m4, [stp + %2 + idx8]
+ mova m5, [stp + %2 + idx9]
+ mova m6, [stp + %2 + idx10]
+ mova m7, [stp + %2 + idx11]
+ SUM_SUB 11, 7, 9 ; stp1_4, stp1_11
+ SUM_SUB 14, 6, 9 ; stp1_5, stp1_10
+ SUM_SUB 13, 5, 9 ; stp1_6, stp1_9
+ SUM_SUB 12, 4, 9 ; stp1_7, stp1_8
+
+ ; 4-7, 24-27 final stage
+ mova m0, [stp + %4 + idx27]
+ mova m1, [stp + %4 + idx26]
+ mova m2, [stp + %4 + idx25]
+ mova m3, [stp + %4 + idx24]
+ SUM_SUB 11, 0, 9 ; stp1_4, stp1_27
+ SUM_SUB 14, 1, 9 ; stp1_5, stp1_26
+ SUM_SUB 13, 2, 9 ; stp1_6, stp1_25
+ SUM_SUB 12, 3, 9 ; stp1_7, stp1_24
+ mova [stp + %4 + idx27], m0
+ mova [stp + %4 + idx26], m1
+ mova [stp + %4 + idx25], m2
+ mova [stp + %4 + idx24], m3
+ mova [stp + %1 + idx4], m11
+ mova [stp + %1 + idx5], m14
+ mova [stp + %1 + idx6], m13
+ mova [stp + %1 + idx7], m12
+
+ ; 8-11, 20-23 final stage
+ mova m0, [stp + %3 + idx20]
+ mova m1, [stp + %3 + idx21]
+ mova m2, [stp + %3 + idx22]
+ mova m3, [stp + %3 + idx23]
+ SUM_SUB 7, 0, 9 ; stp1_11, stp1_20
+ SUM_SUB 6, 1, 9 ; stp1_10, stp1_21
+ SUM_SUB 5, 2, 9 ; stp1_9, stp1_22
+ SUM_SUB 4, 3, 9 ; stp1_8, stp1_23
+ mova [stp + %2 + idx8], m4
+ mova [stp + %2 + idx9], m5
+ mova [stp + %2 + idx10], m6
+ mova [stp + %2 + idx11], m7
+ mova [stp + %3 + idx20], m0
+ mova [stp + %3 + idx21], m1
+ mova [stp + %3 + idx22], m2
+ mova [stp + %3 + idx23], m3
+%endmacro
+
+%macro RECON_AND_STORE 1
+ mova m11, [pw_32]
+ lea stp, [rsp + %1]
+ mov r6, 32
+ pxor m8, m8
+%%recon_and_store:
+ mova m0, [stp + 16 * 32 * 0]
+ mova m1, [stp + 16 * 32 * 1]
+ mova m2, [stp + 16 * 32 * 2]
+ mova m3, [stp + 16 * 32 * 3]
+ add stp, 16
+
+ paddw m0, m11
+ paddw m1, m11
+ paddw m2, m11
+ paddw m3, m11
+ psraw m0, 6
+ psraw m1, 6
+ psraw m2, 6
+ psraw m3, 6
+ movh m4, [outputq + 0]
+ movh m5, [outputq + 8]
+ movh m6, [outputq + 16]
+ movh m7, [outputq + 24]
+ punpcklbw m4, m8
+ punpcklbw m5, m8
+ punpcklbw m6, m8
+ punpcklbw m7, m8
+ paddw m0, m4
+ paddw m1, m5
+ paddw m2, m6
+ paddw m3, m7
+ packuswb m0, m1
+ packuswb m2, m3
+ mova [outputq + 0], m0
+ mova [outputq + 16], m2
+ lea outputq, [outputq + strideq]
+ dec r6
+ jnz %%recon_and_store
+%endmacro
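
What the macro does, in scalar form: each pass-two invocation below writes an 8-column strip of all 32 rows, strips spaced 16*32 bytes apart, and RECON_AND_STORE then rounds by 32, shifts right by 6, and adds the result to the destination with byte saturation. A hypothetical scalar equivalent:

    #include <stdint.h>

    static uint8_t clip_pixel(int v) {
      return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    static void recon_and_store_model(uint8_t *dest, int stride,
                                      const int16_t *scratch) {
      for (int r = 0; r < 32; ++r) {
        for (int c = 0; c < 32; ++c) {
          /* strip c/8 holds columns 8*(c/8)..8*(c/8)+7; rows step by
             8 int16s (16 bytes) within a strip */
          const int16_t v = scratch[(c / 8) * 8 * 32 + r * 8 + (c % 8)];
          dest[r * stride + c] =
              clip_pixel(dest[r * stride + c] + ((v + 32) >> 6));
        }
      }
    }
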
+
+%define i32x32_size 16*32*5
+%define pass_two_start 16*32*0
+%define transposed_in 16*32*4
+%define pass_one_start 16*32*0
+%define stp r8
+
+INIT_XMM ssse3
+cglobal idct32x32_34_add, 3, 11, 16, i32x32_size, input, output, stride
+ mova m8, [pd_8192]
+ lea stp, [rsp + pass_one_start]
+
+idct32x32_34:
+ mov r3, inputq
+ lea r4, [rsp + transposed_in]
+
+idct32x32_34_transpose:
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova m0, [r3 + 0]
+ packssdw m0, [r3 + 16]
+ mova m1, [r3 + 32 * 4]
+ packssdw m1, [r3 + 32 * 4 + 16]
+ mova m2, [r3 + 32 * 8]
+ packssdw m2, [r3 + 32 * 8 + 16]
+ mova m3, [r3 + 32 * 12]
+ packssdw m3, [r3 + 32 * 12 + 16]
+ mova m4, [r3 + 32 * 16]
+ packssdw m4, [r3 + 32 * 16 + 16]
+ mova m5, [r3 + 32 * 20]
+ packssdw m5, [r3 + 32 * 20 + 16]
+ mova m6, [r3 + 32 * 24]
+ packssdw m6, [r3 + 32 * 24 + 16]
+ mova m7, [r3 + 32 * 28]
+ packssdw m7, [r3 + 32 * 28 + 16]
+%else
+ mova m0, [r3 + 0]
+ mova m1, [r3 + 16 * 4]
+ mova m2, [r3 + 16 * 8]
+ mova m3, [r3 + 16 * 12]
+ mova m4, [r3 + 16 * 16]
+ mova m5, [r3 + 16 * 20]
+ mova m6, [r3 + 16 * 24]
+ mova m7, [r3 + 16 * 28]
+%endif
+
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ IDCT32X32_34 16*0, 16*32, 16*64, 16*96
+ lea stp, [stp + 16 * 8]
+ mov r6, 4
+ lea stp, [rsp + pass_one_start]
+ lea r9, [rsp + pass_one_start]
+
+idct32x32_34_2:
+ lea r4, [rsp + transposed_in]
+ mov r3, r9
+
+idct32x32_34_transpose_2:
+ mova m0, [r3 + 0]
+ mova m1, [r3 + 16 * 1]
+ mova m2, [r3 + 16 * 2]
+ mova m3, [r3 + 16 * 3]
+ mova m4, [r3 + 16 * 4]
+ mova m5, [r3 + 16 * 5]
+ mova m6, [r3 + 16 * 6]
+ mova m7, [r3 + 16 * 7]
+
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ IDCT32X32_34 16*0, 16*8, 16*16, 16*24
+
+ lea stp, [stp + 16 * 32]
+ add r9, 16 * 32
+ dec r6
+ jnz idct32x32_34_2
+
+ RECON_AND_STORE pass_two_start
+
+ RET
+
+%macro IDCT32X32_135 4
+ ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m1, [rsp + transposed_in + 16 * 1]
+ mova m11, m1
+ pmulhrsw m1, [pw___804x2] ; stp1_16
+ pmulhrsw m11, [pw_16364x2] ; stp2_31
+
+ mova m7, [rsp + transposed_in + 16 * 7]
+ mova m12, m7
+ pmulhrsw m7, [pw_15426x2] ; stp1_28
+ pmulhrsw m12, [pw_m5520x2] ; stp2_19
+
+ mova m3, [rsp + transposed_in + 16 * 9]
+ mova m4, m3
+ pmulhrsw m3, [pw__7005x2] ; stp1_18
+ pmulhrsw m4, [pw_14811x2] ; stp2_29
+
+ mova m0, [rsp + transposed_in + 16 * 15]
+ mova m2, m0
+ pmulhrsw m0, [pw_12140x2] ; stp1_30
+ pmulhrsw m2, [pw_m11003x2] ; stp2_17
+
+ ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 1, 2, 9 ; stp2_16, stp2_17
+ SUM_SUB 12, 3, 9 ; stp2_19, stp2_18
+ SUM_SUB 7, 4, 9 ; stp2_28, stp2_29
+ SUM_SUB 11, 0, 9 ; stp2_31, stp2_30
+
+ ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30
+ BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18
+
+ ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 1, 12, 9 ; stp2_16, stp2_19
+ SUM_SUB 0, 3, 9 ; stp2_17, stp2_18
+ SUM_SUB 11, 7, 9 ; stp2_31, stp2_28
+ SUM_SUB 2, 4, 9 ; stp2_30, stp2_29
+
+ ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29
+ BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28
+
+ mova [stp + %3 + idx16], m1
+ mova [stp + %3 + idx17], m0
+ mova [stp + %3 + idx18], m4
+ mova [stp + %3 + idx19], m7
+ mova [stp + %4 + idx28], m12
+ mova [stp + %4 + idx29], m3
+ mova [stp + %4 + idx30], m2
+ mova [stp + %4 + idx31], m11
+
+ ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m2, [rsp + transposed_in + 16 * 3]
+ mova m3, m2
+ pmulhrsw m3, [pw_m2404x2] ; stp1_23
+ pmulhrsw m2, [pw_16207x2] ; stp2_24
+
+ mova m5, [rsp + transposed_in + 16 * 5]
+ mova m6, m5
+ pmulhrsw m5, [pw__3981x2] ; stp1_20
+ pmulhrsw m6, [pw_15893x2] ; stp2_27
+
+ mova m14, [rsp + transposed_in + 16 * 11]
+ mova m13, m14
+ pmulhrsw m13, [pw_m8423x2] ; stp1_21
+ pmulhrsw m14, [pw_14053x2] ; stp2_26
+
+ mova m0, [rsp + transposed_in + 16 * 13]
+ mova m1, m0
+ pmulhrsw m0, [pw__9760x2] ; stp1_22
+ pmulhrsw m1, [pw_13160x2] ; stp2_25
+
+ ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 5, 13, 9 ; stp2_20, stp2_21
+ SUM_SUB 3, 0, 9 ; stp2_23, stp2_22
+ SUM_SUB 2, 1, 9 ; stp2_24, stp2_25
+ SUM_SUB 6, 14, 9 ; stp2_27, stp2_26
+
+ ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26
+ BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22
+
+ ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 3, 5, 9 ; stp2_23, stp2_20
+ SUM_SUB 0, 14, 9 ; stp2_22, stp2_21
+ SUM_SUB 2, 6, 9 ; stp2_24, stp2_27
+ SUM_SUB 1, 13, 9 ; stp2_25, stp2_26
+
+ ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20
+ BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21
+
+ ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m4, [stp + %3 + idx16]
+ mova m7, [stp + %3 + idx17]
+ mova m11, [stp + %3 + idx18]
+ mova m12, [stp + %3 + idx19]
+ SUM_SUB 4, 3, 9 ; stp2_16, stp2_23
+ SUM_SUB 7, 0, 9 ; stp2_17, stp2_22
+ SUM_SUB 11, 14, 9 ; stp2_18, stp2_21
+ SUM_SUB 12, 5, 9 ; stp2_19, stp2_20
+ mova [stp + %3 + idx16], m4
+ mova [stp + %3 + idx17], m7
+ mova [stp + %3 + idx18], m11
+ mova [stp + %3 + idx19], m12
+
+ mova m4, [stp + %4 + idx28]
+ mova m7, [stp + %4 + idx29]
+ mova m11, [stp + %4 + idx30]
+ mova m12, [stp + %4 + idx31]
+ SUM_SUB 4, 6, 9 ; stp2_28, stp2_27
+ SUM_SUB 7, 13, 9 ; stp2_29, stp2_26
+ SUM_SUB 11, 1, 9 ; stp2_30, stp2_25
+ SUM_SUB 12, 2, 9 ; stp2_31, stp2_24
+ mova [stp + %4 + idx28], m4
+ mova [stp + %4 + idx29], m7
+ mova [stp + %4 + idx30], m11
+ mova [stp + %4 + idx31], m12
+
+ ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 6, 5, 9
+ pmulhrsw m6, m10 ; stp1_27
+ pmulhrsw m5, m10 ; stp1_20
+ SUM_SUB 13, 14, 9
+ pmulhrsw m13, m10 ; stp1_26
+ pmulhrsw m14, m10 ; stp1_21
+ SUM_SUB 1, 0, 9
+ pmulhrsw m1, m10 ; stp1_25
+ pmulhrsw m0, m10 ; stp1_22
+ SUM_SUB 2, 3, 9
+ pmulhrsw m2, m10 ; stp1_24
+ pmulhrsw m3, m10 ; stp1_23
+%else
+ BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27
+ SWAP 6, 5
+ BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26
+ SWAP 13, 14
+ BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25
+ SWAP 1, 0
+ BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24
+ SWAP 2, 3
+%endif
+ mova [stp + %3 + idx20], m5
+ mova [stp + %3 + idx21], m14
+ mova [stp + %3 + idx22], m0
+ mova [stp + %3 + idx23], m3
+ mova [stp + %4 + idx24], m2
+ mova [stp + %4 + idx25], m1
+ mova [stp + %4 + idx26], m13
+ mova [stp + %4 + idx27], m6
+
+ ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m0, [rsp + transposed_in + 16 * 2]
+ mova m1, m0
+ pmulhrsw m0, [pw__1606x2] ; stp1_8
+ pmulhrsw m1, [pw_16305x2] ; stp2_15
+
+ mova m6, [rsp + transposed_in + 16 * 6]
+ mova m7, m6
+ pmulhrsw m7, [pw_m4756x2] ; stp2_11
+ pmulhrsw m6, [pw_15679x2] ; stp1_12
+
+ mova m4, [rsp + transposed_in + 16 * 10]
+ mova m5, m4
+ pmulhrsw m4, [pw__7723x2] ; stp1_10
+ pmulhrsw m5, [pw_14449x2] ; stp2_13
+
+ mova m2, [rsp + transposed_in + 16 * 14]
+ mova m3, m2
+ pmulhrsw m3, [pw_m10394x2] ; stp1_9
+ pmulhrsw m2, [pw_12665x2] ; stp2_14
+
+ ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 3, 9 ; stp1_8, stp1_9
+ SUM_SUB 7, 4, 9 ; stp1_11, stp1_10
+ SUM_SUB 6, 5, 9 ; stp1_12, stp1_13
+ SUM_SUB 1, 2, 9 ; stp1_15, stp1_14
+
+ ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14
+ BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10
+
+ ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 7, 9 ; stp1_8, stp1_11
+ SUM_SUB 2, 4, 9 ; stp1_9, stp1_10
+ SUM_SUB 1, 6, 9 ; stp1_15, stp1_12
+ SUM_SUB 3, 5, 9 ; stp1_14, stp1_13
+
+ ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 5, 4, 9
+ pmulhrsw m5, m10 ; stp1_13
+ pmulhrsw m4, m10 ; stp1_10
+ SUM_SUB 6, 7, 9
+ pmulhrsw m6, m10 ; stp1_12
+ pmulhrsw m7, m10 ; stp1_11
+%else
+ BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13
+ SWAP 5, 4
+ BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12
+ SWAP 6, 7
+%endif
+ ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova [stp + %2 + idx8], m0
+ mova [stp + %2 + idx9], m2
+ mova [stp + %2 + idx10], m4
+ mova [stp + %2 + idx11], m7
+ mova [stp + %2 + idx12], m6
+ mova [stp + %2 + idx13], m5
+ mova [stp + %2 + idx14], m3
+ mova [stp + %2 + idx15], m1
+
+ ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m11, [rsp + transposed_in + 16 * 4]
+ mova m12, m11
+ pmulhrsw m11, [pw__3196x2] ; stp1_4
+ pmulhrsw m12, [pw_16069x2] ; stp1_7
+
+ mova m13, [rsp + transposed_in + 16 * 12]
+ mova m14, m13
+ pmulhrsw m13, [pw_13623x2] ; stp1_6
+ pmulhrsw m14, [pw_m9102x2] ; stp1_5
+
+ ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m0, [rsp + transposed_in + 16 * 0]
+ mova m2, [rsp + transposed_in + 16 * 8]
+ pmulhrsw m0, [pw_11585x2] ; stp1_1
+ mova m3, m2
+ pmulhrsw m2, [pw__6270x2] ; stp1_2
+ pmulhrsw m3, [pw_15137x2] ; stp1_3
+
+ SUM_SUB 11, 14, 9 ; stp1_4, stp1_5
+ SUM_SUB 12, 13, 9 ; stp1_7, stp1_6
+
+ ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 13, 14, 9
+ pmulhrsw m13, m10 ; stp1_6
+ pmulhrsw m14, m10 ; stp1_5
+%else
+ BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6
+ SWAP 13, 14
+%endif
+ mova m1, m0 ; stp1_0 = stp1_1
+ SUM_SUB 0, 3, 9 ; stp1_0, stp1_3
+ SUM_SUB 1, 2, 9 ; stp1_1, stp1_2
+
+ ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 12, 9 ; stp1_0, stp1_7
+ SUM_SUB 1, 13, 9 ; stp1_1, stp1_6
+ SUM_SUB 2, 14, 9 ; stp1_2, stp1_5
+ SUM_SUB 3, 11, 9 ; stp1_3, stp1_4
+
+ ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m4, [stp + %2 + idx12]
+ mova m5, [stp + %2 + idx13]
+ mova m6, [stp + %2 + idx14]
+ mova m7, [stp + %2 + idx15]
+ SUM_SUB 0, 7, 9 ; stp1_0, stp1_15
+ SUM_SUB 1, 6, 9 ; stp1_1, stp1_14
+ SUM_SUB 2, 5, 9 ; stp1_2, stp1_13
+ SUM_SUB 3, 4, 9 ; stp1_3, stp1_12
+
+ ; 0-3, 28-31 final stage
+ mova m10, [stp + %4 + idx31]
+ mova m15, [stp + %4 + idx30]
+ SUM_SUB 0, 10, 9 ; stp1_0, stp1_31
+ SUM_SUB 1, 15, 9 ; stp1_1, stp1_30
+ mova [stp + %1 + idx0], m0
+ mova [stp + %1 + idx1], m1
+ mova [stp + %4 + idx31], m10
+ mova [stp + %4 + idx30], m15
+ mova m0, [stp + %4 + idx29]
+ mova m1, [stp + %4 + idx28]
+ SUM_SUB 2, 0, 9 ; stp1_2, stp1_29
+ SUM_SUB 3, 1, 9 ; stp1_3, stp1_28
+ mova [stp + %1 + idx2], m2
+ mova [stp + %1 + idx3], m3
+ mova [stp + %4 + idx29], m0
+ mova [stp + %4 + idx28], m1
+
+ ; 12-15, 16-19 final stage
+ mova m0, [stp + %3 + idx16]
+ mova m1, [stp + %3 + idx17]
+ mova m2, [stp + %3 + idx18]
+ mova m3, [stp + %3 + idx19]
+ SUM_SUB 7, 0, 9 ; stp1_15, stp1_16
+ SUM_SUB 6, 1, 9 ; stp1_14, stp1_17
+ SUM_SUB 5, 2, 9 ; stp1_13, stp1_18
+ SUM_SUB 4, 3, 9 ; stp1_12, stp1_19
+ mova [stp + %2 + idx12], m4
+ mova [stp + %2 + idx13], m5
+ mova [stp + %2 + idx14], m6
+ mova [stp + %2 + idx15], m7
+ mova [stp + %3 + idx16], m0
+ mova [stp + %3 + idx17], m1
+ mova [stp + %3 + idx18], m2
+ mova [stp + %3 + idx19], m3
+
+ mova m4, [stp + %2 + idx8]
+ mova m5, [stp + %2 + idx9]
+ mova m6, [stp + %2 + idx10]
+ mova m7, [stp + %2 + idx11]
+ SUM_SUB 11, 7, 9 ; stp1_4, stp1_11
+ SUM_SUB 14, 6, 9 ; stp1_5, stp1_10
+ SUM_SUB 13, 5, 9 ; stp1_6, stp1_9
+ SUM_SUB 12, 4, 9 ; stp1_7, stp1_8
+
+ ; 4-7, 24-27 final stage
+ mova m3, [stp + %4 + idx24]
+ mova m2, [stp + %4 + idx25]
+ mova m1, [stp + %4 + idx26]
+ mova m0, [stp + %4 + idx27]
+ SUM_SUB 12, 3, 9 ; stp1_7, stp1_24
+ SUM_SUB 13, 2, 9 ; stp1_6, stp1_25
+ SUM_SUB 14, 1, 9 ; stp1_5, stp1_26
+ SUM_SUB 11, 0, 9 ; stp1_4, stp1_27
+ mova [stp + %4 + idx24], m3
+ mova [stp + %4 + idx25], m2
+ mova [stp + %4 + idx26], m1
+ mova [stp + %4 + idx27], m0
+ mova [stp + %1 + idx4], m11
+ mova [stp + %1 + idx5], m14
+ mova [stp + %1 + idx6], m13
+ mova [stp + %1 + idx7], m12
+
+ ; 8-11, 20-23 final stage
+ mova m0, [stp + %3 + idx20]
+ mova m1, [stp + %3 + idx21]
+ mova m2, [stp + %3 + idx22]
+ mova m3, [stp + %3 + idx23]
+ SUM_SUB 7, 0, 9 ; stp1_11, stp1_20
+ SUM_SUB 6, 1, 9 ; stp1_10, stp1_21
+ SUM_SUB 5, 2, 9 ; stp1_9, stp1_22
+ SUM_SUB 4, 3, 9 ; stp1_8, stp1_23
+ mova [stp + %2 + idx8], m4
+ mova [stp + %2 + idx9], m5
+ mova [stp + %2 + idx10], m6
+ mova [stp + %2 + idx11], m7
+ mova [stp + %3 + idx20], m0
+ mova [stp + %3 + idx21], m1
+ mova [stp + %3 + idx22], m2
+ mova [stp + %3 + idx23], m3
+%endmacro
+
+INIT_XMM ssse3
+cglobal idct32x32_135_add, 3, 11, 16, i32x32_size, input, output, stride
+ mova m8, [pd_8192]
+ mov r6, 2
+ lea stp, [rsp + pass_one_start]
+
+idct32x32_135:
+ mov r3, inputq
+ lea r4, [rsp + transposed_in]
+ mov r7, 2
+
+idct32x32_135_transpose:
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova m0, [r3 + 0]
+ packssdw m0, [r3 + 16]
+ mova m1, [r3 + 32 * 4]
+ packssdw m1, [r3 + 32 * 4 + 16]
+ mova m2, [r3 + 32 * 8]
+ packssdw m2, [r3 + 32 * 8 + 16]
+ mova m3, [r3 + 32 * 12]
+ packssdw m3, [r3 + 32 * 12 + 16]
+ mova m4, [r3 + 32 * 16]
+ packssdw m4, [r3 + 32 * 16 + 16]
+ mova m5, [r3 + 32 * 20]
+ packssdw m5, [r3 + 32 * 20 + 16]
+ mova m6, [r3 + 32 * 24]
+ packssdw m6, [r3 + 32 * 24 + 16]
+ mova m7, [r3 + 32 * 28]
+ packssdw m7, [r3 + 32 * 28 + 16]
+%else
+ mova m0, [r3 + 0]
+ mova m1, [r3 + 16 * 4]
+ mova m2, [r3 + 16 * 8]
+ mova m3, [r3 + 16 * 12]
+ mova m4, [r3 + 16 * 16]
+ mova m5, [r3 + 16 * 20]
+ mova m6, [r3 + 16 * 24]
+ mova m7, [r3 + 16 * 28]
+%endif
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ mova [r4 + 0], m0
+ mova [r4 + 16 * 1], m1
+ mova [r4 + 16 * 2], m2
+ mova [r4 + 16 * 3], m3
+ mova [r4 + 16 * 4], m4
+ mova [r4 + 16 * 5], m5
+ mova [r4 + 16 * 6], m6
+ mova [r4 + 16 * 7], m7
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ add r3, 32
+%else
+ add r3, 16
+%endif
+ add r4, 16 * 8
+ dec r7
+ jne idct32x32_135_transpose
+
+ IDCT32X32_135 16*0, 16*32, 16*64, 16*96
+ lea stp, [stp + 16 * 8]
+%if CONFIG_VP9_HIGHBITDEPTH
+ lea inputq, [inputq + 32 * 32]
+%else
+ lea inputq, [inputq + 16 * 32]
+%endif
+ dec r6
+ jnz idct32x32_135
+
+ mov r6, 4
+ lea stp, [rsp + pass_one_start]
+ lea r9, [rsp + pass_one_start]
+
+idct32x32_135_2:
+ lea r4, [rsp + transposed_in]
+ mov r3, r9
+ mov r7, 2
+
+idct32x32_135_transpose_2:
+ mova m0, [r3 + 0]
+ mova m1, [r3 + 16 * 1]
+ mova m2, [r3 + 16 * 2]
+ mova m3, [r3 + 16 * 3]
+ mova m4, [r3 + 16 * 4]
+ mova m5, [r3 + 16 * 5]
+ mova m6, [r3 + 16 * 6]
+ mova m7, [r3 + 16 * 7]
+
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ mova [r4 + 0], m0
+ mova [r4 + 16 * 1], m1
+ mova [r4 + 16 * 2], m2
+ mova [r4 + 16 * 3], m3
+ mova [r4 + 16 * 4], m4
+ mova [r4 + 16 * 5], m5
+ mova [r4 + 16 * 6], m6
+ mova [r4 + 16 * 7], m7
+
+ add r3, 16 * 8
+ add r4, 16 * 8
+ dec r7
+ jne idct32x32_135_transpose_2
+
+ IDCT32X32_135 16*0, 16*8, 16*16, 16*24
+
+ lea stp, [stp + 16 * 32]
+ add r9, 16 * 32
+ dec r6
+ jnz idct32x32_135_2
+
+ RECON_AND_STORE pass_two_start
+
+ RET
+
+%macro IDCT32X32_1024 4
+ ; BLOCK A STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m1, [rsp + transposed_in + 16 * 1]
+ mova m11, [rsp + transposed_in + 16 * 31]
+ BUTTERFLY_4X 1, 11, 804, 16364, m8, 9, 10 ; stp1_16, stp1_31
+
+ mova m0, [rsp + transposed_in + 16 * 15]
+ mova m2, [rsp + transposed_in + 16 * 17]
+ BUTTERFLY_4X 2, 0, 12140, 11003, m8, 9, 10 ; stp1_17, stp1_30
+
+ mova m7, [rsp + transposed_in + 16 * 7]
+ mova m12, [rsp + transposed_in + 16 * 25]
+ BUTTERFLY_4X 12, 7, 15426, 5520, m8, 9, 10 ; stp1_19, stp1_28
+
+ mova m3, [rsp + transposed_in + 16 * 9]
+ mova m4, [rsp + transposed_in + 16 * 23]
+ BUTTERFLY_4X 3, 4, 7005, 14811, m8, 9, 10 ; stp1_18, stp1_29
+
+ ; BLOCK A STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 1, 2, 9 ; stp2_16, stp2_17
+ SUM_SUB 12, 3, 9 ; stp2_19, stp2_18
+ SUM_SUB 7, 4, 9 ; stp2_28, stp2_29
+ SUM_SUB 11, 0, 9 ; stp2_31, stp2_30
+
+ ; BLOCK A STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 0, 2, 3196, 16069, m8, 9, 10 ; stp1_17, stp1_30
+ BUTTERFLY_4Xmm 4, 3, 3196, 16069, m8, 9, 10 ; stp1_29, stp1_18
+
+ ; BLOCK A STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 1, 12, 9 ; stp2_16, stp2_19
+ SUM_SUB 0, 3, 9 ; stp2_17, stp2_18
+ SUM_SUB 11, 7, 9 ; stp2_31, stp2_28
+ SUM_SUB 2, 4, 9 ; stp2_30, stp2_29
+
+ ; BLOCK A STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 4, 3, 6270, 15137, m8, 9, 10 ; stp1_18, stp1_29
+ BUTTERFLY_4X 7, 12, 6270, 15137, m8, 9, 10 ; stp1_19, stp1_28
+
+ mova [stp + %3 + idx16], m1
+ mova [stp + %3 + idx17], m0
+ mova [stp + %3 + idx18], m4
+ mova [stp + %3 + idx19], m7
+ mova [stp + %4 + idx28], m12
+ mova [stp + %4 + idx29], m3
+ mova [stp + %4 + idx30], m2
+ mova [stp + %4 + idx31], m11
+
+ ; BLOCK B STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m5, [rsp + transposed_in + 16 * 5]
+ mova m6, [rsp + transposed_in + 16 * 27]
+ BUTTERFLY_4X 5, 6, 3981, 15893, m8, 9, 10 ; stp1_20, stp1_27
+
+ mova m13, [rsp + transposed_in + 16 * 21]
+ mova m14, [rsp + transposed_in + 16 * 11]
+ BUTTERFLY_4X 13, 14, 14053, 8423, m8, 9, 10 ; stp1_21, stp1_26
+
+ mova m0, [rsp + transposed_in + 16 * 13]
+ mova m1, [rsp + transposed_in + 16 * 19]
+ BUTTERFLY_4X 0, 1, 9760, 13160, m8, 9, 10 ; stp1_22, stp1_25
+
+ mova m2, [rsp + transposed_in + 16 * 3]
+ mova m3, [rsp + transposed_in + 16 * 29]
+ BUTTERFLY_4X 3, 2, 16207, 2404, m8, 9, 10 ; stp1_23, stp1_24
+
+ ; BLOCK B STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 5, 13, 9 ; stp2_20, stp2_21
+ SUM_SUB 3, 0, 9 ; stp2_23, stp2_22
+ SUM_SUB 2, 1, 9 ; stp2_24, stp2_25
+ SUM_SUB 6, 14, 9 ; stp2_27, stp2_26
+
+ ; BLOCK B STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_21, stp1_26
+ BUTTERFLY_4Xmm 1, 0, 13623, 9102, m8, 9, 10 ; stp1_25, stp1_22
+
+ ; BLOCK B STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 3, 5, 9 ; stp2_23, stp2_20
+ SUM_SUB 0, 14, 9 ; stp2_22, stp2_21
+ SUM_SUB 2, 6, 9 ; stp2_24, stp2_27
+ SUM_SUB 1, 13, 9 ; stp2_25, stp2_26
+
+ ; BLOCK B STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4Xmm 6, 5, 6270, 15137, m8, 9, 10 ; stp1_27, stp1_20
+ BUTTERFLY_4Xmm 13, 14, 6270, 15137, m8, 9, 10 ; stp1_26, stp1_21
+
+ ; BLOCK B STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m4, [stp + %3 + idx16]
+ mova m7, [stp + %3 + idx17]
+ mova m11, [stp + %3 + idx18]
+ mova m12, [stp + %3 + idx19]
+ SUM_SUB 4, 3, 9 ; stp2_16, stp2_23
+ SUM_SUB 7, 0, 9 ; stp2_17, stp2_22
+ SUM_SUB 11, 14, 9 ; stp2_18, stp2_21
+ SUM_SUB 12, 5, 9 ; stp2_19, stp2_20
+ mova [stp + %3 + idx16], m4
+ mova [stp + %3 + idx17], m7
+ mova [stp + %3 + idx18], m11
+ mova [stp + %3 + idx19], m12
+
+ mova m4, [stp + %4 + idx28]
+ mova m7, [stp + %4 + idx29]
+ mova m11, [stp + %4 + idx30]
+ mova m12, [stp + %4 + idx31]
+ SUM_SUB 4, 6, 9 ; stp2_28, stp2_27
+ SUM_SUB 7, 13, 9 ; stp2_29, stp2_26
+ SUM_SUB 11, 1, 9 ; stp2_30, stp2_25
+ SUM_SUB 12, 2, 9 ; stp2_31, stp2_24
+ mova [stp + %4 + idx28], m4
+ mova [stp + %4 + idx29], m7
+ mova [stp + %4 + idx30], m11
+ mova [stp + %4 + idx31], m12
+
+ ; BLOCK B STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 6, 5, 9
+ pmulhrsw m6, m10 ; stp1_27
+ pmulhrsw m5, m10 ; stp1_20
+ SUM_SUB 13, 14, 9
+ pmulhrsw m13, m10 ; stp1_26
+ pmulhrsw m14, m10 ; stp1_21
+ SUM_SUB 1, 0, 9
+ pmulhrsw m1, m10 ; stp1_25
+ pmulhrsw m0, m10 ; stp1_22
+ SUM_SUB 2, 3, 9
+ pmulhrsw m2, m10 ; stp1_24
+ pmulhrsw m3, m10 ; stp1_23
+%else
+ BUTTERFLY_4X 6, 5, 11585, 11585, m8, 9, 10 ; stp1_20, stp1_27
+ SWAP 6, 5
+ BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_21, stp1_26
+ SWAP 13, 14
+ BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 ; stp1_22, stp1_25
+ SWAP 1, 0
+ BUTTERFLY_4X 2, 3, 11585, 11585, m8, 9, 10 ; stp1_23, stp1_24
+ SWAP 2, 3
+%endif
+ mova [stp + %3 + idx20], m5
+ mova [stp + %3 + idx21], m14
+ mova [stp + %3 + idx22], m0
+ mova [stp + %3 + idx23], m3
+ mova [stp + %4 + idx24], m2
+ mova [stp + %4 + idx25], m1
+ mova [stp + %4 + idx26], m13
+ mova [stp + %4 + idx27], m6
+
+ ; BLOCK C STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK C STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m0, [rsp + transposed_in + 16 * 2]
+ mova m1, [rsp + transposed_in + 16 * 30]
+ BUTTERFLY_4X 0, 1, 1606, 16305, m8, 9, 10 ; stp1_8, stp1_15
+
+ mova m2, [rsp + transposed_in + 16 * 14]
+ mova m3, [rsp + transposed_in + 16 * 18]
+ BUTTERFLY_4X 3, 2, 12665, 10394, m8, 9, 10 ; stp1_9, stp1_14
+
+ mova m4, [rsp + transposed_in + 16 * 10]
+ mova m5, [rsp + transposed_in + 16 * 22]
+ BUTTERFLY_4X 4, 5, 7723, 14449, m8, 9, 10 ; stp1_10, stp1_13
+
+ mova m6, [rsp + transposed_in + 16 * 6]
+ mova m7, [rsp + transposed_in + 16 * 26]
+ BUTTERFLY_4X 7, 6, 15679, 4756, m8, 9, 10 ; stp1_11, stp1_12
+
+ ; BLOCK C STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 3, 9 ; stp1_8, stp1_9
+ SUM_SUB 7, 4, 9 ; stp1_11, stp1_10
+ SUM_SUB 6, 5, 9 ; stp1_12, stp1_13
+ SUM_SUB 1, 2, 9 ; stp1_15, stp1_14
+
+ ; BLOCK C STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_9, stp1_14
+ BUTTERFLY_4Xmm 5, 4, 6270, 15137, m8, 9, 10 ; stp1_13, stp1_10
+
+ ; BLOCK C STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 7, 9 ; stp1_8, stp1_11
+ SUM_SUB 2, 4, 9 ; stp1_9, stp1_10
+ SUM_SUB 1, 6, 9 ; stp1_15, stp1_12
+ SUM_SUB 3, 5, 9 ; stp1_14, stp1_13
+
+ ; BLOCK C STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 5, 4, 9
+ pmulhrsw m5, m10 ; stp1_13
+ pmulhrsw m4, m10 ; stp1_10
+ SUM_SUB 6, 7, 9
+ pmulhrsw m6, m10 ; stp1_12
+ pmulhrsw m7, m10 ; stp1_11
+%else
+ BUTTERFLY_4X 5, 4, 11585, 11585, m8, 9, 10 ; stp1_10, stp1_13
+ SWAP 5, 4
+ BUTTERFLY_4X 6, 7, 11585, 11585, m8, 9, 10 ; stp1_11, stp1_12
+ SWAP 6, 7
+%endif
+ ; BLOCK C STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova [stp + %2 + idx8], m0
+ mova [stp + %2 + idx9], m2
+ mova [stp + %2 + idx10], m4
+ mova [stp + %2 + idx11], m7
+ mova [stp + %2 + idx12], m6
+ mova [stp + %2 + idx13], m5
+ mova [stp + %2 + idx14], m3
+ mova [stp + %2 + idx15], m1
+
+ ; BLOCK D STAGE 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK D STAGE 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ;
+ ; BLOCK D STAGE 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m11, [rsp + transposed_in + 16 * 4]
+ mova m12, [rsp + transposed_in + 16 * 28]
+ BUTTERFLY_4X 11, 12, 3196, 16069, m8, 9, 10 ; stp1_4, stp1_7
+
+ mova m13, [rsp + transposed_in + 16 * 12]
+ mova m14, [rsp + transposed_in + 16 * 20]
+ BUTTERFLY_4X 14, 13, 13623, 9102, m8, 9, 10 ; stp1_5, stp1_6
+
+ ; BLOCK D STAGE 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m0, [rsp + transposed_in + 16 * 0]
+ mova m1, [rsp + transposed_in + 16 * 16]
+
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ mova m10, [pw_11585x2]
+ SUM_SUB 0, 1, 9
+ pmulhrsw m0, m10 ; stp1_1
+ pmulhrsw m1, m10 ; stp1_0
+%else
+ BUTTERFLY_4X 0, 1, 11585, 11585, m8, 9, 10 ; stp1_1, stp1_0
+ SWAP 0, 1
+%endif
+ mova m2, [rsp + transposed_in + 16 * 8]
+ mova m3, [rsp + transposed_in + 16 * 24]
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 ; stp1_2, stp1_3
+
+ mova m10, [pw_11585x2]
+ SUM_SUB 11, 14, 9 ; stp1_4, stp1_5
+ SUM_SUB 12, 13, 9 ; stp1_7, stp1_6
+
+ ; BLOCK D STAGE 5 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+%if 0 ; overflow occurs in SUM_SUB when using test streams
+ SUM_SUB 13, 14, 9
+ pmulhrsw m13, m10 ; stp1_6
+ pmulhrsw m14, m10 ; stp1_5
+%else
+ BUTTERFLY_4X 13, 14, 11585, 11585, m8, 9, 10 ; stp1_5, stp1_6
+ SWAP 13, 14
+%endif
+ SUM_SUB 0, 3, 9 ; stp1_0, stp1_3
+ SUM_SUB 1, 2, 9 ; stp1_1, stp1_2
+
+ ; BLOCK D STAGE 6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SUM_SUB 0, 12, 9 ; stp1_0, stp1_7
+ SUM_SUB 1, 13, 9 ; stp1_1, stp1_6
+ SUM_SUB 2, 14, 9 ; stp1_2, stp1_5
+ SUM_SUB 3, 11, 9 ; stp1_3, stp1_4
+
+ ; BLOCK D STAGE 7 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mova m4, [stp + %2 + idx12]
+ mova m5, [stp + %2 + idx13]
+ mova m6, [stp + %2 + idx14]
+ mova m7, [stp + %2 + idx15]
+ SUM_SUB 0, 7, 9 ; stp1_0, stp1_15
+ SUM_SUB 1, 6, 9 ; stp1_1, stp1_14
+ SUM_SUB 2, 5, 9 ; stp1_2, stp1_13
+ SUM_SUB 3, 4, 9 ; stp1_3, stp1_12
+
+ ; 0-3, 28-31 final stage
+ mova m10, [stp + %4 + idx31]
+ mova m15, [stp + %4 + idx30]
+ SUM_SUB 0, 10, 9 ; stp1_0, stp1_31
+ SUM_SUB 1, 15, 9 ; stp1_1, stp1_30
+ mova [stp + %1 + idx0], m0
+ mova [stp + %1 + idx1], m1
+ mova [stp + %4 + idx31], m10
+ mova [stp + %4 + idx30], m15
+ mova m0, [stp + %4 + idx29]
+ mova m1, [stp + %4 + idx28]
+ SUM_SUB 2, 0, 9 ; stp1_2, stp1_29
+ SUM_SUB 3, 1, 9 ; stp1_3, stp1_28
+ mova [stp + %1 + idx2], m2
+ mova [stp + %1 + idx3], m3
+ mova [stp + %4 + idx29], m0
+ mova [stp + %4 + idx28], m1
+
+ ; 12-15, 16-19 final stage
+ mova m0, [stp + %3 + idx16]
+ mova m1, [stp + %3 + idx17]
+ mova m2, [stp + %3 + idx18]
+ mova m3, [stp + %3 + idx19]
+ SUM_SUB 7, 0, 9 ; stp1_15, stp1_16
+ SUM_SUB 6, 1, 9 ; stp1_14, stp1_17
+ SUM_SUB 5, 2, 9 ; stp1_13, stp1_18
+ SUM_SUB 4, 3, 9 ; stp1_12, stp1_19
+ mova [stp + %2 + idx12], m4
+ mova [stp + %2 + idx13], m5
+ mova [stp + %2 + idx14], m6
+ mova [stp + %2 + idx15], m7
+ mova [stp + %3 + idx16], m0
+ mova [stp + %3 + idx17], m1
+ mova [stp + %3 + idx18], m2
+ mova [stp + %3 + idx19], m3
+
+ mova m4, [stp + %2 + idx8]
+ mova m5, [stp + %2 + idx9]
+ mova m6, [stp + %2 + idx10]
+ mova m7, [stp + %2 + idx11]
+ SUM_SUB 11, 7, 9 ; stp1_4, stp1_11
+ SUM_SUB 14, 6, 9 ; stp1_5, stp1_10
+ SUM_SUB 13, 5, 9 ; stp1_6, stp1_9
+ SUM_SUB 12, 4, 9 ; stp1_7, stp1_8
+
+ ; 4-7, 24-27 final stage
+ mova m3, [stp + %4 + idx24]
+ mova m2, [stp + %4 + idx25]
+ mova m1, [stp + %4 + idx26]
+ mova m0, [stp + %4 + idx27]
+ SUM_SUB 12, 3, 9 ; stp1_7, stp1_24
+ SUM_SUB 13, 2, 9 ; stp1_6, stp1_25
+ SUM_SUB 14, 1, 9 ; stp1_5, stp1_26
+ SUM_SUB 11, 0, 9 ; stp1_4, stp1_27
+ mova [stp + %4 + idx24], m3
+ mova [stp + %4 + idx25], m2
+ mova [stp + %4 + idx26], m1
+ mova [stp + %4 + idx27], m0
+ mova [stp + %1 + idx4], m11
+ mova [stp + %1 + idx5], m14
+ mova [stp + %1 + idx6], m13
+ mova [stp + %1 + idx7], m12
+
+ ; 8-11, 20-23 final stage
+ mova m0, [stp + %3 + idx20]
+ mova m1, [stp + %3 + idx21]
+ mova m2, [stp + %3 + idx22]
+ mova m3, [stp + %3 + idx23]
+ SUM_SUB 7, 0, 9 ; stp1_11, stp1_20
+ SUM_SUB 6, 1, 9 ; stp1_10, stp1_21
+ SUM_SUB 5, 2, 9 ; stp1_9, stp1_22
+ SUM_SUB 4, 3, 9 ; stp1_8, stp1_23
+ mova [stp + %2 + idx8], m4
+ mova [stp + %2 + idx9], m5
+ mova [stp + %2 + idx10], m6
+ mova [stp + %2 + idx11], m7
+ mova [stp + %3 + idx20], m0
+ mova [stp + %3 + idx21], m1
+ mova [stp + %3 + idx22], m2
+ mova [stp + %3 + idx23], m3
+%endmacro
+
+INIT_XMM ssse3
+cglobal idct32x32_1024_add, 3, 11, 16, i32x32_size, input, output, stride
+ mova m8, [pd_8192]
+ mov r6, 4
+ lea stp, [rsp + pass_one_start]
+
+idct32x32_1024:
+ mov r3, inputq
+ lea r4, [rsp + transposed_in]
+ mov r7, 4
+
+idct32x32_1024_transpose:
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova m0, [r3 + 0]
+ packssdw m0, [r3 + 16]
+ mova m1, [r3 + 32 * 4]
+ packssdw m1, [r3 + 32 * 4 + 16]
+ mova m2, [r3 + 32 * 8]
+ packssdw m2, [r3 + 32 * 8 + 16]
+ mova m3, [r3 + 32 * 12]
+ packssdw m3, [r3 + 32 * 12 + 16]
+ mova m4, [r3 + 32 * 16]
+ packssdw m4, [r3 + 32 * 16 + 16]
+ mova m5, [r3 + 32 * 20]
+ packssdw m5, [r3 + 32 * 20 + 16]
+ mova m6, [r3 + 32 * 24]
+ packssdw m6, [r3 + 32 * 24 + 16]
+ mova m7, [r3 + 32 * 28]
+ packssdw m7, [r3 + 32 * 28 + 16]
+%else
+ mova m0, [r3 + 0]
+ mova m1, [r3 + 16 * 4]
+ mova m2, [r3 + 16 * 8]
+ mova m3, [r3 + 16 * 12]
+ mova m4, [r3 + 16 * 16]
+ mova m5, [r3 + 16 * 20]
+ mova m6, [r3 + 16 * 24]
+ mova m7, [r3 + 16 * 28]
+%endif
+
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ mova [r4 + 0], m0
+ mova [r4 + 16 * 1], m1
+ mova [r4 + 16 * 2], m2
+ mova [r4 + 16 * 3], m3
+ mova [r4 + 16 * 4], m4
+ mova [r4 + 16 * 5], m5
+ mova [r4 + 16 * 6], m6
+ mova [r4 + 16 * 7], m7
+%if CONFIG_VP9_HIGHBITDEPTH
+ add r3, 32
+%else
+ add r3, 16
+%endif
+ add r4, 16 * 8
+ dec r7
+ jne idct32x32_1024_transpose
+
+ IDCT32X32_1024 16*0, 16*32, 16*64, 16*96
+
+ lea stp, [stp + 16 * 8]
+%if CONFIG_VP9_HIGHBITDEPTH
+ lea inputq, [inputq + 32 * 32]
+%else
+ lea inputq, [inputq + 16 * 32]
+%endif
+ dec r6
+ jnz idct32x32_1024
+
+ mov r6, 4
+ lea stp, [rsp + pass_one_start]
+ lea r9, [rsp + pass_one_start]
+
+idct32x32_1024_2:
+ lea r4, [rsp + transposed_in]
+ mov r3, r9
+ mov r7, 4
+
+idct32x32_1024_transpose_2:
+ mova m0, [r3 + 0]
+ mova m1, [r3 + 16 * 1]
+ mova m2, [r3 + 16 * 2]
+ mova m3, [r3 + 16 * 3]
+ mova m4, [r3 + 16 * 4]
+ mova m5, [r3 + 16 * 5]
+ mova m6, [r3 + 16 * 6]
+ mova m7, [r3 + 16 * 7]
+
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ mova [r4 + 0], m0
+ mova [r4 + 16 * 1], m1
+ mova [r4 + 16 * 2], m2
+ mova [r4 + 16 * 3], m3
+ mova [r4 + 16 * 4], m4
+ mova [r4 + 16 * 5], m5
+ mova [r4 + 16 * 6], m6
+ mova [r4 + 16 * 7], m7
+
+ add r3, 16 * 8
+ add r4, 16 * 8
+ dec r7
+ jne idct32x32_1024_transpose_2
+
+ IDCT32X32_1024 16*0, 16*8, 16*16, 16*24
+
+ lea stp, [stp + 16 * 32]
+ add r9, 16 * 32
+ dec r6
+ jnz idct32x32_1024_2
+
+ RECON_AND_STORE pass_two_start
+
+ RET
%endif
diff --git a/libvpx/vpx_dsp/x86/inv_wht_sse2.asm b/libvpx/vpx_dsp/x86/inv_wht_sse2.asm
index df6f4692b..fbbcd76bd 100644
--- a/libvpx/vpx_dsp/x86/inv_wht_sse2.asm
+++ b/libvpx/vpx_dsp/x86/inv_wht_sse2.asm
@@ -82,9 +82,15 @@ SECTION .text
INIT_XMM sse2
cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova m0, [inputq + 0]
+ packssdw m0, [inputq + 16]
+ mova m1, [inputq + 32]
+ packssdw m1, [inputq + 48]
+%else
mova m0, [inputq + 0]
mova m1, [inputq + 16]
-
+%endif
psraw m0, 2
psraw m1, 2
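
As in the ssse3 file above, the CONFIG_VP9_HIGHBITDEPTH branches exist because tran_low_t coefficients are stored as 32-bit values in that configuration, so each 8x16-bit vector is assembled by saturation-packing two 4x32-bit loads with packssdw. Scalar model of one packed element:

    #include <stdint.h>

    /* packssdw per element: saturate a 32-bit value to int16. */
    static int16_t pack_lane(int32_t v) {
      if (v > INT16_MAX) return INT16_MAX;
      if (v < INT16_MIN) return INT16_MIN;
      return (int16_t)v;
    }
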
diff --git a/libvpx/vpx_dsp/x86/loopfilter_avx2.c b/libvpx/vpx_dsp/x86/loopfilter_avx2.c
index 23a97dd05..be1087c1e 100644
--- a/libvpx/vpx_dsp/x86/loopfilter_avx2.c
+++ b/libvpx/vpx_dsp/x86/loopfilter_avx2.c
@@ -13,9 +13,10 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
-static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
- const unsigned char *_blimit, const unsigned char *_limit,
- const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh) {
__m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
@@ -400,9 +401,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
};
-static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
- const unsigned char *_blimit, const unsigned char *_limit,
- const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh) {
__m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
@@ -975,12 +977,3 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
_mm_storeu_si128((__m128i *) (s + 6 * p), q6);
}
}
-
-void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
- const unsigned char *_blimit, const unsigned char *_limit,
- const unsigned char *_thresh, int count) {
- if (count == 1)
- mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh);
- else
- mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh);
-}
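
The two AVX2 kernels are thus exported directly under their new names and the count-dispatching wrapper above is deleted (the matching prototype and rtcd changes live outside this hunk). For reference, a sketch of the old behavior expressed in terms of the new entry points:

    /* Signatures as defined in the hunk above. */
    void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
                                        const unsigned char *_blimit,
                                        const unsigned char *_limit,
                                        const unsigned char *_thresh);
    void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
                                         const unsigned char *_blimit,
                                         const unsigned char *_limit,
                                         const unsigned char *_thresh);

    /* Equivalent of the removed vpx_lpf_horizontal_16_avx2 wrapper:
       callers now select the kernel directly instead of passing count. */
    static void lpf_horizontal_16_dispatch(unsigned char *s, int p,
                                           const unsigned char *blimit,
                                           const unsigned char *limit,
                                           const unsigned char *thresh,
                                           int count) {
      if (count == 1)
        vpx_lpf_horizontal_edge_8_avx2(s, p, blimit, limit, thresh);
      else
        vpx_lpf_horizontal_edge_16_avx2(s, p, blimit, limit, thresh);
    }
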
diff --git a/libvpx/vpx_dsp/x86/loopfilter_mmx.asm b/libvpx/vpx_dsp/x86/loopfilter_mmx.asm
deleted file mode 100644
index b9c18b680..000000000
--- a/libvpx/vpx_dsp/x86/loopfilter_mmx.asm
+++ /dev/null
@@ -1,611 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-;void vpx_lpf_horizontal_4_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
-sym(vpx_lpf_horizontal_4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 32 ; reserve 32 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- movsxd rcx, dword ptr arg(5) ;count
-.next8_h:
- mov rdx, arg(3) ;limit
- movq mm7, [rdx]
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
-
- ; calculate breakout conditions
- movq mm2, [rdi+2*rax] ; q3
- movq mm1, [rsi+2*rax] ; q2
- movq mm6, mm1 ; q2
- psubusb mm1, mm2 ; q2-=q3
- psubusb mm2, mm6 ; q3-=q2
- por mm1, mm2 ; abs(q3-q2)
- psubusb mm1, mm7 ;
-
-
- movq mm4, [rsi+rax] ; q1
- movq mm3, mm4 ; q1
- psubusb mm4, mm6 ; q1-=q2
- psubusb mm6, mm3 ; q2-=q1
- por mm4, mm6 ; abs(q2-q1)
-
- psubusb mm4, mm7
- por mm1, mm4
-
- movq mm4, [rsi] ; q0
- movq mm0, mm4 ; q0
- psubusb mm4, mm3 ; q0-=q1
- psubusb mm3, mm0 ; q1-=q0
- por mm4, mm3 ; abs(q0-q1)
- movq t0, mm4 ; save to t0
- psubusb mm4, mm7
- por mm1, mm4
-
-
- neg rax ; negate pitch to deal with above border
-
- movq mm2, [rsi+4*rax] ; p3
- movq mm4, [rdi+4*rax] ; p2
- movq mm5, mm4 ; p2
- psubusb mm4, mm2 ; p2-=p3
- psubusb mm2, mm5 ; p3-=p2
- por mm4, mm2 ; abs(p3 - p2)
- psubusb mm4, mm7
- por mm1, mm4
-
-
- movq mm4, [rsi+2*rax] ; p1
- movq mm3, mm4 ; p1
- psubusb mm4, mm5 ; p1-=p2
- psubusb mm5, mm3 ; p2-=p1
- por mm4, mm5 ; abs(p2 - p1)
- psubusb mm4, mm7
- por mm1, mm4
-
- movq mm2, mm3 ; p1
-
- movq mm4, [rsi+rax] ; p0
- movq mm5, mm4 ; p0
- psubusb mm4, mm3 ; p0-=p1
- psubusb mm3, mm5 ; p1-=p0
- por mm4, mm3 ; abs(p1 - p0)
- movq t1, mm4 ; save to t1
- psubusb mm4, mm7
- por mm1, mm4
-
- movq mm3, [rdi] ; q1
- movq mm4, mm3 ; q1
- psubusb mm3, mm2 ; q1-=p1
- psubusb mm2, mm4 ; p1-=q1
- por mm2, mm3 ; abs(p1-q1)
- pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
- psrlw mm2, 1 ; abs(p1-q1)/2
-
- movq mm6, mm5 ; p0
- movq mm3, [rsi] ; q0
- psubusb mm5, mm3 ; p0-=q0
- psubusb mm3, mm6 ; q0-=p0
- por mm5, mm3 ; abs(p0 - q0)
- paddusb mm5, mm5 ; abs(p0-q0)*2
- paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
- mov rdx, arg(2) ;blimit ; get blimit
- movq mm7, [rdx] ; blimit
-
- psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
- por mm1, mm5
- pxor mm5, mm5
- pcmpeqb mm1, mm5 ; mask mm1
-
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movq mm7, [rdx] ;
- movq mm4, t0 ; get abs (q1 - q0)
- psubusb mm4, mm7
- movq mm3, t1 ; get abs (p1 - p0)
- psubusb mm3, mm7
- paddb mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-
- pcmpeqb mm4, mm5
-
- pcmpeqb mm5, mm5
- pxor mm4, mm5
-
-
- ; start work on filters
- movq mm2, [rsi+2*rax] ; p1
- movq mm7, [rdi] ; q1
- pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
- psubsb mm2, mm7 ; p1 - q1
- pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
- pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
- movq mm3, mm0 ; q0
- psubsb mm0, mm6 ; q0 - p0
- paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
- paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
- paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
- pand mm1, mm2 ; mask filter values we don't care about
- movq mm2, mm1
- paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
- paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-
- pxor mm0, mm0 ;
- pxor mm5, mm5
- punpcklbw mm0, mm2 ;
- punpckhbw mm5, mm2 ;
- psraw mm0, 11 ;
- psraw mm5, 11
- packsswb mm0, mm5
- movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
- pxor mm0, mm0 ; 0
- movq mm5, mm1 ; abcdefgh
- punpcklbw mm0, mm1 ; e0f0g0h0
- psraw mm0, 11 ; sign extended shift right by 3
- pxor mm1, mm1 ; 0
- punpckhbw mm1, mm5 ; a0b0c0d0
- psraw mm1, 11 ; sign extended shift right by 3
- movq mm5, mm0 ; save results
-
- packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
- paddsw mm5, [GLOBAL(ones)]
- paddsw mm1, [GLOBAL(ones)]
- psraw mm5, 1 ; partial shifted one more time for 2nd tap
- psraw mm1, 1 ; partial shifted one more time for 2nd tap
- packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
- pandn mm4, mm5 ; high edge variance additive
-
- paddsb mm6, mm2 ; p0+= p0 add
- pxor mm6, [GLOBAL(t80)] ; unoffset
- movq [rsi+rax], mm6 ; write back
-
- movq mm6, [rsi+2*rax] ; p1
- pxor mm6, [GLOBAL(t80)] ; reoffset
- paddsb mm6, mm4 ; p1+= p1 add
- pxor mm6, [GLOBAL(t80)] ; unoffset
- movq [rsi+2*rax], mm6 ; write back
-
- psubsb mm3, mm0 ; q0-= q0 add
- pxor mm3, [GLOBAL(t80)] ; unoffset
- movq [rsi], mm3 ; write back
-
- psubsb mm7, mm4 ; q1-= q1 add
- pxor mm7, [GLOBAL(t80)] ; unoffset
- movq [rdi], mm7 ; write back
-
- add rsi,8
- neg rax
- dec rcx
- jnz .next8_h
-
- add rsp, 32
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vpx_lpf_vertical_4_mmx
-;(
-; unsigned char *src_ptr,
-; int src_pixel_step,
-; const char *blimit,
-; const char *limit,
-; const char *thresh,
-; int count
-;)
-global sym(vpx_lpf_vertical_4_mmx) PRIVATE
-sym(vpx_lpf_vertical_4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 64 ; reserve 64 bytes
- %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8];
- %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8];
- %define srct [rsp + 32] ;__declspec(align(16)) char srct[32];
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
-
- lea rsi, [rsi + rax*4 - 4]
-
- movsxd rcx, dword ptr arg(5) ;count
-.next8_v:
- mov rdi, rsi ; rdi points to row +1 for indirect addressing
- add rdi, rax
-
-
- ;transpose
- movq mm6, [rsi+2*rax] ; 67 66 65 64 63 62 61 60
- movq mm7, mm6 ; 77 76 75 74 73 72 71 70
-
- punpckhbw mm7, [rdi+2*rax] ; 77 67 76 66 75 65 74 64
- punpcklbw mm6, [rdi+2*rax] ; 73 63 72 62 71 61 70 60
-
- movq mm4, [rsi] ; 47 46 45 44 43 42 41 40
- movq mm5, mm4 ; 47 46 45 44 43 42 41 40
-
- punpckhbw mm5, [rsi+rax] ; 57 47 56 46 55 45 54 44
- punpcklbw mm4, [rsi+rax] ; 53 43 52 42 51 41 50 40
-
- movq mm3, mm5 ; 57 47 56 46 55 45 54 44
- punpckhwd mm5, mm7 ; 77 67 57 47 76 66 56 46
-
- punpcklwd mm3, mm7 ; 75 65 55 45 74 64 54 44
- movq mm2, mm4 ; 53 43 52 42 51 41 50 40
-
- punpckhwd mm4, mm6 ; 73 63 53 43 72 62 52 42
- punpcklwd mm2, mm6 ; 71 61 51 41 70 60 50 40
-
- neg rax
- movq mm6, [rsi+rax*2] ; 27 26 25 24 23 22 21 20
-
- movq mm1, mm6 ; 27 26 25 24 23 22 21 20
- punpckhbw mm6, [rsi+rax] ; 37 27 36 36 35 25 34 24
-
- punpcklbw mm1, [rsi+rax] ; 33 23 32 22 31 21 30 20
- movq mm7, [rsi+rax*4]; ; 07 06 05 04 03 02 01 00
-
- punpckhbw mm7, [rdi+rax*4] ; 17 07 16 06 15 05 14 04
- movq mm0, mm7 ; 17 07 16 06 15 05 14 04
-
- punpckhwd mm7, mm6 ; 37 27 17 07 36 26 16 06
- punpcklwd mm0, mm6 ; 35 25 15 05 34 24 14 04
-
- movq mm6, mm7 ; 37 27 17 07 36 26 16 06
- punpckhdq mm7, mm5 ; 77 67 57 47 37 27 17 07 = q3
-
- punpckldq mm6, mm5 ; 76 66 56 46 36 26 16 06 = q2
-
- movq mm5, mm6 ; 76 66 56 46 36 26 16 06
- psubusb mm5, mm7 ; q2-q3
-
- psubusb mm7, mm6 ; q3-q2
- por mm7, mm5; ; mm7=abs (q3-q2)
-
- movq mm5, mm0 ; 35 25 15 05 34 24 14 04
- punpckhdq mm5, mm3 ; 75 65 55 45 35 25 15 05 = q1
-
- punpckldq mm0, mm3 ; 74 64 54 44 34 24 15 04 = q0
- movq mm3, mm5 ; 75 65 55 45 35 25 15 05 = q1
-
- psubusb mm3, mm6 ; q1-q2
- psubusb mm6, mm5 ; q2-q1
-
- por mm6, mm3 ; mm6=abs(q2-q1)
- lea rdx, srct
-
- movq [rdx+24], mm5 ; save q1
- movq [rdx+16], mm0 ; save q0
-
- movq mm3, [rsi+rax*4] ; 07 06 05 04 03 02 01 00
- punpcklbw mm3, [rdi+rax*4] ; 13 03 12 02 11 01 10 00
-
- movq mm0, mm3 ; 13 03 12 02 11 01 10 00
- punpcklwd mm0, mm1 ; 31 21 11 01 30 20 10 00
-
- punpckhwd mm3, mm1 ; 33 23 13 03 32 22 12 02
- movq mm1, mm0 ; 31 21 11 01 30 20 10 00
-
- punpckldq mm0, mm2 ; 70 60 50 40 30 20 10 00 =p3
- punpckhdq mm1, mm2 ; 71 61 51 41 31 21 11 01 =p2
-
- movq mm2, mm1 ; 71 61 51 41 31 21 11 01 =p2
- psubusb mm2, mm0 ; p2-p3
-
- psubusb mm0, mm1 ; p3-p2
- por mm0, mm2 ; mm0=abs(p3-p2)
-
- movq mm2, mm3 ; 33 23 13 03 32 22 12 02
- punpckldq mm2, mm4 ; 72 62 52 42 32 22 12 02 = p1
-
- punpckhdq mm3, mm4 ; 73 63 53 43 33 23 13 03 = p0
- movq [rdx+8], mm3 ; save p0
-
- movq [rdx], mm2 ; save p1
- movq mm5, mm2 ; mm5 = p1
-
- psubusb mm2, mm1 ; p1-p2
- psubusb mm1, mm5 ; p2-p1
-
- por mm1, mm2 ; mm1=abs(p2-p1)
- mov rdx, arg(3) ;limit
-
- movq mm4, [rdx] ; mm4 = limit
- psubusb mm7, mm4
-
- psubusb mm0, mm4
- psubusb mm1, mm4
-
- psubusb mm6, mm4
- por mm7, mm6
-
- por mm0, mm1
-        por         mm0, mm7                   ; abs(q3-q2) > limit || abs(p3-p2) > limit || abs(p2-p1) > limit || abs(q2-q1) > limit
-
- movq mm1, mm5 ; p1
-
- movq mm7, mm3 ; mm3=mm7=p0
- psubusb mm7, mm5 ; p0 - p1
-
- psubusb mm5, mm3 ; p1 - p0
- por mm5, mm7 ; abs(p1-p0)
-
- movq t0, mm5 ; save abs(p1-p0)
- lea rdx, srct
-
- psubusb mm5, mm4
- por mm0, mm5 ; mm0=mask
-
- movq mm5, [rdx+16] ; mm5=q0
- movq mm7, [rdx+24] ; mm7=q1
-
- movq mm6, mm5 ; mm6=q0
- movq mm2, mm7 ; q1
- psubusb mm5, mm7 ; q0-q1
-
- psubusb mm7, mm6 ; q1-q0
- por mm7, mm5 ; abs(q1-q0)
-
- movq t1, mm7 ; save abs(q1-q0)
- psubusb mm7, mm4
-
- por mm0, mm7 ; mask
-
- movq mm5, mm2 ; q1
- psubusb mm5, mm1 ; q1-=p1
- psubusb mm1, mm2 ; p1-=q1
- por mm5, mm1 ; abs(p1-q1)
- pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
- psrlw mm5, 1 ; abs(p1-q1)/2
-
- mov rdx, arg(2) ;blimit ;
-
- movq mm4, [rdx] ;blimit
- movq mm1, mm3 ; mm1=mm3=p0
-
- movq mm7, mm6 ; mm7=mm6=q0
- psubusb mm1, mm7 ; p0-q0
-
- psubusb mm7, mm3 ; q0-p0
- por mm1, mm7 ; abs(q0-p0)
- paddusb mm1, mm1 ; abs(q0-p0)*2
-        paddusb     mm1, mm5                   ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-
-        psubusb     mm1, mm4                   ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit
-        por         mm1, mm0                   ; mask
-
- pxor mm0, mm0
- pcmpeqb mm1, mm0
-
- ; calculate high edge variance
- mov rdx, arg(4) ;thresh ; get thresh
- movq mm7, [rdx]
- ;
-        movq        mm4, t0                    ; get abs(p1 - p0)
- psubusb mm4, mm7
-
-        movq        mm3, t1                    ; get abs(q1 - q0)
- psubusb mm3, mm7
-
- por mm4, mm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
- pcmpeqb mm4, mm0
-
- pcmpeqb mm0, mm0
- pxor mm4, mm0
-
-
-
- ; start work on filters
- lea rdx, srct
-
- movq mm2, [rdx] ; p1
- movq mm7, [rdx+24] ; q1
-
- movq mm6, [rdx+8] ; p0
- movq mm0, [rdx+16] ; q0
-
- pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values
- pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values
-
- psubsb mm2, mm7 ; p1 - q1
- pand mm2, mm4 ; high var mask (hvm)(p1 - q1)
-
- pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values
- pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values
-
- movq mm3, mm0 ; q0
- psubsb mm0, mm6 ; q0 - p0
-
- paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1)
- paddsb mm2, mm0 ; 2 * (q0 - p0) + hvm(p1 - q1)
-
- paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1)
- pand mm1, mm2 ; mask filter values we don't care about
-
- movq mm2, mm1
- paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-
- paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3
- pxor mm0, mm0 ;
-
- pxor mm5, mm5
- punpcklbw mm0, mm2 ;
-
- punpckhbw mm5, mm2 ;
- psraw mm0, 11 ;
-
- psraw mm5, 11
- packsswb mm0, mm5
-
- movq mm2, mm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
- pxor mm0, mm0 ; 0
- movq mm5, mm1 ; abcdefgh
-
- punpcklbw mm0, mm1 ; e0f0g0h0
- psraw mm0, 11 ; sign extended shift right by 3
-
- pxor mm1, mm1 ; 0
- punpckhbw mm1, mm5 ; a0b0c0d0
-
- psraw mm1, 11 ; sign extended shift right by 3
- movq mm5, mm0 ; save results
-
- packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
- paddsw mm5, [GLOBAL(ones)]
-
- paddsw mm1, [GLOBAL(ones)]
-        psraw       mm5, 1                     ; partial result shifted one more time for the 2nd tap
-
-        psraw       mm1, 1                     ; partial result shifted one more time for the 2nd tap
- packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-
- pandn mm4, mm5 ; high edge variance additive
-
- paddsb mm6, mm2 ; p0+= p0 add
- pxor mm6, [GLOBAL(t80)] ; unoffset
-
- ; mm6=p0 ;
- movq mm1, [rdx] ; p1
- pxor mm1, [GLOBAL(t80)] ; reoffset
-
- paddsb mm1, mm4 ; p1+= p1 add
- pxor mm1, [GLOBAL(t80)] ; unoffset
- ; mm6 = p0 mm1 = p1
-
- psubsb mm3, mm0 ; q0-= q0 add
- pxor mm3, [GLOBAL(t80)] ; unoffset
-
- ; mm3 = q0
- psubsb mm7, mm4 ; q1-= q1 add
- pxor mm7, [GLOBAL(t80)] ; unoffset
- ; mm7 = q1
-
- ; transpose and write back
- ; mm1 = 72 62 52 42 32 22 12 02
- ; mm6 = 73 63 53 43 33 23 13 03
- ; mm3 = 74 64 54 44 34 24 14 04
- ; mm7 = 75 65 55 45 35 25 15 05
-
- movq mm2, mm1 ; 72 62 52 42 32 22 12 02
- punpcklbw mm2, mm6 ; 33 32 23 22 13 12 03 02
-
- movq mm4, mm3 ; 74 64 54 44 34 24 14 04
- punpckhbw mm1, mm6 ; 73 72 63 62 53 52 43 42
-
- punpcklbw mm4, mm7 ; 35 34 25 24 15 14 05 04
- punpckhbw mm3, mm7 ; 75 74 65 64 55 54 45 44
-
- movq mm6, mm2 ; 33 32 23 22 13 12 03 02
- punpcklwd mm2, mm4 ; 15 14 13 12 05 04 03 02
-
- punpckhwd mm6, mm4 ; 35 34 33 32 25 24 23 22
- movq mm5, mm1 ; 73 72 63 62 53 52 43 42
-
- punpcklwd mm1, mm3 ; 55 54 53 52 45 44 43 42
- punpckhwd mm5, mm3 ; 75 74 73 72 65 64 63 62
-
-
- ; mm2 = 15 14 13 12 05 04 03 02
- ; mm6 = 35 34 33 32 25 24 23 22
- ; mm5 = 55 54 53 52 45 44 43 42
- ; mm1 = 75 74 73 72 65 64 63 62
-
-
-
- movd [rsi+rax*4+2], mm2
- psrlq mm2, 32
-
- movd [rdi+rax*4+2], mm2
- movd [rsi+rax*2+2], mm6
-
- psrlq mm6, 32
- movd [rsi+rax+2],mm6
-
- movd [rsi+2], mm1
- psrlq mm1, 32
-
- movd [rdi+2], mm1
- neg rax
-
- movd [rdi+rax+2],mm5
- psrlq mm5, 32
-
- movd [rdi+rax*2+2], mm5
-
- lea rsi, [rsi+rax*8]
- dec rcx
- jnz .next8_v
-
- add rsp, 64
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-tfe:
- times 8 db 0xfe
-align 16
-t80:
- times 8 db 0x80
-align 16
-t3:
- times 8 db 0x03
-align 16
-t4:
- times 8 db 0x04
-align 16
-ones:
- times 4 dw 0x0001
diff --git a/libvpx/vpx_dsp/x86/loopfilter_sse2.c b/libvpx/vpx_dsp/x86/loopfilter_sse2.c
index ed1012736..739adf31d 100644
--- a/libvpx/vpx_dsp/x86/loopfilter_sse2.c
+++ b/libvpx/vpx_dsp/x86/loopfilter_sse2.c
@@ -18,11 +18,216 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) {
return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
}
-static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
+// filter_mask and hev_mask
+#define FILTER_HEV_MASK do { \
+  /* abs(q1 - q0), abs(p1 - p0) */                                            \
+ __m128i flat = abs_diff(q1p1, q0p0); \
+ /* abs(p1 - q1), abs(p0 - q0) */ \
+ const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0); \
+ __m128i abs_p0q0, abs_p1q1, work; \
+ \
+ /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \
+ hev = _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \
+ hev = _mm_cmpgt_epi16(hev, thresh); \
+ hev = _mm_packs_epi16(hev, hev); \
+ \
+ /* const int8_t mask = filter_mask(*limit, *blimit, */ \
+ /* p3, p2, p1, p0, q0, q1, q2, q3); */ \
+ abs_p0q0 = _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */\
+ abs_p1q1 = _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */\
+ abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); \
+ abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */ \
+ /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
+ mask = _mm_adds_epu8(abs_p0q0, abs_p1q1); \
+ /* abs(p3 - p2), abs(p2 - p1) */ \
+ work = abs_diff(p3p2, p2p1); \
+ flat = _mm_max_epu8(work, flat); \
+ /* abs(q3 - q2), abs(q2 - q1) */ \
+ work = abs_diff(q3q2, q2q1); \
+ flat = _mm_max_epu8(work, flat); \
+ flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \
+ mask = _mm_unpacklo_epi64(mask, flat); \
+ mask = _mm_subs_epu8(mask, limit); \
+ mask = _mm_cmpeq_epi8(mask, zero); \
+ mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \
+} while (0)
+
+#define FILTER4 do { \
+ const __m128i t3t4 = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, \
+ 4, 4, 4, 4, 4, 4, 4, 4); \
+ const __m128i t80 = _mm_set1_epi8(0x80); \
+ __m128i filter, filter2filter1, work; \
+ \
+ ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \
+ qs1qs0 = _mm_xor_si128(q1q0, t80); \
+ \
+ /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */ \
+ work = _mm_subs_epi8(ps1ps0, qs1qs0); \
+ filter = _mm_and_si128(_mm_srli_si128(work, 8), hev); \
+ /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */ \
+ filter = _mm_subs_epi8(filter, work); \
+ filter = _mm_subs_epi8(filter, work); \
+ filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */ \
+ filter = _mm_and_si128(filter, mask); /* & mask */ \
+ filter = _mm_unpacklo_epi64(filter, filter); \
+ \
+ /* filter1 = signed_char_clamp(filter + 4) >> 3; */ \
+ /* filter2 = signed_char_clamp(filter + 3) >> 3; */ \
+ filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */ \
+ filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); \
+ filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); \
+ filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */ \
+ filter = _mm_srai_epi16(filter, 11); /* >> 3 */ \
+ filter2filter1 = _mm_packs_epi16(filter2filter1, filter); \
+ \
+ /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */ \
+ filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */ \
+ filter = _mm_unpacklo_epi8(filter, filter); \
+ filter = _mm_srai_epi16(filter, 9); /* round */ \
+ filter = _mm_packs_epi16(filter, filter); \
+ filter = _mm_andnot_si128(hev, filter); \
+ \
+ hev = _mm_unpackhi_epi64(filter2filter1, filter); \
+ filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); \
+ \
+ /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */ \
+ qs1qs0 = _mm_subs_epi8(qs1qs0, filter2filter1); \
+ /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */ \
+ ps1ps0 = _mm_adds_epi8(ps1ps0, hev); \
+ qs1qs0 = _mm_xor_si128(qs1qs0, t80); /* ^ 0x80 */ \
+ ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \
+} while (0)
+
+void vpx_lpf_horizontal_4_sse2(uint8_t *s, int p /* pitch */,
+ const uint8_t *_blimit, const uint8_t *_limit,
+ const uint8_t *_thresh) {
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i limit =
+ _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit),
+ _mm_loadl_epi64((const __m128i *)_limit));
+ const __m128i thresh =
+ _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero);
+ const __m128i ff = _mm_cmpeq_epi8(zero, zero);
+ __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0;
+ __m128i mask, hev;
+
+ p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
+ _mm_loadl_epi64((__m128i *)(s - 4 * p)));
+ q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * p)),
+ _mm_loadl_epi64((__m128i *)(s + 1 * p)));
+ q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * p)),
+ _mm_loadl_epi64((__m128i *)(s + 0 * p)));
+ q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * p)),
+ _mm_loadl_epi64((__m128i *)(s + 3 * p)));
+ p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);
+ p2p1 = _mm_unpacklo_epi64(q1p1, p3p2);
+ q1q0 = _mm_unpackhi_epi64(q0p0, q1p1);
+ q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2);
+
+ FILTER_HEV_MASK;
+ FILTER4;
+
+ _mm_storeh_pi((__m64 *)(s - 2 * p), _mm_castsi128_ps(ps1ps0)); // *op1
+ _mm_storel_epi64((__m128i *)(s - 1 * p), ps1ps0); // *op0
+ _mm_storel_epi64((__m128i *)(s + 0 * p), qs1qs0); // *oq0
+ _mm_storeh_pi((__m64 *)(s + 1 * p), _mm_castsi128_ps(qs1qs0)); // *oq1
+}
+
+void vpx_lpf_vertical_4_sse2(uint8_t *s, int p /* pitch */,
+ const uint8_t *_blimit, const uint8_t *_limit,
+ const uint8_t *_thresh) {
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i limit =
+ _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)_blimit),
+ _mm_loadl_epi64((const __m128i *)_limit));
+ const __m128i thresh =
+ _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)_thresh), zero);
+ const __m128i ff = _mm_cmpeq_epi8(zero, zero);
+ __m128i x0, x1, x2, x3;
+ __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0;
+ __m128i mask, hev;
+
+ // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
+ q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * p - 4)),
+ _mm_loadl_epi64((__m128i *)(s + 1 * p - 4)));
+
+ // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
+ x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * p - 4)),
+ _mm_loadl_epi64((__m128i *)(s + 3 * p - 4)));
+
+ // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
+ x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * p - 4)),
+ _mm_loadl_epi64((__m128i *)(s + 5 * p - 4)));
+
+ // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
+ x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * p - 4)),
+ _mm_loadl_epi64((__m128i *)(s + 7 * p - 4)));
+
+ // Transpose 8x8
+ // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ p1p0 = _mm_unpacklo_epi16(q1q0, x1);
+ // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73
+ x0 = _mm_unpacklo_epi16(x2, x3);
+ // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71
+ p3p2 = _mm_unpacklo_epi32(p1p0, x0);
+ // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73
+ p1p0 = _mm_unpackhi_epi32(p1p0, x0);
+  p3p2 = _mm_unpackhi_epi64(p3p2, _mm_slli_si128(p3p2, 8));  // swap low and high halves
+  p1p0 = _mm_unpackhi_epi64(p1p0, _mm_slli_si128(p1p0, 8));  // swap low and high halves
+
+ // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
+ q1q0 = _mm_unpackhi_epi16(q1q0, x1);
+ // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77
+ x2 = _mm_unpackhi_epi16(x2, x3);
+ // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77
+ q3q2 = _mm_unpackhi_epi32(q1q0, x2);
+ // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75
+ q1q0 = _mm_unpacklo_epi32(q1q0, x2);
+
+ q0p0 = _mm_unpacklo_epi64(p1p0, q1q0);
+ q1p1 = _mm_unpackhi_epi64(p1p0, q1q0);
+ p1p0 = _mm_unpacklo_epi64(q0p0, q1p1);
+ p2p1 = _mm_unpacklo_epi64(q1p1, p3p2);
+ q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2);
+
+ FILTER_HEV_MASK;
+ FILTER4;
+
+ // Transpose 8x4 to 4x8
+  // qs1qs0: 20 21 22 23 24 25 26 27  30 31 32 33 34 35 36 37
+ // ps1ps0: 10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07
+ // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17
+ ps1ps0 = _mm_unpackhi_epi64(ps1ps0, _mm_slli_si128(ps1ps0, 8));
+ // 10 30 11 31 12 32 13 33 14 34 15 35 16 36 17 37
+ x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0);
+ // 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27
+ ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0);
+ // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
+ qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0);
+ // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+ ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0);
+
+ *(int *)(s + 0 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ ps1ps0 = _mm_srli_si128(ps1ps0, 4);
+ *(int *)(s + 1 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ ps1ps0 = _mm_srli_si128(ps1ps0, 4);
+ *(int *)(s + 2 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+ ps1ps0 = _mm_srli_si128(ps1ps0, 4);
+ *(int *)(s + 3 * p - 2) = _mm_cvtsi128_si32(ps1ps0);
+
+ *(int *)(s + 4 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ qs1qs0 = _mm_srli_si128(qs1qs0, 4);
+ *(int *)(s + 5 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ qs1qs0 = _mm_srli_si128(qs1qs0, 4);
+ *(int *)(s + 6 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+ qs1qs0 = _mm_srli_si128(qs1qs0, 4);
+ *(int *)(s + 7 * p - 2) = _mm_cvtsi128_si32(qs1qs0);
+}
+
+void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh) {
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -383,11 +588,10 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
}
-static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh) {
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -716,21 +920,10 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
}
}
-// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh, int count) {
- if (count == 1)
- mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
- else
- mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
-}
-
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
- const unsigned char *_thresh, int count) {
+ const unsigned char *_thresh) {
DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
@@ -745,8 +938,6 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
__m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0;
- (void)count;
-
q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
_mm_loadl_epi64((__m128i *)(s + 3 * p)));
q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
@@ -1492,11 +1683,10 @@ void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
- const unsigned char *thresh, int count) {
+ const unsigned char *thresh) {
DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]);
unsigned char *src[1];
unsigned char *dst[1];
- (void)count;
// Transpose 8x8
src[0] = s - 4;
@@ -1505,7 +1695,7 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
+ vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1557,7 +1747,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
transpose(src, p, dst, 8, 2);
// Loop filtering
- mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh);
+ vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
src[0] = t_dst;
src[1] = t_dst + 8 * 8;
@@ -1578,8 +1768,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
// Loop filtering
- mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
- thresh);
+ vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
// Transpose back
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
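For reference, the FILTER4 macro introduced above vectorizes VP9's standard
4-tap loop-filter update. A minimal scalar sketch of the same update, modeled
on the C version in vpx_dsp/loopfilter.c (the helper names here are
illustrative):

#include <stdint.h>

/* Clamp to int8_t range, mirroring the saturating adds/subs in the SIMD. */
static int8_t signed_char_clamp(int t) {
  return (int8_t)(t < -128 ? -128 : t > 127 ? 127 : t);
}

/* mask and hev are per-pixel byte masks (0x00 or 0xff) from the mask stage. */
static void filter4_sketch(int8_t mask, int8_t hev, uint8_t *op1,
                           uint8_t *op0, uint8_t *oq0, uint8_t *oq1) {
  /* XOR with 0x80 moves pixels into signed range (the ^ t80 steps above). */
  const int8_t ps1 = (int8_t)(*op1 ^ 0x80), ps0 = (int8_t)(*op0 ^ 0x80);
  const int8_t qs0 = (int8_t)(*oq0 ^ 0x80), qs1 = (int8_t)(*oq1 ^ 0x80);
  int8_t filter, filter1, filter2;

  filter = (int8_t)(signed_char_clamp(ps1 - qs1) & hev);
  filter = (int8_t)(signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask);
  filter1 = (int8_t)(signed_char_clamp(filter + 4) >> 3);
  filter2 = (int8_t)(signed_char_clamp(filter + 3) >> 3);
  *oq0 = (uint8_t)(signed_char_clamp(qs0 - filter1) ^ 0x80);
  *op0 = (uint8_t)(signed_char_clamp(ps0 + filter2) ^ 0x80);
  filter = (int8_t)(((filter1 + 1) >> 1) & ~hev);  /* outer taps skip hev */
  *oq1 = (uint8_t)(signed_char_clamp(qs1 - filter) ^ 0x80);
  *op1 = (uint8_t)(signed_char_clamp(ps1 + filter) ^ 0x80);
}

The SSE2 code performs the same arithmetic across all lanes at once, replacing
the 8-bit arithmetic shifts with the unpack-to-16-bit / psraw-by-11 idiom.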
diff --git a/libvpx/vpx_dsp/x86/sad4d_sse2.asm b/libvpx/vpx_dsp/x86/sad4d_sse2.asm
index a2f0ae79e..3f6e55ce9 100644
--- a/libvpx/vpx_dsp/x86/sad4d_sse2.asm
+++ b/libvpx/vpx_dsp/x86/sad4d_sse2.asm
@@ -20,33 +20,41 @@ SECTION .text
movd m4, [ref2q+%3]
movd m7, [ref3q+%3]
movd m5, [ref4q+%3]
- punpckldq m0, [srcq +%4]
- punpckldq m6, [ref1q+%5]
- punpckldq m4, [ref2q+%5]
- punpckldq m7, [ref3q+%5]
- punpckldq m5, [ref4q+%5]
+ movd m1, [srcq +%4]
+ movd m2, [ref1q+%5]
+ punpckldq m0, m1
+ punpckldq m6, m2
+ movd m1, [ref2q+%5]
+ movd m2, [ref3q+%5]
+ movd m3, [ref4q+%5]
+ punpckldq m4, m1
+ punpckldq m7, m2
+ punpckldq m5, m3
+ movlhps m0, m0
+ movlhps m6, m4
+ movlhps m7, m5
psadbw m6, m0
- psadbw m4, m0
psadbw m7, m0
- psadbw m5, m0
- punpckldq m6, m4
- punpckldq m7, m5
%else
movd m1, [ref1q+%3]
+ movd m5, [ref1q+%5]
movd m2, [ref2q+%3]
+ movd m4, [ref2q+%5]
+ punpckldq m1, m5
+ punpckldq m2, m4
movd m3, [ref3q+%3]
+ movd m5, [ref3q+%5]
+ punpckldq m3, m5
movd m4, [ref4q+%3]
- punpckldq m0, [srcq +%4]
- punpckldq m1, [ref1q+%5]
- punpckldq m2, [ref2q+%5]
- punpckldq m3, [ref3q+%5]
- punpckldq m4, [ref4q+%5]
+ movd m5, [ref4q+%5]
+ punpckldq m4, m5
+ movd m5, [srcq +%4]
+ punpckldq m0, m5
+ movlhps m0, m0
+ movlhps m1, m2
+ movlhps m3, m4
psadbw m1, m0
- psadbw m2, m0
psadbw m3, m0
- psadbw m4, m0
- punpckldq m1, m2
- punpckldq m3, m4
paddd m6, m1
paddd m7, m3
%endif
@@ -170,7 +178,7 @@ SECTION .text
; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
-; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
%macro SADNXN4D 2
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
@@ -192,7 +200,7 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
%endrep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
-%if mmsize == 16
+%if %1 > 4
pslldq m5, 4
pslldq m7, 4
por m4, m5
@@ -207,8 +215,10 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
RET
%else
movifnidn r4, r4mp
- movq [r4+0], m6
- movq [r4+8], m7
+ pshufd m6, m6, 0x08
+ pshufd m7, m7, 0x08
+ movq [r4+0], m6
+ movq [r4+8], m7
RET
%endif
%endmacro
@@ -225,7 +235,5 @@ SADNXN4D 16, 8
SADNXN4D 8, 16
SADNXN4D 8, 8
SADNXN4D 8, 4
-
-INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4
diff --git a/libvpx/vpx_dsp/x86/sad_mmx.asm b/libvpx/vpx_dsp/x86/sad_mmx.asm
deleted file mode 100644
index 9968992bd..000000000
--- a/libvpx/vpx_dsp/x86/sad_mmx.asm
+++ /dev/null
@@ -1,427 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-global sym(vpx_sad16x16_mmx) PRIVATE
-global sym(vpx_sad8x16_mmx) PRIVATE
-global sym(vpx_sad8x8_mmx) PRIVATE
-global sym(vpx_sad4x4_mmx) PRIVATE
-global sym(vpx_sad16x8_mmx) PRIVATE
-
-;unsigned int vpx_sad16x16_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad16x16_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
-
- lea rcx, [rcx+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x16x16sad_mmx_loop:
-
- movq mm0, QWORD PTR [rsi]
- movq mm2, QWORD PTR [rsi+8]
-
- movq mm1, QWORD PTR [rdi]
- movq mm3, QWORD PTR [rdi+8]
-
- movq mm4, mm0
- movq mm5, mm2
-
- psubusb mm0, mm1
- psubusb mm1, mm4
-
- psubusb mm2, mm3
- psubusb mm3, mm5
-
- por mm0, mm1
- por mm2, mm3
-
- movq mm1, mm0
- movq mm3, mm2
-
- punpcklbw mm0, mm6
- punpcklbw mm2, mm6
-
- punpckhbw mm1, mm6
- punpckhbw mm3, mm6
-
- paddw mm0, mm2
- paddw mm1, mm3
-
-
- lea rsi, [rsi+rax]
- add rdi, rdx
-
- paddw mm7, mm0
- paddw mm7, mm1
-
- cmp rsi, rcx
- jne .x16x16sad_mmx_loop
-
-
- movq mm0, mm7
-
- punpcklwd mm0, mm6
- punpckhwd mm7, mm6
-
- paddw mm0, mm7
- movq mm7, mm0
-
-
- psrlq mm0, 32
- paddw mm7, mm0
-
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad8x16_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad8x16_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
-
- lea rcx, [rcx+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x8x16sad_mmx_loop:
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, mm0
- psubusb mm0, mm1
-
- psubusb mm1, mm2
- por mm0, mm1
-
- movq mm2, mm0
- punpcklbw mm0, mm6
-
- punpckhbw mm2, mm6
- lea rsi, [rsi+rax]
-
- add rdi, rdx
- paddw mm7, mm0
-
- paddw mm7, mm2
- cmp rsi, rcx
-
- jne .x8x16sad_mmx_loop
-
- movq mm0, mm7
- punpcklwd mm0, mm6
-
- punpckhwd mm7, mm6
- paddw mm0, mm7
-
- movq mm7, mm0
- psrlq mm0, 32
-
- paddw mm7, mm0
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad8x8_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad8x8_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x8x8sad_mmx_loop:
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, mm0
- psubusb mm0, mm1
-
- psubusb mm1, mm2
- por mm0, mm1
-
- movq mm2, mm0
- punpcklbw mm0, mm6
-
- punpckhbw mm2, mm6
- paddw mm0, mm2
-
- lea rsi, [rsi+rax]
- add rdi, rdx
-
- paddw mm7, mm0
- cmp rsi, rcx
-
- jne .x8x8sad_mmx_loop
-
- movq mm0, mm7
- punpcklwd mm0, mm6
-
- punpckhwd mm7, mm6
- paddw mm0, mm7
-
- movq mm7, mm0
- psrlq mm0, 32
-
- paddw mm7, mm0
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad4x4_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad4x4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- movd mm0, DWORD PTR [rsi]
- movd mm1, DWORD PTR [rdi]
-
- movd mm2, DWORD PTR [rsi+rax]
- movd mm3, DWORD PTR [rdi+rdx]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- movq mm2, mm0
- psubusb mm0, mm1
-
- psubusb mm1, mm2
- por mm0, mm1
-
- movq mm2, mm0
- pxor mm3, mm3
-
- punpcklbw mm0, mm3
- punpckhbw mm2, mm3
-
- paddw mm0, mm2
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- movd mm4, DWORD PTR [rsi]
- movd mm5, DWORD PTR [rdi]
-
- movd mm6, DWORD PTR [rsi+rax]
- movd mm7, DWORD PTR [rdi+rdx]
-
- punpcklbw mm4, mm6
- punpcklbw mm5, mm7
-
- movq mm6, mm4
- psubusb mm4, mm5
-
- psubusb mm5, mm6
- por mm4, mm5
-
- movq mm5, mm4
- punpcklbw mm4, mm3
-
- punpckhbw mm5, mm3
- paddw mm4, mm5
-
- paddw mm0, mm4
- movq mm1, mm0
-
- punpcklwd mm0, mm3
- punpckhwd mm1, mm3
-
- paddw mm0, mm1
- movq mm1, mm0
-
- psrlq mm0, 32
- paddw mm0, mm1
-
- movq rax, mm0
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad16x8_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad16x8_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x16x8sad_mmx_loop:
-
- movq mm0, [rsi]
- movq mm1, [rdi]
-
- movq mm2, [rsi+8]
- movq mm3, [rdi+8]
-
- movq mm4, mm0
- movq mm5, mm2
-
- psubusb mm0, mm1
- psubusb mm1, mm4
-
- psubusb mm2, mm3
- psubusb mm3, mm5
-
- por mm0, mm1
- por mm2, mm3
-
- movq mm1, mm0
- movq mm3, mm2
-
- punpcklbw mm0, mm6
- punpckhbw mm1, mm6
-
- punpcklbw mm2, mm6
- punpckhbw mm3, mm6
-
-
- paddw mm0, mm2
- paddw mm1, mm3
-
- paddw mm0, mm1
- lea rsi, [rsi+rax]
-
- add rdi, rdx
- paddw mm7, mm0
-
- cmp rsi, rcx
- jne .x16x8sad_mmx_loop
-
- movq mm0, mm7
- punpcklwd mm0, mm6
-
- punpckhwd mm7, mm6
- paddw mm0, mm7
-
- movq mm7, mm0
- psrlq mm0, 32
-
- paddw mm7, mm0
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
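The file deleted above implemented plain sums of absolute differences; the
SSE2 versions in sad_sse2.asm now cover all block sizes. As a reminder of what
each of these kernels computes, a scalar sketch (the function name is
illustrative):

#include <stdint.h>
#include <stdlib.h>

/* Reference NxM SAD: the sum of |src - ref| over the block. */
static unsigned int sad_sketch(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               int width, int height) {
  unsigned int sad = 0;
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c) sad += (unsigned int)abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}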
diff --git a/libvpx/vpx_dsp/x86/sad_sse2.asm b/libvpx/vpx_dsp/x86/sad_sse2.asm
index 0defe1b6d..1ec906c23 100644
--- a/libvpx/vpx_dsp/x86/sad_sse2.asm
+++ b/libvpx/vpx_dsp/x86/sad_sse2.asm
@@ -17,7 +17,7 @@ SECTION .text
%if %3 == 5
cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, n_rows
%else ; %3 == 7
-cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
+cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \
src_stride3, ref_stride3, n_rows
%endif ; %3 == 5/7
%else ; avg
@@ -25,7 +25,7 @@ cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 5, src, src_stride, \
+cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
@@ -222,8 +222,8 @@ SAD8XN 16, 1 ; sad8x16_avg_sse2
SAD8XN 8, 1 ; sad8x8_avg_sse2
SAD8XN 4, 1 ; sad8x4_avg_sse2
-; unsigned int vpx_sad4x{4, 8}_sse(uint8_t *src, int src_stride,
-; uint8_t *ref, int ref_stride);
+; unsigned int vpx_sad4x{4, 8}_sse2(uint8_t *src, int src_stride,
+; uint8_t *ref, int ref_stride);
%macro SAD4XN 1-2 0
SAD_FN 4, %1, 7, %2
mov n_rowsd, %1/4
@@ -236,31 +236,32 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2
movd m4, [refq+ref_stride3q]
punpckldq m1, m2
punpckldq m3, m4
+ movlhps m1, m3
%if %2 == 1
pavgb m1, [second_predq+mmsize*0]
- pavgb m3, [second_predq+mmsize*1]
- lea second_predq, [second_predq+mmsize*2]
+ lea second_predq, [second_predq+mmsize*1]
%endif
movd m2, [srcq]
movd m5, [srcq+src_strideq]
movd m4, [srcq+src_strideq*2]
- movd m6, [srcq+src_stride3q]
+ movd m3, [srcq+src_stride3q]
punpckldq m2, m5
- punpckldq m4, m6
+ punpckldq m4, m3
+ movlhps m2, m4
psadbw m1, m2
- psadbw m3, m4
lea refq, [refq+ref_strideq*4]
paddd m0, m1
lea srcq, [srcq+src_strideq*4]
- paddd m0, m3
dec n_rowsd
jg .loop
+ movhlps m1, m0
+ paddd m0, m1
movd eax, m0
RET
%endmacro
-INIT_MMX sse
+INIT_XMM sse2
SAD4XN  8 ; sad4x8_sse2
SAD4XN  4 ; sad4x4_sse2
SAD4XN  8, 1 ; sad4x8_avg_sse2
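The rewritten 4xN path above drops MMX by packing rows: two punpckldq plus a
movlhps place four 4-pixel rows in one XMM register, so a single psadbw yields
two 64-bit partial sums that the final movhlps/paddd fold together. The same
idea for a 4x4 block with SSE2 intrinsics (the function name and the memcpy
load helper are illustrative):

#include <emmintrin.h>
#include <stdint.h>
#include <string.h>

/* Safe unaligned 4-byte load into the low dword of an XMM register. */
static __m128i load32(const uint8_t *p) {
  int32_t v;
  memcpy(&v, p, sizeof(v));
  return _mm_cvtsi32_si128(v);
}

static unsigned int sad4x4_sketch(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride) {
  const __m128i s = _mm_unpacklo_epi64(
      _mm_unpacklo_epi32(load32(src), load32(src + src_stride)),
      _mm_unpacklo_epi32(load32(src + 2 * src_stride),
                         load32(src + 3 * src_stride)));
  const __m128i r = _mm_unpacklo_epi64(
      _mm_unpacklo_epi32(load32(ref), load32(ref + ref_stride)),
      _mm_unpacklo_epi32(load32(ref + 2 * ref_stride),
                         load32(ref + 3 * ref_stride)));
  __m128i sad = _mm_sad_epu8(s, r);                  /* two qword partials */
  sad = _mm_add_epi32(sad, _mm_srli_si128(sad, 8));  /* movhlps + paddd fold */
  return (unsigned int)_mm_cvtsi128_si32(sad);
}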
diff --git a/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm b/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm
index 05dcff75e..cee4468c1 100644
--- a/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm
+++ b/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm
@@ -57,8 +57,8 @@ SECTION .text
paddd %6, %1
%endmacro
-%macro STORE_AND_RET 0
-%if mmsize == 16
+%macro STORE_AND_RET 1
+%if %1 > 4
; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit
; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg.
; We have to sign-extend it before adding the words within the register
@@ -78,16 +78,16 @@ SECTION .text
movd [r1], m7 ; store sse
paddd m6, m4
movd raxd, m6 ; store sum as return value
-%else ; mmsize == 8
- pshufw m4, m6, 0xe
- pshufw m3, m7, 0xe
+%else ; 4xh
+ pshuflw m4, m6, 0xe
+ pshuflw m3, m7, 0xe
paddw m6, m4
paddd m7, m3
pcmpgtw m5, m6 ; mask for 0 > x
mov r1, ssem ; r1 = unsigned int *sse
punpcklwd m6, m5 ; sign-extend m6 word->dword
movd [r1], m7 ; store sse
- pshufw m4, m6, 0xe
+ pshuflw m4, m6, 0xe
paddd m6, m4
movd raxd, m6 ; store sum as return value
%endif
@@ -139,8 +139,10 @@ SECTION .text
%define sec_str sec_stridemp
;Store bilin_filter and pw_8 location in stack
- GET_GOT eax
- add esp, 4 ; restore esp
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -156,8 +158,10 @@ SECTION .text
%define block_height heightd
;Store bilin_filter and pw_8 location in stack
- GET_GOT eax
- add esp, 4 ; restore esp
+ %if GET_GOT_DEFINED == 1
+ GET_GOT eax
+ add esp, 4 ; restore esp
+ %endif
lea ecx, [GLOBAL(bilin_filter_m)]
mov g_bilin_filterm, ecx
@@ -192,6 +196,12 @@ SECTION .text
%endif
%endif
+%if %1 == 4
+ %define movx movd
+%else
+ %define movx movh
+%endif
+
ASSERT %1 <= 16 ; m6 overflows if w > 16
pxor m6, m6 ; sum
pxor m7, m7 ; sse
@@ -224,6 +234,7 @@ SECTION .text
%endif
punpckhbw m2, m0, m5
punpcklbw m0, m5
+
%if %2 == 0 ; !avg
punpckhbw m3, m1, m5
punpcklbw m1, m5
@@ -233,24 +244,37 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
+ movx m0, [srcq]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m0, [srcq+src_strideq]
-%else ; mmsize == 8
- punpckldq m0, [srcq+src_strideq]
+%else ; 4xh
+ movx m1, [srcq+src_strideq]
+ punpckldq m0, m1
%endif
%else ; !avg
- movh m2, [srcq+src_strideq]
+ movx m2, [srcq+src_strideq]
%endif
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
+
%if %2 == 1 ; avg
+%if %1 > 4
pavgb m0, [secq]
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+%endif
punpcklbw m3, m5
punpcklbw m1, m5
+%if %1 > 4
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
punpcklbw m0, m5
punpcklbw m2, m5
@@ -267,10 +291,10 @@ SECTION .text
%endif
dec block_height
jg .x_zero_y_zero_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_zero_y_nonzero:
- cmp y_offsetd, 8
+ cmp y_offsetd, 4
jne .x_zero_y_nonhalf
; x_offset == 0 && y_offset == 0.5
@@ -292,37 +316,41 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m2, [srcq+src_strideq]
+ movx m0, [srcq]
+ movx m2, [srcq+src_strideq]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m2, [srcq+src_strideq*2]
-%else ; mmsize == 8
-%if %1 == 4
- movh m1, [srcq+src_strideq*2]
+%else ; 4xh
+ movx m1, [srcq+src_strideq*2]
punpckldq m2, m1
-%else
- punpckldq m2, [srcq+src_strideq*2]
-%endif
%endif
- movh m1, [dstq]
-%if mmsize == 16
+ movx m1, [dstq]
+%if %1 > 4
movlhps m0, m2
-%else ; mmsize == 8
+%else ; 4xh
punpckldq m0, m2
%endif
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
pavgb m0, m2
punpcklbw m1, m5
+%if %1 > 4
pavgb m0, [secq]
punpcklbw m3, m5
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ movh m4, [secq]
+ pavgb m0, m4
+ punpcklbw m3, m5
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
- movh m4, [srcq+src_strideq*2]
- movh m1, [dstq]
+ movx m4, [srcq+src_strideq*2]
+ movx m1, [dstq]
pavgb m0, m2
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
pavgb m2, m4
punpcklbw m0, m5
punpcklbw m2, m5
@@ -339,7 +367,7 @@ SECTION .text
%endif
dec block_height
jg .x_zero_y_half_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
@@ -347,7 +375,7 @@ SECTION .text
lea bilin_filter, [bilin_filter_m]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -420,12 +448,12 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m2, [srcq+src_strideq]
- movh m4, [srcq+src_strideq*2]
- movh m3, [dstq+dst_strideq]
+ movx m0, [srcq]
+ movx m2, [srcq+src_strideq]
+ movx m4, [srcq+src_strideq*2]
+ movx m3, [dstq+dst_strideq]
%if cpuflag(ssse3)
- movh m1, [dstq]
+ movx m1, [dstq]
punpcklbw m0, m2
punpcklbw m2, m4
pmaddubsw m0, filter_y_a
@@ -445,17 +473,27 @@ SECTION .text
pmullw m4, filter_y_b
paddw m0, m1
paddw m2, filter_rnd
- movh m1, [dstq]
+ movx m1, [dstq]
paddw m2, m4
%endif
psraw m0, 4
psraw m2, 4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m1, m5
SUM_SSE m0, m1, m2, m3, m6, m7
@@ -471,10 +509,10 @@ SECTION .text
%undef filter_y_a
%undef filter_y_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonzero:
- cmp x_offsetd, 8
+ cmp x_offsetd, 4
jne .x_nonhalf
; x_offset == 0.5
test y_offsetd, y_offsetd
@@ -499,30 +537,40 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m4, [srcq+1]
+ movx m0, [srcq]
+ movx m4, [srcq+1]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m0, [srcq+src_strideq]
movhps m4, [srcq+src_strideq+1]
-%else ; mmsize == 8
- punpckldq m0, [srcq+src_strideq]
- punpckldq m4, [srcq+src_strideq+1]
-%endif
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+%else ; 4xh
+ movx m1, [srcq+src_strideq]
+ punpckldq m0, m1
+ movx m2, [srcq+src_strideq+1]
+ punpckldq m4, m2
+%endif
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
pavgb m0, m4
punpcklbw m3, m5
+%if %1 > 4
pavgb m0, [secq]
punpcklbw m1, m5
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m1, m5
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
- movh m2, [srcq+src_strideq]
- movh m1, [dstq]
+ movx m2, [srcq+src_strideq]
+ movx m1, [dstq]
pavgb m0, m4
- movh m4, [srcq+src_strideq+1]
- movh m3, [dstq+dst_strideq]
+ movx m4, [srcq+src_strideq+1]
+ movx m3, [dstq+dst_strideq]
pavgb m2, m4
punpcklbw m0, m5
punpcklbw m2, m5
@@ -539,10 +587,10 @@ SECTION .text
%endif
dec block_height
jg .x_half_y_zero_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_half_y_nonzero:
- cmp y_offsetd, 8
+ cmp y_offsetd, 4
jne .x_half_y_nonhalf
; x_offset == 0.5 && y_offset == 0.5
@@ -574,53 +622,58 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m3, [srcq+1]
+ movx m0, [srcq]
+ movx m3, [srcq+1]
add srcq, src_strideq
pavgb m0, m3
.x_half_y_half_loop:
- movh m2, [srcq]
- movh m3, [srcq+1]
+ movx m2, [srcq]
+ movx m3, [srcq+1]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m2, [srcq+src_strideq]
movhps m3, [srcq+src_strideq+1]
%else
-%if %1 == 4
- movh m1, [srcq+src_strideq]
+ movx m1, [srcq+src_strideq]
punpckldq m2, m1
- movh m1, [srcq+src_strideq+1]
+ movx m1, [srcq+src_strideq+1]
punpckldq m3, m1
-%else
- punpckldq m2, [srcq+src_strideq]
- punpckldq m3, [srcq+src_strideq+1]
-%endif
%endif
pavgb m2, m3
-%if mmsize == 16
+%if %1 > 4
movlhps m0, m2
movhlps m4, m2
-%else ; mmsize == 8
+%else ; 4xh
punpckldq m0, m2
- pshufw m4, m2, 0xe
+ pshuflw m4, m2, 0xe
%endif
- movh m1, [dstq]
+ movx m1, [dstq]
pavgb m0, m2
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
+%if %1 > 4
pavgb m0, [secq]
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+%endif
punpcklbw m3, m5
punpcklbw m1, m5
+%if %1 > 4
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
- movh m4, [srcq+src_strideq]
- movh m1, [srcq+src_strideq+1]
+ movx m4, [srcq+src_strideq]
+ movx m1, [srcq+src_strideq+1]
pavgb m2, m3
pavgb m4, m1
pavgb m0, m2
pavgb m2, m4
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
punpcklbw m0, m5
punpcklbw m2, m5
punpcklbw m3, m5
@@ -637,7 +690,7 @@ SECTION .text
%endif
dec block_height
jg .x_half_y_half_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
@@ -645,7 +698,7 @@ SECTION .text
lea bilin_filter, [bilin_filter_m]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -720,23 +773,23 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m3, [srcq+1]
+ movx m0, [srcq]
+ movx m3, [srcq+1]
add srcq, src_strideq
pavgb m0, m3
%if notcpuflag(ssse3)
punpcklbw m0, m5
%endif
.x_half_y_other_loop:
- movh m2, [srcq]
- movh m1, [srcq+1]
- movh m4, [srcq+src_strideq]
- movh m3, [srcq+src_strideq+1]
+ movx m2, [srcq]
+ movx m1, [srcq+1]
+ movx m4, [srcq+src_strideq]
+ movx m3, [srcq+src_strideq+1]
pavgb m2, m1
pavgb m4, m3
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
%if cpuflag(ssse3)
- movh m1, [dstq]
+ movx m1, [dstq]
punpcklbw m0, m2
punpcklbw m2, m4
pmaddubsw m0, filter_y_a
@@ -756,16 +809,26 @@ SECTION .text
pmullw m1, m4, filter_y_b
paddw m2, filter_rnd
paddw m2, m1
- movh m1, [dstq]
+ movx m1, [dstq]
%endif
psraw m0, 4
psraw m2, 4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m1, m5
SUM_SSE m0, m1, m2, m3, m6, m7
@@ -782,7 +845,7 @@ SECTION .text
%undef filter_y_a
%undef filter_y_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonhalf:
test y_offsetd, y_offsetd
@@ -793,7 +856,7 @@ SECTION .text
lea bilin_filter, [bilin_filter_m]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -861,14 +924,14 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m1, [srcq+1]
- movh m2, [srcq+src_strideq]
- movh m4, [srcq+src_strideq+1]
- movh m3, [dstq+dst_strideq]
+ movx m0, [srcq]
+ movx m1, [srcq+1]
+ movx m2, [srcq+src_strideq]
+ movx m4, [srcq+src_strideq+1]
+ movx m3, [dstq+dst_strideq]
%if cpuflag(ssse3)
punpcklbw m0, m1
- movh m1, [dstq]
+ movx m1, [dstq]
punpcklbw m2, m4
pmaddubsw m0, filter_x_a
pmaddubsw m2, filter_x_a
@@ -888,17 +951,27 @@ SECTION .text
pmullw m4, filter_x_b
paddw m0, m1
paddw m2, filter_rnd
- movh m1, [dstq]
+ movx m1, [dstq]
paddw m2, m4
%endif
psraw m0, 4
psraw m2, 4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m1, m5
SUM_SSE m0, m1, m2, m3, m6, m7
@@ -914,10 +987,10 @@ SECTION .text
%undef filter_x_a
%undef filter_x_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonhalf_y_nonzero:
- cmp y_offsetd, 8
+ cmp y_offsetd, 4
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
@@ -925,7 +998,7 @@ SECTION .text
lea bilin_filter, [bilin_filter_m]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -1033,8 +1106,8 @@ SECTION .text
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m1, [srcq+1]
+ movx m0, [srcq]
+ movx m1, [srcq+1]
%if cpuflag(ssse3)
punpcklbw m0, m1
pmaddubsw m0, filter_x_a
@@ -1050,17 +1123,17 @@ SECTION .text
add srcq, src_strideq
psraw m0, 4
.x_other_y_half_loop:
- movh m2, [srcq]
- movh m1, [srcq+1]
- movh m4, [srcq+src_strideq]
- movh m3, [srcq+src_strideq+1]
+ movx m2, [srcq]
+ movx m1, [srcq+1]
+ movx m4, [srcq+src_strideq]
+ movx m3, [srcq+src_strideq+1]
%if cpuflag(ssse3)
punpcklbw m2, m1
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmaddubsw m4, filter_x_a
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
paddw m2, filter_rnd
paddw m4, filter_rnd
%else
@@ -1075,9 +1148,9 @@ SECTION .text
pmullw m3, filter_x_b
paddw m4, filter_rnd
paddw m2, m1
- movh m1, [dstq]
+ movx m1, [dstq]
paddw m4, m3
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
%endif
psraw m2, 4
psraw m4, 4
@@ -1085,10 +1158,20 @@ SECTION .text
pavgw m2, m4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline - also consider going to bytes here
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m3, m5
punpcklbw m1, m5
@@ -1106,7 +1189,7 @@ SECTION .text
%undef filter_x_a
%undef filter_x_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonhalf_y_nonhalf:
%ifdef PIC
@@ -1114,7 +1197,7 @@ SECTION .text
%endif
shl x_offsetd, filter_idx_shift
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -1257,8 +1340,8 @@ SECTION .text
INC_SRC_BY_SRC_STRIDE
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m1, [srcq+1]
+ movx m0, [srcq]
+ movx m1, [srcq+1]
%if cpuflag(ssse3)
punpcklbw m0, m1
pmaddubsw m0, filter_x_a
@@ -1279,20 +1362,20 @@ SECTION .text
INC_SRC_BY_SRC_STRIDE
.x_other_y_other_loop:
- movh m2, [srcq]
- movh m1, [srcq+1]
+ movx m2, [srcq]
+ movx m1, [srcq+1]
INC_SRC_BY_SRC_STRIDE
- movh m4, [srcq]
- movh m3, [srcq+1]
+ movx m4, [srcq]
+ movx m3, [srcq+1]
%if cpuflag(ssse3)
punpcklbw m2, m1
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmaddubsw m4, filter_x_a
- movh m3, [dstq+dst_strideq]
- movh m1, [dstq]
+ movx m3, [dstq+dst_strideq]
+ movx m1, [dstq]
paddw m2, filter_rnd
paddw m4, filter_rnd
psraw m2, 4
@@ -1331,9 +1414,9 @@ SECTION .text
pmullw m1, m4, filter_y_b
paddw m2, filter_rnd
paddw m0, m3
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
paddw m2, m1
- movh m1, [dstq]
+ movx m1, [dstq]
psraw m0, 4
psraw m2, 4
punpcklbw m3, m5
@@ -1341,10 +1424,20 @@ SECTION .text
%endif
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
mova m0, m4
@@ -1362,7 +1455,8 @@ SECTION .text
%undef filter_y_a
%undef filter_y_b
%undef filter_rnd
- STORE_AND_RET
+%undef movx
+ STORE_AND_RET %1
%endmacro
; FIXME(rbultje) the non-bilinear versions (i.e. x=0,8&&y=0,8) are identical
@@ -1371,26 +1465,22 @@ SECTION .text
; location in the sse/2 version, rather than duplicating that code in the
; binary.
-INIT_MMX sse
-SUBPEL_VARIANCE 4
INIT_XMM sse2
+SUBPEL_VARIANCE 4
SUBPEL_VARIANCE 8
SUBPEL_VARIANCE 16
-INIT_MMX ssse3
-SUBPEL_VARIANCE 4
INIT_XMM ssse3
+SUBPEL_VARIANCE 4
SUBPEL_VARIANCE 8
SUBPEL_VARIANCE 16
-INIT_MMX sse
-SUBPEL_VARIANCE 4, 1
INIT_XMM sse2
+SUBPEL_VARIANCE 4, 1
SUBPEL_VARIANCE 8, 1
SUBPEL_VARIANCE 16, 1
-INIT_MMX ssse3
-SUBPEL_VARIANCE 4, 1
INIT_XMM ssse3
+SUBPEL_VARIANCE 4, 1
SUBPEL_VARIANCE 8, 1
SUBPEL_VARIANCE 16, 1
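For orientation: each kernel patched above builds a first-order (bilinear)
sub-pixel prediction, then accumulates the sum and SSE of the residual against
the reference; the C wrappers derive the variance from (sum, sse). A scalar
sketch using the 7-bit bilinear weights of the C reference in
vpx_dsp/variance.c (names are illustrative):

#include <stdint.h>

/* One 2-tap pass; f is the second-tap weight, taps sum to 128. */
static int bilin(int a, int b, int f) {
  return (a * (128 - f) + b * f + 64) >> 7;
}

static void subpel_var_sketch(const uint8_t *src, int src_stride,
                              int fx, int fy, /* tap weights, 0..128 */
                              const uint8_t *ref, int ref_stride,
                              int w, int h, unsigned int *sse, int *sum) {
  int r, c;
  *sse = 0;
  *sum = 0;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int top = bilin(src[c], src[c + 1], fx);
      const int bot = bilin(src[c + src_stride], src[c + src_stride + 1], fx);
      const int diff = bilin(top, bot, fy) - ref[c];
      *sum += diff;
      *sse += (unsigned int)(diff * diff);
    }
    src += src_stride;
    ref += ref_stride;
  }
}

The cmp ..., 4 tests above single out the half-pel offset, which the asm
handles with pavgb instead of the multiply path.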
diff --git a/libvpx/vpx_dsp/x86/variance_avx2.c b/libvpx/vpx_dsp/x86/variance_avx2.c
index 7851a98b1..f8c97117d 100644
--- a/libvpx/vpx_dsp/x86/variance_avx2.c
+++ b/libvpx/vpx_dsp/x86/variance_avx2.c
@@ -45,7 +45,7 @@ unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride,
int sum;
variance_avx2(src, src_stride, ref, ref_stride, 16, 16,
sse, &sum, vpx_get16x16var_avx2, 16);
- return *sse - (((unsigned int)sum * sum) >> 8);
+ return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8);
}
unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride,
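The one-line change above is an overflow fix: for a 16x16 block the sum of
differences can reach 255 * 256 = 65280 in magnitude, so the 32-bit product
sum * sum can exceed INT32_MAX (signed overflow, undefined behavior). Widening
to int64_t before squaring keeps the product exact:

#include <stdint.h>

/* variance = sse - sum^2 / N, with N = 256 for 16x16 (hence >> 8). */
static uint32_t variance16x16_sketch(uint32_t sse, int sum) {
  return sse - (uint32_t)(((int64_t)sum * sum) >> 8);
}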
diff --git a/libvpx/vpx_dsp/x86/variance_impl_mmx.asm b/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
deleted file mode 100644
index b8ba79b65..000000000
--- a/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
+++ /dev/null
@@ -1,744 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%define mmx_filter_shift 7
-
-;unsigned int vpx_get_mb_ss_mmx( short *src_ptr )
-global sym(vpx_get_mb_ss_mmx) PRIVATE
-sym(vpx_get_mb_ss_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 8
- ; end prolog
-
- mov rax, arg(0) ;src_ptr
- mov rcx, 16
- pxor mm4, mm4
-
-.NEXTROW:
- movq mm0, [rax]
- movq mm1, [rax+8]
- movq mm2, [rax+16]
- movq mm3, [rax+24]
- pmaddwd mm0, mm0
- pmaddwd mm1, mm1
- pmaddwd mm2, mm2
- pmaddwd mm3, mm3
-
- paddd mm4, mm0
- paddd mm4, mm1
- paddd mm4, mm2
- paddd mm4, mm3
-
- add rax, 32
- dec rcx
- ja .NEXTROW
- movq QWORD PTR [rsp], mm4
-
- ;return sum[0]+sum[1];
- movsxd rax, dword ptr [rsp]
- movsxd rcx, dword ptr [rsp+4]
- add rax, rcx
-
- ; begin epilog
- add rsp, 8
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_get8x8var_mmx
-;(
-; unsigned char *src_ptr,
-; int source_stride,
-; unsigned char *ref_ptr,
-; int recon_stride,
-; unsigned int *SSE,
-; int *Sum
-;)
-global sym(vpx_get8x8var_mmx) PRIVATE
-sym(vpx_get8x8var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- push rsi
- push rdi
- push rbx
- sub rsp, 16
- ; end prolog
-
-        pxor        mm5, mm5                    ; Blank mm5
-        pxor        mm6, mm6                    ; Blank mm6
-        pxor        mm7, mm7                    ; Blank mm7
-
- mov rax, arg(0) ;[src_ptr] ; Load base addresses
- mov rbx, arg(2) ;[ref_ptr]
- movsxd rcx, dword ptr arg(1) ;[source_stride]
- movsxd rdx, dword ptr arg(3) ;[recon_stride]
-
- ; Row 1
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm1, [rbx] ; Copy eight bytes to mm1
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 2
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 3
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 4
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 5
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- ; movq mm4, [rbx + rdx]
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 6
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 7
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movq mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Row 8
- movq mm0, [rax] ; Copy eight bytes to mm0
- movq mm2, mm0 ; Take copies
- movq mm3, mm1 ; Take copies
-
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
-        punpcklbw   mm1, mm6
-        punpckhbw   mm2, mm6                    ; unpack to higher precision
- punpckhbw mm3, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- psubsw mm2, mm3 ; A-B (high order) to MM2
-
- paddw mm5, mm0 ; accumulate differences in mm5
- paddw mm5, mm2 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- pmaddwd mm2, mm2 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- paddd mm7, mm0 ; accumulate in mm7
- paddd mm7, mm2 ; accumulate in mm7
-
- ; Now accumulate the final results.
- movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory
- movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory
- movsx rdx, WORD PTR [rsp+8]
- movsx rcx, WORD PTR [rsp+10]
- movsx rbx, WORD PTR [rsp+12]
- movsx rax, WORD PTR [rsp+14]
- add rdx, rcx
- add rbx, rax
- add rdx, rbx ;XSum
- movsxd rax, DWORD PTR [rsp]
- movsxd rcx, DWORD PTR [rsp+4]
- add rax, rcx ;XXSum
- mov rsi, arg(4) ;SSE
- mov rdi, arg(5) ;Sum
- mov dword ptr [rsi], eax
- mov dword ptr [rdi], edx
- xor rax, rax ; return 0
-
- ; begin epilog
- add rsp, 16
- pop rbx
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void
-;vpx_get4x4var_mmx
-;(
-; unsigned char *src_ptr,
-; int source_stride,
-; unsigned char *ref_ptr,
-; int recon_stride,
-; unsigned int *SSE,
-; int *Sum
-;)
-global sym(vpx_get4x4var_mmx) PRIVATE
-sym(vpx_get4x4var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- push rsi
- push rdi
- push rbx
- sub rsp, 16
- ; end prolog
-
-        pxor        mm5, mm5                    ; Blank mm5
-        pxor        mm6, mm6                    ; Blank mm6
-        pxor        mm7, mm7                    ; Blank mm7
-
- mov rax, arg(0) ;[src_ptr] ; Load base addresses
- mov rbx, arg(2) ;[ref_ptr]
- movsxd rcx, dword ptr arg(1) ;[source_stride]
- movsxd rdx, dword ptr arg(3) ;[recon_stride]
-
- ; Row 1
- movd mm0, [rax] ; Copy four bytes to mm0
- movd mm1, [rbx] ; Copy four bytes to mm1
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- paddw mm5, mm0 ; accumulate differences in mm5
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy four bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 2
- movd mm0, [rax] ; Copy four bytes to mm0
-        punpcklbw   mm0, mm6                    ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- paddw mm5, mm0 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy four bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 3
- movd mm0, [rax] ; Copy four bytes to mm0
- punpcklbw mm0, mm6 ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- paddw mm5, mm0 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy four bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 4
- movd mm0, [rax] ; Copy four bytes to mm0
-
- punpcklbw mm0, mm6 ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
-
- paddw mm5, mm0 ; accumulate differences in mm5
-
- pmaddwd mm0, mm0 ; square and accumulate
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Now accumulate the final results.
- movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory
- movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory
- movsx rdx, WORD PTR [rsp+8]
- movsx rcx, WORD PTR [rsp+10]
- movsx rbx, WORD PTR [rsp+12]
- movsx rax, WORD PTR [rsp+14]
- add rdx, rcx
- add rbx, rax
- add rdx, rbx ;XSum
- movsxd rax, DWORD PTR [rsp]
- movsxd rcx, DWORD PTR [rsp+4]
- add rax, rcx ;XXSum
- mov rsi, arg(4) ;SSE
- mov rdi, arg(5) ;Sum
- mov dword ptr [rsi], eax
- mov dword ptr [rdi], edx
- xor rax, rax ; return 0
-
- ; begin epilog
- add rsp, 16
- pop rbx
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_filter_block2d_bil4x4_var_mmx
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned short *HFilter,
-; unsigned short *VFilter,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vpx_filter_block2d_bil4x4_var_mmx) PRIVATE
-sym(vpx_filter_block2d_bil4x4_var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
-
- mov rax, arg(4) ;HFilter ;
- mov rdx, arg(5) ;VFilter ;
-
- mov rsi, arg(0) ;ref_ptr ;
- mov rdi, arg(2) ;src_ptr ;
-
- mov rcx, 4 ;
- pxor mm0, mm0 ;
-
- movd mm1, [rsi] ;
- movd mm3, [rsi+1] ;
-
- punpcklbw mm1, mm0 ;
- pmullw mm1, [rax] ;
-
- punpcklbw mm3, mm0 ;
- pmullw mm3, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- movq mm5, mm1
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
- add rsi, r8
-%endif
-
-.filter_block2d_bil4x4_var_mmx_loop:
-
- movd mm1, [rsi] ;
- movd mm3, [rsi+1] ;
-
- punpcklbw mm1, mm0 ;
- pmullw mm1, [rax] ;
-
- punpcklbw mm3, mm0 ;
- pmullw mm3, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- movq mm3, mm5 ;
-
- movq mm5, mm1 ;
- pmullw mm3, [rdx] ;
-
- pmullw mm1, [rdx+8] ;
- paddw mm1, mm3 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- psraw mm1, mmx_filter_shift ;
-
- movd mm3, [rdi] ;
- punpcklbw mm3, mm0 ;
-
- psubw mm1, mm3 ;
- paddw mm6, mm1 ;
-
- pmaddwd mm1, mm1 ;
- paddd mm7, mm1 ;
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
- add rdi, dword ptr arg(3) ;src_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .filter_block2d_bil4x4_var_mmx_loop ;
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rdi, arg(6) ;sum
- mov rsi, arg(7) ;sumsquared
-
- movd dword ptr [rdi], mm2 ;
- movd dword ptr [rsi], mm4 ;
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_filter_block2d_bil_var_mmx
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; unsigned short *HFilter,
-; unsigned short *VFilter,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vpx_filter_block2d_bil_var_mmx) PRIVATE
-sym(vpx_filter_block2d_bil_var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
- mov rax, arg(5) ;HFilter ;
-
- mov rdx, arg(6) ;VFilter ;
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
-
- pxor mm0, mm0 ;
- movq mm1, [rsi] ;
-
- movq mm3, [rsi+1] ;
- movq mm2, mm1 ;
-
- movq mm4, mm3 ;
- punpcklbw mm1, mm0 ;
-
- punpckhbw mm2, mm0 ;
- pmullw mm1, [rax] ;
-
- pmullw mm2, [rax] ;
- punpcklbw mm3, mm0 ;
-
- punpckhbw mm4, mm0 ;
- pmullw mm3, [rax+8] ;
-
- pmullw mm4, [rax+8] ;
- paddw mm1, mm3 ;
-
- paddw mm2, mm4 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm2, mmx_filter_shift ;
- movq mm5, mm1
-
- packuswb mm5, mm2 ;
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- add rsi, r8
-%endif
-
-.filter_block2d_bil_var_mmx_loop:
-
- movq mm1, [rsi] ;
- movq mm3, [rsi+1] ;
-
- movq mm2, mm1 ;
- movq mm4, mm3 ;
-
- punpcklbw mm1, mm0 ;
- punpckhbw mm2, mm0 ;
-
- pmullw mm1, [rax] ;
- pmullw mm2, [rax] ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, [rax+8] ;
- pmullw mm4, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm2, mm4 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- psraw mm1, mmx_filter_shift ;
-
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
- psraw mm2, mmx_filter_shift ;
-
- movq mm3, mm5 ;
- movq mm4, mm5 ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- movq mm5, mm1 ;
- packuswb mm5, mm2 ;
-
- pmullw mm3, [rdx] ;
- pmullw mm4, [rdx] ;
-
- pmullw mm1, [rdx+8] ;
- pmullw mm2, [rdx+8] ;
-
- paddw mm1, mm3 ;
- paddw mm2, mm4 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- psraw mm2, mmx_filter_shift ;
-
- movq mm3, [rdi] ;
- movq mm4, mm3 ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- psubw mm1, mm3 ;
- psubw mm2, mm4 ;
-
- paddw mm6, mm1 ;
- pmaddwd mm1, mm1 ;
-
- paddw mm6, mm2 ;
- pmaddwd mm2, mm2 ;
-
- paddd mm7, mm1 ;
- paddd mm7, mm2 ;
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
- add rdi, dword ptr arg(3) ;src_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .filter_block2d_bil_var_mmx_loop ;
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rdi, arg(7) ;sum
- mov rsi, arg(8) ;sumsquared
-
- movd dword ptr [rdi], mm2 ;
- movd dword ptr [rsi], mm4 ;
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-;short mmx_bi_rd[4] = { 64, 64, 64, 64};
-align 16
-mmx_bi_rd:
- times 4 dw 64
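
Note: for reference, a minimal C sketch of the kernel the deleted assembly
above implemented: one pass that accumulates the signed sum of differences
(XSum) and the sum of squared differences (XXSum). Function and parameter
names below are illustrative, not part of the library:

    #include <stdint.h>

    static void get_var_c(const uint8_t *src, int src_stride,
                          const uint8_t *ref, int ref_stride,
                          int w, int h, unsigned int *sse, int *sum) {
      int i, j;
      *sse = 0;
      *sum = 0;
      for (i = 0; i < h; ++i) {
        for (j = 0; j < w; ++j) {
          const int diff = src[j] - ref[j];
          *sum += diff;                         /* XSum in the assembly */
          *sse += (unsigned int)(diff * diff);  /* XXSum in the assembly */
        }
        src += src_stride;
        ref += ref_stride;
      }
    }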
diff --git a/libvpx/vpx_dsp/x86/variance_mmx.c b/libvpx/vpx_dsp/x86/variance_mmx.c
deleted file mode 100644
index f04f4e2c8..000000000
--- a/libvpx/vpx_dsp/x86/variance_mmx.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_dsp_rtcd.h"
-
-#include "vpx_ports/mem.h"
-
-DECLARE_ALIGNED(16, static const int16_t, bilinear_filters_mmx[8][8]) = {
- { 128, 128, 128, 128, 0, 0, 0, 0 },
- { 112, 112, 112, 112, 16, 16, 16, 16 },
- { 96, 96, 96, 96, 32, 32, 32, 32 },
- { 80, 80, 80, 80, 48, 48, 48, 48 },
- { 64, 64, 64, 64, 64, 64, 64, 64 },
- { 48, 48, 48, 48, 80, 80, 80, 80 },
- { 32, 32, 32, 32, 96, 96, 96, 96 },
- { 16, 16, 16, 16, 112, 112, 112, 112 }
-};
-
-extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse, int *sum);
-
-extern void vpx_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- const int16_t *HFilter,
- const int16_t *VFilter,
- int *sum,
- unsigned int *sumsquared);
-
-extern void vpx_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- const int16_t *HFilter,
- const int16_t *VFilter,
- int *sum,
- unsigned int *sumsquared);
-
-
-unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride,
- const unsigned char *b, int b_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 4));
-}
-
-unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride,
- const unsigned char *b, int b_stride,
- unsigned int *sse) {
- unsigned int var;
- int avg;
-
- vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg);
- *sse = var;
-
- return (var - (((unsigned int)avg * avg) >> 6));
-}
-
-unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride,
- const unsigned char *b, int b_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3;
-
- vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
- vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1);
- vpx_get8x8var_mmx(a + 8 * a_stride, a_stride,
- b + 8 * b_stride, b_stride, &sse2, &sum2);
- vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride,
- b + 8 * b_stride + 8, b_stride, &sse3, &sum3);
-
- var = sse0 + sse1 + sse2 + sse3;
- *sse = var;
- return var;
-}
-
-unsigned int vpx_variance16x16_mmx(const unsigned char *a, int a_stride,
- const unsigned char *b, int b_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3, avg;
-
- vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
- vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1);
- vpx_get8x8var_mmx(a + 8 * a_stride, a_stride,
- b + 8 * b_stride, b_stride, &sse2, &sum2);
- vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride,
- b + 8 * b_stride + 8, b_stride, &sse3, &sum3);
-
- var = sse0 + sse1 + sse2 + sse3;
- avg = sum0 + sum1 + sum2 + sum3;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 8));
-}
-
-unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride,
- const unsigned char *b, int b_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
- vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1);
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
-}
-
-unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride,
- const unsigned char *b, int b_stride,
- unsigned int *sse) {
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
- vpx_get8x8var_mmx(a + 8 * a_stride, a_stride,
- b + 8 * b_stride, b_stride, &sse1, &sum1);
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
-
- return (var - (((unsigned int)avg * avg) >> 7));
-}
-
-uint32_t vpx_sub_pixel_variance4x4_mmx(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int xsum;
- unsigned int xxsum;
- vpx_filter_block2d_bil4x4_var_mmx(a, a_stride, b, b_stride,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum, &xxsum);
- *sse = xxsum;
- return (xxsum - (((unsigned int)xsum * xsum) >> 4));
-}
-
-
-uint32_t vpx_sub_pixel_variance8x8_mmx(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int xsum;
- uint32_t xxsum;
- vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 8,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum, &xxsum);
- *sse = xxsum;
- return (xxsum - (((uint32_t)xsum * xsum) >> 6));
-}
-
-uint32_t vpx_sub_pixel_variance16x16_mmx(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int xsum0, xsum1;
- unsigned int xxsum0, xxsum1;
-
- vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 16,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum0, &xxsum0);
-
- vpx_filter_block2d_bil_var_mmx(a + 8, a_stride, b + 8, b_stride, 16,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum1, &xxsum1);
-
- xsum0 += xsum1;
- xxsum0 += xxsum1;
-
- *sse = xxsum0;
- return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));
-}
-
-uint32_t vpx_sub_pixel_variance16x8_mmx(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int xsum0, xsum1;
- unsigned int xxsum0, xxsum1;
-
- vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 8,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum0, &xxsum0);
-
- vpx_filter_block2d_bil_var_mmx(a + 8, a_stride, b + 8, b_stride, 8,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum1, &xxsum1);
-
- xsum0 += xsum1;
- xxsum0 += xxsum1;
-
- *sse = xxsum0;
- return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 7));
-}
-
-uint32_t vpx_sub_pixel_variance8x16_mmx(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- int xsum;
- unsigned int xxsum;
- vpx_filter_block2d_bil_var_mmx(a, a_stride, b, b_stride, 16,
- bilinear_filters_mmx[xoffset],
- bilinear_filters_mmx[yoffset],
- &xsum, &xxsum);
- *sse = xxsum;
- return (xxsum - (((uint32_t)xsum * xsum) >> 7));
-}
-
-uint32_t vpx_variance_halfpixvar16x16_h_mmx(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 4, 0, b, b_stride, sse);
-}
-
-uint32_t vpx_variance_halfpixvar16x16_v_mmx(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 0, 4, b, b_stride, sse);
-}
-
-uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- return vpx_sub_pixel_variance16x16_mmx(a, a_stride, 4, 4, b, b_stride, sse);
-}
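
Note: the deleted wrappers above pair that kernel with the bilinear filter
table, whose rows sum to 128; intermediate results are rounded with
mmx_bi_rd (64) and shifted right by 7. A scalar sketch of one horizontal
tap, with illustrative names:

    #include <stdint.h>

    /* filter points at one 8-entry row of bilinear_filters_mmx:
     * entries 0..3 hold the first tap, entries 4..7 the second. */
    static uint8_t bilinear_tap(uint8_t a, uint8_t b, const int16_t *filter) {
      return (uint8_t)((a * filter[0] + b * filter[4] + 64) >> 7);
    }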
diff --git a/libvpx/vpx_dsp/x86/variance_sse2.c b/libvpx/vpx_dsp/x86/variance_sse2.c
index e6c9365ab..6987c2e24 100644
--- a/libvpx/vpx_dsp/x86/variance_sse2.c
+++ b/libvpx/vpx_dsp/x86/variance_sse2.c
@@ -171,7 +171,7 @@ unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride,
unsigned int *sse) {
int sum;
get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 4);
+ return *sse - ((sum * sum) >> 4);
}
unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
@@ -180,7 +180,7 @@ unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
sse, &sum, get4x4var_sse2, 4);
- return *sse - (((unsigned int)sum * sum) >> 5);
+ return *sse - ((sum * sum) >> 5);
}
unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
@@ -189,7 +189,7 @@ unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
sse, &sum, get4x4var_sse2, 4);
- return *sse - (((unsigned int)sum * sum) >> 5);
+ return *sse - ((sum * sum) >> 5);
}
unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
@@ -197,7 +197,7 @@ unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
unsigned int *sse) {
int sum;
vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 6);
+ return *sse - ((sum * sum) >> 6);
}
unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
@@ -206,7 +206,7 @@ unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
sse, &sum, vpx_get8x8var_sse2, 8);
- return *sse - (((unsigned int)sum * sum) >> 7);
+ return *sse - ((sum * sum) >> 7);
}
unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
@@ -215,7 +215,7 @@ unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
sse, &sum, vpx_get8x8var_sse2, 8);
- return *sse - (((unsigned int)sum * sum) >> 7);
+ return *sse - ((sum * sum) >> 7);
}
unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
@@ -223,7 +223,7 @@ unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
unsigned int *sse) {
int sum;
vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 8);
+ return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8);
}
unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
@@ -320,16 +320,16 @@ unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride,
int height, unsigned int *sse, \
void *unused0, void *unused)
#define DECLS(opt1, opt2) \
- DECL(4, opt2); \
+ DECL(4, opt1); \
DECL(8, opt1); \
DECL(16, opt1)
-DECLS(sse2, sse);
+DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
int src_stride, \
int x_offset, \
@@ -365,25 +365,25 @@ unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
} \
} \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
}
#define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \
-FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \
-FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \
-FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \
-FN(4, 4, 4, 2, 2, opt2, (uint32_t))
-
-FNS(sse2, sse);
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (int32_t), (int32_t)); \
+FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \
+FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \
+FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \
+FN(4, 8, 4, 2, 3, opt1, (int32_t), (int32_t)); \
+FN(4, 4, 4, 2, 2, opt1, (int32_t), (int32_t))
+
+FNS(sse2, sse2);
FNS(ssse3, ssse3);
#undef FNS
@@ -401,16 +401,16 @@ int vpx_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \
int height, unsigned int *sse, \
void *unused0, void *unused)
#define DECLS(opt1, opt2) \
-DECL(4, opt2); \
+DECL(4, opt1); \
DECL(8, opt1); \
DECL(16, opt1)
-DECLS(sse2, sse);
+DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECL
#undef DECLS
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
int src_stride, \
int x_offset, \
@@ -451,23 +451,23 @@ unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
} \
} \
*sseptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
}
#define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16, 8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8, 16, 8, 3, 4, opt1, (uint32_t)); \
-FN(8, 8, 8, 3, 3, opt1, (uint32_t)); \
-FN(8, 4, 8, 3, 2, opt1, (uint32_t)); \
-FN(4, 8, 4, 2, 3, opt2, (uint32_t)); \
-FN(4, 4, 4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16, 8, 16, 4, 3, opt1, (uint32_t), (int32_t)); \
+FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \
+FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \
+FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \
+FN(4, 8, 4, 2, 3, opt1, (uint32_t), (int32_t)); \
+FN(4, 4, 4, 2, 2, opt1, (uint32_t), (int32_t))
FNS(sse2, sse);
FNS(ssse3, ssse3);
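
Note: the (int64_t) widening added for the 16x16 case above is not cosmetic.
|sum| can reach 255 * 256 = 65280, and 65280 * 65280 = 4261478400, which
overflows a signed 32-bit multiply (undefined behavior). A sketch of the
well-defined form:

    #include <stdint.h>

    static uint32_t sq_sum_16x16(int sum) {  /* sum in [-65280, 65280] */
      return (uint32_t)(((int64_t)sum * sum) >> 8);
    }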
diff --git a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
index 9c5b414b4..abc027065 100644
--- a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+++ b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -13,15 +13,21 @@
SECTION .text
%macro convolve_fn 1-2
-INIT_XMM sse2
+%ifidn %1, avg
+%define AUX_XMM_REGS 4
+%else
+%define AUX_XMM_REGS 0
+%endif
%ifidn %2, highbd
%define pavg pavgw
-cglobal %2_convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
- fx, fxs, fy, fys, w, h, bd
+cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
+ dst, dst_stride, \
+ fx, fxs, fy, fys, w, h, bd
%else
%define pavg pavgb
-cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
- fx, fxs, fy, fys, w, h
+cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
+ dst, dst_stride, \
+ fx, fxs, fy, fys, w, h
%endif
mov r4d, dword wm
%ifidn %2, highbd
@@ -152,27 +158,30 @@ cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
jnz .loop16
RET
-INIT_MMX sse
.w8:
mov r4d, dword hm
lea r5q, [src_strideq*3]
lea r6q, [dst_strideq*3]
.loop8:
- movu m0, [srcq]
- movu m1, [srcq+src_strideq]
- movu m2, [srcq+src_strideq*2]
- movu m3, [srcq+r5q]
+ movh m0, [srcq]
+ movh m1, [srcq+src_strideq]
+ movh m2, [srcq+src_strideq*2]
+ movh m3, [srcq+r5q]
lea srcq, [srcq+src_strideq*4]
%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq+dst_strideq]
- pavg m2, [dstq+dst_strideq*2]
- pavg m3, [dstq+r6q]
+ movh m4, [dstq]
+ movh m5, [dstq+dst_strideq]
+ movh m6, [dstq+dst_strideq*2]
+ movh m7, [dstq+r6q]
+ pavg m0, m4
+ pavg m1, m5
+ pavg m2, m6
+ pavg m3, m7
%endif
- mova [dstq ], m0
- mova [dstq+dst_strideq ], m1
- mova [dstq+dst_strideq*2], m2
- mova [dstq+r6q ], m3
+ movh [dstq ], m0
+ movh [dstq+dst_strideq ], m1
+ movh [dstq+dst_strideq*2], m2
+ movh [dstq+r6q ], m3
lea dstq, [dstq+dst_strideq*4]
sub r4d, 4
jnz .loop8
@@ -184,25 +193,25 @@ INIT_MMX sse
lea r5q, [src_strideq*3]
lea r6q, [dst_strideq*3]
.loop4:
- movh m0, [srcq]
- movh m1, [srcq+src_strideq]
- movh m2, [srcq+src_strideq*2]
- movh m3, [srcq+r5q]
+ movd m0, [srcq]
+ movd m1, [srcq+src_strideq]
+ movd m2, [srcq+src_strideq*2]
+ movd m3, [srcq+r5q]
lea srcq, [srcq+src_strideq*4]
%ifidn %1, avg
- movh m4, [dstq]
- movh m5, [dstq+dst_strideq]
- movh m6, [dstq+dst_strideq*2]
- movh m7, [dstq+r6q]
+ movd m4, [dstq]
+ movd m5, [dstq+dst_strideq]
+ movd m6, [dstq+dst_strideq*2]
+ movd m7, [dstq+r6q]
pavg m0, m4
pavg m1, m5
pavg m2, m6
pavg m3, m7
%endif
- movh [dstq ], m0
- movh [dstq+dst_strideq ], m1
- movh [dstq+dst_strideq*2], m2
- movh [dstq+r6q ], m3
+ movd [dstq ], m0
+ movd [dstq+dst_strideq ], m1
+ movd [dstq+dst_strideq*2], m2
+ movd [dstq+r6q ], m3
lea dstq, [dstq+dst_strideq*4]
sub r4d, 4
jnz .loop4
@@ -210,6 +219,7 @@ INIT_MMX sse
%endif
%endmacro
+INIT_XMM sse2
convolve_fn copy
convolve_fn avg
%if CONFIG_VP9_HIGHBITDEPTH
diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
index 3fbaa274c..d2cb8ea29 100644
--- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -16,6 +16,11 @@ pw_64: times 8 dw 64
; %define USE_PMULHRSW
; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss
; when using this instruction.
+;
+; The add order below (based on ffvp9) must be followed to prevent
+; out-of-range intermediate sums.
+; x = k0k1 + k4k5
+; y = k2k3 + k6k7
+; z = signed SAT(x + y)
SECTION .text
%if ARCH_X86_64
@@ -77,17 +82,12 @@ SECTION .text
pmaddubsw %2, k0k1k4k5
pmaddubsw m3, k2k3k6k7
-
- mova m4, %2
- mova m5, m3
- psrldq %2, 8
- psrldq m3, 8
- mova m6, m5
-
- paddsw m4, m3
- pmaxsw m5, %2
- pminsw %2, m6
+ mova m4, %2 ;k0k1
+ mova m5, m3 ;k2k3
+ psrldq %2, 8 ;k4k5
+ psrldq m3, 8 ;k6k7
paddsw %2, m4
+ paddsw m5, m3
paddsw %2, m5
paddsw %2, krd
psraw %2, 7
@@ -157,27 +157,20 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
pmaddubsw m7, k0k1k4k5
palignr m3, m2, 5
pmaddubsw m3, k2k3k6k7
- mova m0, m4
- mova m5, m1
- mova m2, m7
- psrldq m4, 8
- psrldq m1, 8
- mova m6, m5
- paddsw m0, m1
- mova m1, m3
- psrldq m7, 8
- psrldq m3, 8
- paddsw m2, m3
- mova m3, m1
- pmaxsw m5, m4
- pminsw m4, m6
+ mova m0, m4 ;k0k1
+ mova m5, m1 ;k2k3
+ mova m2, m7 ;k0k1 upper
+ psrldq m4, 8 ;k4k5
+ psrldq m1, 8 ;k6k7
paddsw m4, m0
- paddsw m4, m5
- pmaxsw m1, m7
- pminsw m7, m3
+ paddsw m5, m1
+ mova m1, m3 ;k2k3 upper
+ psrldq m7, 8 ;k4k5 upper
+ psrldq m3, 8 ;k6k7 upper
paddsw m7, m2
+ paddsw m4, m5
+ paddsw m1, m3
paddsw m7, m1
-
paddsw m4, krd
psraw m4, 7
packuswb m4, m4
@@ -240,16 +233,13 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
pmaddubsw %3, k2k3
pmaddubsw %4, k4k5
pmaddubsw %5, k6k7
-
+ paddsw %2, %4
+ paddsw %5, %3
paddsw %2, %5
- mova %1, %3
- pminsw %3, %4
- pmaxsw %1, %4
- paddsw %2, %3
- paddsw %1, %2
- paddsw %1, krd
- psraw %1, 7
- packuswb %1, %1
+ paddsw %2, krd
+ psraw %2, 7
+ packuswb %2, %2
+ SWAP %1, %2
%endm
;-------------------------------------------------------------------------------
@@ -293,39 +283,33 @@ cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \
pmaddubsw m3, k4k5
palignr m7, m4, 13
- paddsw m1, m5
- mova m5, m6
- mova m0, m2
- palignr m5, m4, 5
- pminsw m2, m3
+ mova m0, m6
+ palignr m0, m4, 5
pmaddubsw m7, k6k7
- pmaxsw m3, m0
+ paddsw m1, m3
+ paddsw m2, m5
paddsw m1, m2
- mova m0, m6
+ mova m5, m6
palignr m6, m4, 1
- pmaddubsw m5, k2k3
- paddsw m1, m3
+ pmaddubsw m0, k2k3
pmaddubsw m6, k0k1
- palignr m0, m4, 9
+ palignr m5, m4, 9
paddsw m1, krd
- pmaddubsw m0, k4k5
- mova m4, m5
+ pmaddubsw m5, k4k5
psraw m1, 7
- pminsw m5, m0
- paddsw m6, m7
+ paddsw m0, m7
+%ifidn %1, h8_avg
+ movh m7, [dstq]
+ movh m2, [dstq + dstrideq]
+%endif
packuswb m1, m1
-
paddsw m6, m5
- pmaxsw m0, m4
paddsw m6, m0
paddsw m6, krd
psraw m6, 7
packuswb m6, m6
-
%ifidn %1, h8_avg
- movh m0, [dstq]
- movh m2, [dstq + dstrideq]
- pavgb m1, m0
+ pavgb m1, m7
pavgb m6, m2
%endif
movh [dstq], m1
@@ -388,7 +372,7 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
pmaddubsw m1, k2k3
palignr m2, m7, 9
pmaddubsw m2, k4k5
- paddsw m0, m3
+ paddsw m1, m3
mova m3, m4
punpckhbw m4, m4
mova m5, m4
@@ -403,17 +387,13 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
pmaddubsw m6, k4k5
palignr m7, m3, 13
pmaddubsw m7, k6k7
-
- mova m3, m1
- pmaxsw m1, m2
- pminsw m2, m3
paddsw m0, m2
paddsw m0, m1
- paddsw m4, m7
- mova m7, m5
- pmaxsw m5, m6
- pminsw m6, m7
+%ifidn %1, h8_avg
+ mova m1, [dstq]
+%endif
paddsw m4, m6
+ paddsw m5, m7
paddsw m4, m5
paddsw m0, krd
paddsw m4, krd
@@ -421,7 +401,6 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
psraw m4, 7
packuswb m0, m4
%ifidn %1, h8_avg
- mova m1, [dstq]
pavgb m0, m1
%endif
lea srcq, [srcq + sstrideq]
@@ -488,27 +467,21 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
movx m7, [src1q + sstride6q ] ;H
punpcklbw m6, m7 ;G H
pmaddubsw m6, k6k7
- mova tmp, m2
pmaddubsw m3, k2k3
pmaddubsw m1, k0k1
- pmaxsw m2, m4
- paddsw m0, m6
+ paddsw m0, m4
+ paddsw m2, m6
movx m6, [srcq + sstrideq * 8 ] ;H next iter
punpcklbw m7, m6
pmaddubsw m7, k6k7
- pminsw m4, tmp
- paddsw m0, m4
- mova m4, m3
paddsw m0, m2
- pminsw m3, m5
- pmaxsw m5, m4
paddsw m0, krd
psraw m0, 7
- paddsw m1, m7
+ paddsw m1, m5
packuswb m0, m0
+ paddsw m3, m7
paddsw m1, m3
- paddsw m1, m5
paddsw m1, krd
psraw m1, 7
lea srcq, [srcq + sstrideq * 2 ]
@@ -538,11 +511,11 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
movx m1, [srcq + sstrideq ] ;B
movx m6, [srcq + sstride6q ] ;G
punpcklbw m0, m1 ;A B
- movx m7, [rax + sstride6q ] ;H
+ movx m7, [src1q + sstride6q ] ;H
pmaddubsw m0, k0k1
movx m2, [srcq + sstrideq * 2 ] ;C
punpcklbw m6, m7 ;G H
- movx m3, [rax + sstrideq * 2 ] ;D
+ movx m3, [src1q + sstrideq * 2] ;D
pmaddubsw m6, k6k7
movx m4, [srcq + sstrideq * 4 ] ;E
punpcklbw m2, m3 ;C D
@@ -550,10 +523,7 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
punpcklbw m4, m5 ;E F
pmaddubsw m2, k2k3
pmaddubsw m4, k4k5
- paddsw m0, m6
- mova m1, m2
- pmaxsw m2, m4
- pminsw m4, m1
+ paddsw m2, m6
paddsw m0, m4
paddsw m0, m2
paddsw m0, krd
@@ -572,7 +542,6 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
%macro SUBPIX_VFILTER16 1
cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
src, sstride, dst, dstride, height, filter
-
mova m4, [filterq]
SETUP_LOCAL_VARS
%if ARCH_X86_64
@@ -611,12 +580,9 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
punpcklbw m3, m5 ;A B
movh m7, [srcq + sstrideq * 2 + 8] ;C
pmaddubsw m6, k6k7
- mova m1, m2
movh m5, [src1q + sstrideq * 2 + 8] ;D
- pmaxsw m2, m4
punpcklbw m7, m5 ;C D
- pminsw m4, m1
- paddsw m0, m6
+ paddsw m2, m6
pmaddubsw m3, k0k1
movh m1, [srcq + sstrideq * 4 + 8] ;E
paddsw m0, m4
@@ -630,30 +596,24 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
movh m5, [src1q + sstride6q + 8] ;H
psraw m0, 7
punpcklbw m2, m5 ;G H
- packuswb m0, m0
pmaddubsw m2, k6k7
%ifidn %1, v8_avg
- movh m4, [dstq]
- pavgb m0, m4
+ mova m4, [dstq]
%endif
movh [dstq], m0
- mova m6, m7
- pmaxsw m7, m1
- pminsw m1, m6
- paddsw m3, m2
+ paddsw m7, m2
paddsw m3, m1
paddsw m3, m7
paddsw m3, krd
psraw m3, 7
- packuswb m3, m3
+ packuswb m0, m3
add srcq, sstrideq
add src1q, sstrideq
%ifidn %1, v8_avg
- movh m1, [dstq + 8]
- pavgb m3, m1
+ pavgb m0, m4
%endif
- movh [dstq + 8], m3
+ mova [dstq], m0
add dstq, dst_stride
dec heightd
jnz .loop
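
Note: a scalar sketch of the paddsw ordering the new comment in this file
describes (x = k0k1 + k4k5, y = k2k3 + k6k7, z = signed SAT(x + y)); keeping
the adds in this order matches ffvp9's saturation behavior. Names are
illustrative:

    #include <stdint.h>

    static int16_t sat16(int32_t v) {
      if (v > INT16_MAX) return INT16_MAX;
      if (v < INT16_MIN) return INT16_MIN;
      return (int16_t)v;
    }

    /* k0k1..k6k7 are the four pmaddubsw partial products for one pixel. */
    static int16_t filter_sum(int16_t k0k1, int16_t k2k3,
                              int16_t k4k5, int16_t k6k7) {
      const int16_t x = sat16((int32_t)k0k1 + k4k5);  /* paddsw */
      const int16_t y = sat16((int32_t)k2k3 + k6k7);  /* paddsw */
      return sat16((int32_t)x + y);                   /* final saturating add */
    }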
diff --git a/libvpx/vpx_mem/vpx_mem.c b/libvpx/vpx_mem/vpx_mem.c
index b98fe83c0..b261fc0da 100644
--- a/libvpx/vpx_mem/vpx_mem.c
+++ b/libvpx/vpx_mem/vpx_mem.c
@@ -9,8 +9,6 @@
*/
-#define __VPX_MEM_C__
-
#include "vpx_mem.h"
#include <stdio.h>
#include <stdlib.h>
diff --git a/libvpx/vpx_ports/mem_ops.h b/libvpx/vpx_ports/mem_ops.h
index d4a3d773f..620df31b2 100644
--- a/libvpx/vpx_ports/mem_ops.h
+++ b/libvpx/vpx_ports/mem_ops.h
@@ -89,7 +89,7 @@ static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) {
unsigned MEM_VALUE_T val;
const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[0] << 24;
+ val = ((unsigned MEM_VALUE_T)mem[0]) << 24;
val |= mem[1] << 16;
val |= mem[2] << 8;
val |= mem[3];
@@ -125,7 +125,7 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) {
unsigned MEM_VALUE_T val;
const MAU_T *mem = (const MAU_T *)vmem;
- val = mem[3] << 24;
+ val = ((unsigned MEM_VALUE_T)mem[3]) << 24;
val |= mem[2] << 16;
val |= mem[1] << 8;
val |= mem[0];
@@ -168,8 +168,8 @@ mem_get_s_generic(le, 32)
static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) {
MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 8) & 0xff;
- mem[1] = (val >> 0) & 0xff;
+ mem[0] = (MAU_T)((val >> 8) & 0xff);
+ mem[1] = (MAU_T)((val >> 0) & 0xff);
}
#undef mem_put_be24
@@ -177,9 +177,9 @@ static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) {
static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) {
MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 16) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 0) & 0xff;
+ mem[0] = (MAU_T)((val >> 16) & 0xff);
+ mem[1] = (MAU_T)((val >> 8) & 0xff);
+ mem[2] = (MAU_T)((val >> 0) & 0xff);
}
#undef mem_put_be32
@@ -187,10 +187,10 @@ static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) {
static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) {
MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 24) & 0xff;
- mem[1] = (val >> 16) & 0xff;
- mem[2] = (val >> 8) & 0xff;
- mem[3] = (val >> 0) & 0xff;
+ mem[0] = (MAU_T)((val >> 24) & 0xff);
+ mem[1] = (MAU_T)((val >> 16) & 0xff);
+ mem[2] = (MAU_T)((val >> 8) & 0xff);
+ mem[3] = (MAU_T)((val >> 0) & 0xff);
}
#undef mem_put_le16
@@ -198,8 +198,8 @@ static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) {
static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) {
MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
+ mem[0] = (MAU_T)((val >> 0) & 0xff);
+ mem[1] = (MAU_T)((val >> 8) & 0xff);
}
#undef mem_put_le24
@@ -207,9 +207,9 @@ static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) {
static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) {
MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 16) & 0xff;
+ mem[0] = (MAU_T)((val >> 0) & 0xff);
+ mem[1] = (MAU_T)((val >> 8) & 0xff);
+ mem[2] = (MAU_T)((val >> 16) & 0xff);
}
#undef mem_put_le32
@@ -217,10 +217,10 @@ static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) {
static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) {
MAU_T *mem = (MAU_T *)vmem;
- mem[0] = (val >> 0) & 0xff;
- mem[1] = (val >> 8) & 0xff;
- mem[2] = (val >> 16) & 0xff;
- mem[3] = (val >> 24) & 0xff;
+ mem[0] = (MAU_T)((val >> 0) & 0xff);
+ mem[1] = (MAU_T)((val >> 8) & 0xff);
+ mem[2] = (MAU_T)((val >> 16) & 0xff);
+ mem[3] = (MAU_T)((val >> 24) & 0xff);
}
#endif // VPX_PORTS_MEM_OPS_H_
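
Note: the casts added to mem_get_be32/mem_get_le32 above fix a real hazard.
mem[0] promotes to signed int, and shifting a byte >= 0x80 left by 24 lands
in the sign bit, which is undefined behavior. A standalone sketch of the
well-defined form:

    #include <stdint.h>

    static uint32_t get_be32(const uint8_t *mem) {
      return ((uint32_t)mem[0] << 24) | ((uint32_t)mem[1] << 16) |
             ((uint32_t)mem[2] << 8) | (uint32_t)mem[3];
    }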
diff --git a/libvpx/vpx_ports/mem_ops_aligned.h b/libvpx/vpx_ports/mem_ops_aligned.h
index c16111fec..46f61738b 100644
--- a/libvpx/vpx_ports/mem_ops_aligned.h
+++ b/libvpx/vpx_ports/mem_ops_aligned.h
@@ -28,8 +28,8 @@
* could redefine these macros.
*/
#define swap_endian_16(val,raw) do {\
- val = ((raw>>8) & 0x00ff) \
- | ((raw<<8) & 0xff00);\
+ val = (uint16_t)(((raw>>8) & 0x00ff) \
+ | ((raw<<8) & 0xff00));\
} while(0)
#define swap_endian_32(val,raw) do {\
val = ((raw>>24) & 0x000000ff) \
diff --git a/libvpx/vpx_ports/vpx_once.h b/libvpx/vpx_ports/vpx_once.h
index f1df39434..da04db459 100644
--- a/libvpx/vpx_ports/vpx_once.h
+++ b/libvpx/vpx_ports/vpx_once.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -13,63 +13,83 @@
#include "vpx_config.h"
+/* Implement a function wrapper to guarantee initialization
+ * thread-safety for library singletons.
+ *
+ * NOTE: These functions use static locks, and can only be
+ * used with one common argument per compilation unit. So
+ *
+ * file1.c:
+ * vpx_once(foo);
+ * ...
+ * vpx_once(foo);
+ *
+ * file2.c:
+ * vpx_once(bar);
+ *
+ * will ensure foo() and bar() are each called only once, but in
+ *
+ * file1.c:
+ * vpx_once(foo);
+ * vpx_once(bar);
+ *
+ * bar() will never be called because the lock is used up
+ * by the call to foo().
+ */
+
#if CONFIG_MULTITHREAD && defined(_WIN32)
#include <windows.h>
#include <stdlib.h>
+/* Declare a per-compilation-unit state variable to track the progress
+ * of calling func() only once. This must be at global scope because
+ * local initializers are not thread-safe in MSVC prior to Visual
+ * Studio 2015.
+ *
+ * As a static, once_state will be zero-initialized at program start.
+ */
+static LONG once_state;
static void once(void (*func)(void))
{
- static CRITICAL_SECTION *lock;
- static LONG waiters;
- static int done;
- void *lock_ptr = &lock;
-
- /* If the initialization is complete, return early. This isn't just an
- * optimization, it prevents races on the destruction of the global
- * lock.
+ /* Try to advance once_state from its initial value of 0 to 1.
+ * Only one thread can succeed in doing so.
*/
- if(done)
+ if (InterlockedCompareExchange(&once_state, 1, 0) == 0) {
+ /* We're the winning thread, having set once_state to 1.
+ * Call our function. */
+ func();
+ /* Now advance once_state to 2, unblocking any other threads. */
+ InterlockedIncrement(&once_state);
return;
-
- InterlockedIncrement(&waiters);
-
- /* Get a lock. We create one and try to make it the one-true-lock,
- * throwing it away if we lost the race.
- */
-
- {
- /* Scope to protect access to new_lock */
- CRITICAL_SECTION *new_lock = malloc(sizeof(CRITICAL_SECTION));
- InitializeCriticalSection(new_lock);
- if (InterlockedCompareExchangePointer(lock_ptr, new_lock, NULL) != NULL)
- {
- DeleteCriticalSection(new_lock);
- free(new_lock);
- }
}
- /* At this point, we have a lock that can be synchronized on. We don't
- * care which thread actually performed the allocation.
+ /* We weren't the winning thread, but we want to block on
+ * the state variable so we don't return before func()
+ * has finished executing elsewhere.
+ *
+ * Try to advance once_state from 2 to 2, which is only possible
+ * after the winning thread advances it from 1 to 2.
*/
-
- EnterCriticalSection(lock);
-
- if (!done)
- {
- func();
- done = 1;
+ while (InterlockedCompareExchange(&once_state, 2, 2) != 2) {
+ /* State isn't yet 2. Try again.
+ *
+ * This is used for singleton initialization functions,
+ * which should complete quickly. Contention will likewise
+ * be rare, so it's worthwhile to use a simple but cpu-
+ * intensive busy-wait instead of successive backoff,
+ * waiting on a kernel object, or another heavier-weight scheme.
+ *
+ * We can at least yield our timeslice.
+ */
+ Sleep(0);
}
- LeaveCriticalSection(lock);
-
- /* Last one out should free resources. The destructed objects are
- * protected by checking if(done) above.
+ /* We've seen once_state advance to 2, so we know func()
+ * has been called. And we've left once_state as we found it,
+ * so other threads will have the same experience.
+ *
+ * It's safe to return now.
*/
- if(!InterlockedDecrement(&waiters))
- {
- DeleteCriticalSection(lock);
- free(lock);
- lock = NULL;
- }
+ return;
}
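
Note: a sketch of how once() is typically used, guarding a one-time
initializer such as RTCD function-pointer setup. The caller names below are
hypothetical:

    static void init_function_pointers(void) {
      /* select SIMD implementations based on x86_simd_caps(), etc. */
    }

    void my_codec_init(void) {
      once(init_function_pointers);  /* runs the initializer exactly once */
    }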
diff --git a/libvpx/vpx_ports/x86.h b/libvpx/vpx_ports/x86.h
index 5da346e58..bae25ac34 100644
--- a/libvpx/vpx_ports/x86.h
+++ b/libvpx/vpx_ports/x86.h
@@ -12,6 +12,11 @@
#ifndef VPX_PORTS_X86_H_
#define VPX_PORTS_X86_H_
#include <stdlib.h>
+
+#if defined(_MSC_VER)
+#include <intrin.h> /* For __cpuidex, __rdtsc */
+#endif
+
#include "vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -77,16 +82,12 @@ typedef enum {
#else /* end __SUNPRO__ */
#if ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
-void __cpuidex(int CPUInfo[4], int info_type, int ecxvalue);
-#pragma intrinsic(__cpuidex)
#define cpuid(func, func2, a, b, c, d) do {\
int regs[4];\
__cpuidex(regs, func, func2); \
a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\
} while(0)
#else
-void __cpuid(int CPUInfo[4], int info_type);
-#pragma intrinsic(__cpuid)
#define cpuid(func, func2, a, b, c, d) do {\
int regs[4];\
__cpuid(regs, func); \
@@ -172,7 +173,7 @@ x86_simd_caps(void) {
env = getenv("VPX_SIMD_CAPS_MASK");
if (env && *env)
- mask = strtol(env, NULL, 0);
+ mask = (unsigned int)strtoul(env, NULL, 0);
/* Ensure that the CPUID instruction supports extended features */
cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
@@ -212,10 +213,11 @@ x86_simd_caps(void) {
return flags & mask;
}
-#if ARCH_X86_64 && defined(_MSC_VER)
-unsigned __int64 __rdtsc(void);
-#pragma intrinsic(__rdtsc)
-#endif
+// Note:
+// The 32-bit CPU cycle counter is lightweight and sufficient for most
+// function performance measurements. For long-running functions (CPU time
+// over a couple of seconds), the 64-bit counter should be used.
+// 32-bit CPU cycle counter
static INLINE unsigned int
x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
@@ -234,7 +236,25 @@ x86_readtsc(void) {
#endif
#endif
}
-
+// 64-bit CPU cycle counter
+static INLINE uint64_t
+x86_readtsc64(void) {
+#if defined(__GNUC__) && __GNUC__
+ uint32_t hi, lo;
+ __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)hi << 32) | lo;
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+ uint_t hi, lo;
+ asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)hi << 32) | lo;
+#else
+#if ARCH_X86_64
+ return (uint64_t)__rdtsc();
+#else
+ __asm rdtsc;
+#endif
+#endif
+}
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\
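
Note: a sketch of timing a call with the new 64-bit counter. The 32-bit
x86_readtsc() wraps after 2^32 cycles, roughly a second on current CPUs, so
long measurements need x86_readtsc64(). The workload is hypothetical:

    #include "vpx_ports/x86.h"

    static uint64_t time_call(void (*fn)(void)) {
      const uint64_t t0 = x86_readtsc64();
      fn();  /* e.g. encode one frame */
      return x86_readtsc64() - t0;  /* elapsed cycles */
    }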
diff --git a/libvpx/vpx_ports/x86_abi_support.asm b/libvpx/vpx_ports/x86_abi_support.asm
index c94b76a06..708fa101c 100644
--- a/libvpx/vpx_ports/x86_abi_support.asm
+++ b/libvpx/vpx_ports/x86_abi_support.asm
@@ -189,7 +189,6 @@
%if ABI_IS_32BIT
%if CONFIG_PIC=1
%ifidn __OUTPUT_FORMAT__,elf32
- %define GET_GOT_SAVE_ARG 1
%define WRT_PLT wrt ..plt
%macro GET_GOT 1
extern _GLOBAL_OFFSET_TABLE_
@@ -208,7 +207,6 @@
%define RESTORE_GOT pop %1
%endmacro
%elifidn __OUTPUT_FORMAT__,macho32
- %define GET_GOT_SAVE_ARG 1
%macro GET_GOT 1
push %1
call %%get_got
diff --git a/libvpx/vpx_scale/generic/yv12config.c b/libvpx/vpx_scale/generic/yv12config.c
index 773921813..6bbb6d8d4 100644
--- a/libvpx/vpx_scale/generic/yv12config.c
+++ b/libvpx/vpx_scale/generic/yv12config.c
@@ -114,7 +114,7 @@ int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
return -2;
}
-#if CONFIG_VP9 || CONFIG_VP10
+#if CONFIG_VP9
// TODO(jkoleszar): Maybe replace this with struct vpx_image
int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) {
@@ -160,29 +160,12 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) *
(uint64_t)uv_stride + byte_alignment;
-#if CONFIG_ALPHA
- const int alpha_width = aligned_width;
- const int alpha_height = aligned_height;
- const int alpha_stride = y_stride;
- const int alpha_border_w = border;
- const int alpha_border_h = border;
- const uint64_t alpha_plane_size = (alpha_height + 2 * alpha_border_h) *
- (uint64_t)alpha_stride + byte_alignment;
-#if CONFIG_VP9_HIGHBITDEPTH
- const uint64_t frame_size = (1 + use_highbitdepth) *
- (yplane_size + 2 * uvplane_size + alpha_plane_size);
-#else
- const uint64_t frame_size = yplane_size + 2 * uvplane_size +
- alpha_plane_size;
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#else
#if CONFIG_VP9_HIGHBITDEPTH
const uint64_t frame_size =
(1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size);
#else
const uint64_t frame_size = yplane_size + 2 * uvplane_size;
#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // CONFIG_ALPHA
uint8_t *buf = NULL;
@@ -203,6 +186,15 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
return -1;
ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+ // This memset avoids use-of-uninitialized-value reports in MSan tests.
+ // It costs performance, so only do it when building for MSan.
+ memset(ybf->buffer_alloc, 0, (int)frame_size);
+#endif
+#endif
} else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
// Allocation to hold larger frame, or first allocation.
vpx_free(ybf->buffer_alloc);
@@ -268,14 +260,6 @@ int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) +
uv_border_w, vp9_byte_align);
-#if CONFIG_ALPHA
- ybf->alpha_width = alpha_width;
- ybf->alpha_height = alpha_height;
- ybf->alpha_stride = alpha_stride;
- ybf->alpha_buffer = (uint8_t *)yv12_align_addr(
- buf + yplane_size + 2 * uvplane_size +
- (alpha_border_h * alpha_stride) + alpha_border_w, vp9_byte_align);
-#endif
ybf->corrupted = 0; /* assume not corrupted by errors */
return 0;
}
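
Note: the nested #if/#if added above is the portable way to test for MSan;
a compiler that lacks __has_feature would fail to parse it inside a single
combined condition. A sketch of the pattern, with a hypothetical helper
macro:

    #if defined(__has_feature)
    #if __has_feature(memory_sanitizer)
    #define VPX_UNDER_MSAN 1  /* hypothetical convenience macro */
    #endif
    #endif
    #ifndef VPX_UNDER_MSAN
    #define VPX_UNDER_MSAN 0
    #endif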
diff --git a/libvpx/vpx_scale/generic/yv12extend.c b/libvpx/vpx_scale/generic/yv12extend.c
index 670144bc1..52f0aff1f 100644
--- a/libvpx/vpx_scale/generic/yv12extend.c
+++ b/libvpx/vpx_scale/generic/yv12extend.c
@@ -157,7 +157,7 @@ void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) {
uv_border + ybf->uv_width - ybf->uv_crop_width);
}
-#if CONFIG_VP9 || CONFIG_VP10
+#if CONFIG_VP9
static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
const int c_w = ybf->uv_crop_width;
const int c_h = ybf->uv_crop_height;
@@ -211,13 +211,13 @@ void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) {
}
#if CONFIG_VP9_HIGHBITDEPTH
-void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
+static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
memcpy(dst, src, num * sizeof(uint16_t));
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // CONFIG_VP9 || CONFIG_VP10
+#endif // CONFIG_VP9
// Copies the source image into the destination image and updates the
// destination's UMV borders.
diff --git a/libvpx/vpx_scale/vpx_scale_rtcd.pl b/libvpx/vpx_scale/vpx_scale_rtcd.pl
index 56b952ba3..44b115c7e 100644
--- a/libvpx/vpx_scale/vpx_scale_rtcd.pl
+++ b/libvpx/vpx_scale/vpx_scale_rtcd.pl
@@ -22,7 +22,7 @@ add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_yb
add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
+if (vpx_config("CONFIG_VP9") eq "yes") {
add_proto qw/void vpx_extend_frame_borders/, "struct yv12_buffer_config *ybf";
specialize qw/vpx_extend_frame_borders dspr2/;
diff --git a/libvpx/vpx_util/vpx_thread.h b/libvpx/vpx_util/vpx_thread.h
index de63c4da0..2062abd75 100644
--- a/libvpx/vpx_util/vpx_thread.h
+++ b/libvpx/vpx_util/vpx_thread.h
@@ -147,6 +147,152 @@ static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
pthread_mutex_lock(mutex);
return !ok;
}
+#elif defined(__OS2__)
+#define INCL_DOS
+#include <os2.h> // NOLINT
+
+#include <errno.h> // NOLINT
+#include <stdlib.h> // NOLINT
+#include <sys/builtin.h> // NOLINT
+
+#define pthread_t TID
+#define pthread_mutex_t HMTX
+
+typedef struct {
+ HEV event_sem_;
+ HEV ack_sem_;
+ volatile unsigned wait_count_;
+} pthread_cond_t;
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#define THREADFN void *
+#define THREAD_RETURN(val) (val)
+
+typedef struct {
+ void* (*start_)(void*);
+ void* arg_;
+} thread_arg;
+
+static void thread_start(void* arg) {
+ thread_arg targ = *(thread_arg *)arg;
+ free(arg);
+
+ targ.start_(targ.arg_);
+}
+
+static INLINE int pthread_create(pthread_t* const thread, const void* attr,
+ void* (*start)(void*),
+ void* arg) {
+ int tid;
+ thread_arg *targ = (thread_arg *)malloc(sizeof(*targ));
+ if (targ == NULL) return 1;
+
+ (void)attr;
+
+ targ->start_ = start;
+ targ->arg_ = arg;
+ tid = (pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ);
+ if (tid == -1) {
+ free(targ);
+ return 1;
+ }
+
+ *thread = tid;
+ return 0;
+}
+
+static INLINE int pthread_join(pthread_t thread, void** value_ptr) {
+ (void)value_ptr;
+ return DosWaitThread(&thread, DCWW_WAIT) != 0;
+}
+
+// Mutex
+static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex,
+ void* mutexattr) {
+ (void)mutexattr;
+ return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0;
+}
+
+static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) {
+ return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY;
+}
+
+static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) {
+ return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0;
+}
+
+static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) {
+ return DosReleaseMutexSem(*mutex) != 0;
+}
+
+static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) {
+ return DosCloseMutexSem(*mutex) != 0;
+}
+
+// Condition
+static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) {
+ int ok = 1;
+ ok &= DosCloseEventSem(condition->event_sem_) == 0;
+ ok &= DosCloseEventSem(condition->ack_sem_) == 0;
+ return !ok;
+}
+
+static INLINE int pthread_cond_init(pthread_cond_t *const condition,
+ void* cond_attr) {
+ int ok = 1;
+ (void)cond_attr;
+
+ ok &= DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE)
+ == 0;
+ ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0;
+ if (!ok) {
+ pthread_cond_destroy(condition);
+ return 1;
+ }
+ condition->wait_count_ = 0;
+ return 0;
+}
+
+static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
+ int ok = 1;
+
+ if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) {
+ ok &= DosPostEventSem(condition->event_sem_) == 0;
+ ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0;
+ }
+
+ return !ok;
+}
+
+static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) {
+ int ok = 1;
+
+ while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0))
+ ok &= pthread_cond_signal(condition) == 0;
+
+ return !ok;
+}
+
+static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
+ pthread_mutex_t *const mutex) {
+ int ok = 1;
+
+ __atomic_increment(&condition->wait_count_);
+
+ ok &= pthread_mutex_unlock(mutex) == 0;
+
+ ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0;
+
+ __atomic_decrement(&condition->wait_count_);
+
+ ok &= DosPostEventSem(condition->ack_sem_) == 0;
+
+ pthread_mutex_lock(mutex);
+
+ return !ok;
+}
#else // _WIN32
#include <pthread.h> // NOLINT
# define THREADFN void*
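
Note: the OS/2 shim added above exposes the same pthread subset the
library's worker code already uses, so callers look identical on every
platform. A minimal sketch, with a hypothetical worker body:

    static void *worker(void *arg) {
      (void)arg;
      /* decode a tile, filter a row, etc. */
      return NULL;
    }

    static int run_one_worker(void) {
      pthread_t tid;
      if (pthread_create(&tid, NULL, worker, NULL)) return 1;
      return pthread_join(tid, NULL);
    }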
diff --git a/libvpx/vpxdec.c b/libvpx/vpxdec.c
index 285d58e1e..dbe64aa94 100644
--- a/libvpx/vpxdec.c
+++ b/libvpx/vpxdec.c
@@ -28,7 +28,7 @@
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
@@ -257,8 +257,7 @@ static int read_frame(struct VpxDecInputContext *input, uint8_t **buf,
switch (input->vpx_input_ctx->file_type) {
#if CONFIG_WEBM_IO
case FILE_TYPE_WEBM:
- return webm_read_frame(input->webm_ctx,
- buf, bytes_in_buffer, buffer_size);
+ return webm_read_frame(input->webm_ctx, buf, bytes_in_buffer);
#endif
case FILE_TYPE_RAW:
return raw_read_frame(input->vpx_input_ctx->file,
@@ -642,7 +641,7 @@ static int main_loop(int argc, const char **argv_) {
summary = 1;
else if (arg_match(&arg, &threadsarg, argi))
cfg.threads = arg_parse_uint(&arg);
-#if CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
+#if CONFIG_VP9_DECODER
else if (arg_match(&arg, &frameparallelarg, argi))
frame_parallel = 1;
#endif
diff --git a/libvpx/vpxenc.c b/libvpx/vpxenc.c
index cb78226b3..efcf06495 100644
--- a/libvpx/vpxenc.c
+++ b/libvpx/vpxenc.c
@@ -32,10 +32,10 @@
#include "./ivfenc.h"
#include "./tools_common.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
#endif
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif
@@ -374,21 +374,22 @@ static const int vp8_arg_ctrl_map[] = {
};
#endif
-#if CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP9_ENCODER
static const arg_def_t cpu_used_vp9 = ARG_DEF(
NULL, "cpu-used", 1, "CPU Used (-8..8)");
static const arg_def_t tile_cols = ARG_DEF(
NULL, "tile-columns", 1, "Number of tile columns to use, log2");
static const arg_def_t tile_rows = ARG_DEF(
- NULL, "tile-rows", 1, "Number of tile rows to use, log2");
+ NULL, "tile-rows", 1,
+ "Number of tile rows to use, log2 (set to 0 while threads > 1)");
static const arg_def_t lossless = ARG_DEF(
- NULL, "lossless", 1, "Lossless mode");
+ NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)");
static const arg_def_t frame_parallel_decoding = ARG_DEF(
NULL, "frame-parallel", 1, "Enable frame parallel decodability features");
static const arg_def_t aq_mode = ARG_DEF(
NULL, "aq-mode", 1,
"Adaptive quantization mode (0: off (default), 1: variance 2: complexity, "
- "3: cyclic refresh)");
+ "3: cyclic refresh, 4: equator360)");
static const arg_def_t frame_periodic_boost = ARG_DEF(
NULL, "frame-boost", 1,
"Enable frame periodic boost (0: off (default), 1: on)");
@@ -443,6 +444,11 @@ static const struct arg_enum_list tune_content_enum[] = {
static const arg_def_t tune_content = ARG_DEF_ENUM(
NULL, "tune-content", 1, "Tune content type", tune_content_enum);
+
+static const arg_def_t target_level = ARG_DEF(
+ NULL, "target-level", 1,
+ "Target level (255: off (default); 0: only keep level stats; 10: level 1.0;"
+ " 11: level 1.1; ... 62: level 6.2)");
#endif
#if CONFIG_VP9_ENCODER
@@ -453,7 +459,10 @@ static const arg_def_t *vp9_args[] = {
&gf_cbr_boost_pct, &lossless,
&frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
&noise_sens, &tune_content, &input_color_space,
- &min_gf_interval, &max_gf_interval,
+ &min_gf_interval, &max_gf_interval, &target_level,
+#if CONFIG_VP9_HIGHBITDEPTH
+ &bitdeptharg, &inbitdeptharg,
+#endif // CONFIG_VP9_HIGHBITDEPTH
NULL
};
static const int vp9_arg_ctrl_map[] = {
@@ -466,33 +475,7 @@ static const int vp9_arg_ctrl_map[] = {
VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE,
- VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL,
- 0
-};
-#endif
-
-#if CONFIG_VP10_ENCODER
-static const arg_def_t *vp10_args[] = {
- &cpu_used_vp9, &auto_altref, &sharpness, &static_thresh,
- &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
- &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct,
- &gf_cbr_boost_pct, &lossless,
- &frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
- &noise_sens, &tune_content, &input_color_space,
- &min_gf_interval, &max_gf_interval,
- NULL
-};
-static const int vp10_arg_ctrl_map[] = {
- VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF,
- VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD,
- VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS,
- VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE,
- VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT,
- VP9E_SET_MAX_INTER_BITRATE_PCT, VP9E_SET_GF_CBR_BOOST_PCT,
- VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
- VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
- VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE,
- VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL,
+ VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, VP9E_SET_TARGET_LEVEL,
0
};
#endif
@@ -524,10 +507,6 @@ void usage_exit(void) {
fprintf(stderr, "\nVP9 Specific Options:\n");
arg_show_usage(stderr, vp9_args);
#endif
-#if CONFIG_VP10_ENCODER
- fprintf(stderr, "\nVP10 Specific Options:\n");
- arg_show_usage(stderr, vp10_args);
-#endif
fprintf(stderr, "\nStream timebase (--timebase):\n"
" The desired precision of timestamps in the output, expressed\n"
" in fractional seconds. Default is 1/1000.\n");
@@ -773,9 +752,7 @@ static int compare_img(const vpx_image_t *const img1,
#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
-#if CONFIG_VP10_ENCODER
-#define ARG_CTRL_CNT_MAX NELEMENTS(vp10_arg_ctrl_map)
-#elif CONFIG_VP9_ENCODER
+#if CONFIG_VP9_ENCODER
#define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map)
#else
#define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map)
@@ -783,7 +760,7 @@ static int compare_img(const vpx_image_t *const img1,
#if !CONFIG_WEBM_IO
typedef int stereo_format_t;
-struct EbmlGlobal { int debug; };
+struct WebmOutputContext { int debug; };
#endif
/* Per-stream configuration */
@@ -798,7 +775,6 @@ struct stream_config {
int arg_ctrls[ARG_CTRL_CNT_MAX][2];
int arg_ctrl_cnt;
int write_webm;
- int have_kf_max_dist;
#if CONFIG_VP9_HIGHBITDEPTH
// whether to use 16bit internal buffers
int use_16bit_internal;
@@ -812,7 +788,7 @@ struct stream_state {
struct stream_config config;
FILE *file;
struct rate_hist *rate_hist;
- struct EbmlGlobal ebml;
+ struct WebmOutputContext webm_ctx;
uint64_t psnr_sse_total;
uint64_t psnr_samples_total;
double psnr_totals[4];
@@ -943,7 +919,7 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) {
}
/* Validate global config */
if (global->passes == 0) {
-#if CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
+#if CONFIG_VP9_ENCODER
// Make default VP9 passes = 2 until there is a better quality 1-pass
// encoder
if (global->codec != NULL && global->codec->name != NULL)
@@ -1055,13 +1031,13 @@ static struct stream_state *new_stream(struct VpxEncoderConfig *global,
stream->config.write_webm = 1;
#if CONFIG_WEBM_IO
stream->config.stereo_fmt = STEREO_FORMAT_MONO;
- stream->ebml.last_pts_ns = -1;
- stream->ebml.writer = NULL;
- stream->ebml.segment = NULL;
+ stream->webm_ctx.last_pts_ns = -1;
+ stream->webm_ctx.writer = NULL;
+ stream->webm_ctx.segment = NULL;
#endif
/* Allows removal of the application version from the EBML tags */
- stream->ebml.debug = global->debug;
+ stream->webm_ctx.debug = global->debug;
/* Default lag_in_frames is 0 in realtime mode */
if (global->deadline == VPX_DL_REALTIME)
@@ -1101,13 +1077,6 @@ static int parse_stream_params(struct VpxEncoderConfig *global,
ctrl_args = vp9_args;
ctrl_args_map = vp9_arg_ctrl_map;
#endif
-#if CONFIG_VP10_ENCODER
- } else if (strcmp(global->codec->name, "vp10") == 0) {
- // TODO(jingning): Reuse VP9 specific encoder configuration parameters.
- // Consider to expand this set for VP10 encoder control.
- ctrl_args = vp10_args;
- ctrl_args_map = vp10_arg_ctrl_map;
-#endif
}
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
@@ -1218,13 +1187,11 @@ static int parse_stream_params(struct VpxEncoderConfig *global,
config->cfg.kf_min_dist = arg_parse_uint(&arg);
} else if (arg_match(&arg, &kf_max_dist, argi)) {
config->cfg.kf_max_dist = arg_parse_uint(&arg);
- config->have_kf_max_dist = 1;
} else if (arg_match(&arg, &kf_disabled, argi)) {
config->cfg.kf_mode = VPX_KF_DISABLED;
#if CONFIG_VP9_HIGHBITDEPTH
} else if (arg_match(&arg, &test16bitinternalarg, argi)) {
- if (strcmp(global->codec->name, "vp9") == 0 ||
- strcmp(global->codec->name, "vp10") == 0) {
+ if (strcmp(global->codec->name, "vp9") == 0) {
test_16bit_internal = 1;
}
#endif
@@ -1258,8 +1225,7 @@ static int parse_stream_params(struct VpxEncoderConfig *global,
}
}
#if CONFIG_VP9_HIGHBITDEPTH
- if (strcmp(global->codec->name, "vp9") == 0 ||
- strcmp(global->codec->name, "vp10") == 0) {
+ if (strcmp(global->codec->name, "vp9") == 0) {
config->use_16bit_internal = test_16bit_internal |
(config->cfg.g_profile > 1);
}
@@ -1346,19 +1312,6 @@ static void set_stream_dimensions(struct stream_state *stream,
}
}
-
-static void set_default_kf_interval(struct stream_state *stream,
- struct VpxEncoderConfig *global) {
- /* Use a max keyframe interval of 5 seconds, if none was
- * specified on the command line.
- */
- if (!stream->config.have_kf_max_dist) {
- double framerate = (double)global->framerate.num / global->framerate.den;
- if (framerate > 0.0)
- stream->config.cfg.kf_max_dist = (unsigned int)(5.0 * framerate);
- }
-}
-
static const char* file_type_to_string(enum VideoFileType t) {
switch (t) {
case FILE_TYPE_RAW: return "RAW";
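Note that with set_default_kf_interval() and the have_kf_max_dist flag removed above, the library default for kf_max_dist now applies unless --kf-max-dist is given. A caller who wants the old 5-second cap back can set it directly on the config; a minimal sketch, assuming the source frame rate is known:

    /* Sketch: reproduce the removed 5-second default keyframe interval.
     * cfg has been filled in by vpx_codec_enc_config_default();
     * num/den describe the source frame rate. */
    static void apply_5s_kf_interval(vpx_codec_enc_cfg_t *cfg, int num, int den) {
      const double framerate = (double)num / den;
      if (framerate > 0.0)
        cfg->kf_max_dist = (unsigned int)(5.0 * framerate);
    }
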
@@ -1457,13 +1410,15 @@ static void open_output_file(struct stream_state *stream,
#if CONFIG_WEBM_IO
if (stream->config.write_webm) {
- stream->ebml.stream = stream->file;
- write_webm_file_header(&stream->ebml, cfg,
+ stream->webm_ctx.stream = stream->file;
+ write_webm_file_header(&stream->webm_ctx, cfg,
&global->framerate,
stream->config.stereo_fmt,
global->codec->fourcc,
pixel_aspect_ratio);
}
+#else
+ (void)pixel_aspect_ratio;
#endif
if (!stream->config.write_webm) {
@@ -1481,7 +1436,7 @@ static void close_output_file(struct stream_state *stream,
#if CONFIG_WEBM_IO
if (stream->config.write_webm) {
- write_webm_file_footer(&stream->ebml);
+ write_webm_file_footer(&stream->webm_ctx);
}
#endif
@@ -1708,7 +1663,7 @@ static void get_cx_data(struct stream_state *stream,
update_rate_histogram(stream->rate_hist, cfg, pkt);
#if CONFIG_WEBM_IO
if (stream->config.write_webm) {
- write_webm_block(&stream->ebml, cfg, pkt);
+ write_webm_block(&stream->webm_ctx, cfg, pkt);
}
#endif
if (!stream->config.write_webm) {
@@ -1996,7 +1951,7 @@ int main(int argc, const char **argv_) {
usage_exit();
/* Decide if other chroma subsamplings than 4:2:0 are supported */
- if (global.codec->fourcc == VP9_FOURCC || global.codec->fourcc == VP10_FOURCC)
+ if (global.codec->fourcc == VP9_FOURCC)
input.only_i420 = 0;
for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
@@ -2060,9 +2015,11 @@ int main(int argc, const char **argv_) {
#if !CONFIG_WEBM_IO
FOREACH_STREAM({
- stream->config.write_webm = 0;
- warn("vpxenc was compiled without WebM container support."
- "Producing IVF output");
+ if (stream->config.write_webm) {
+ stream->config.write_webm = 0;
+ warn("vpxenc was compiled without WebM container support."
+ "Producing IVF output");
+ }
});
#endif
@@ -2072,10 +2029,10 @@ int main(int argc, const char **argv_) {
if (!global.have_framerate) {
global.framerate.num = input.framerate.numerator;
global.framerate.den = input.framerate.denominator;
+ FOREACH_STREAM(stream->config.cfg.g_timebase.den = global.framerate.num;
+ stream->config.cfg.g_timebase.num = global.framerate.den);
}
- FOREACH_STREAM(set_default_kf_interval(stream, &global));
-
/* Show configuration */
if (global.verbose && pass == 0)
FOREACH_STREAM(show_stream_config(stream, &global, &input));
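The FOREACH_STREAM assignment added above makes each stream's timebase the reciprocal of the detected input frame rate, so one timestamp tick corresponds to exactly one frame. A worked sketch with an NTSC input:

    /* Worked example of the timebase assignment: an input reporting
     * 30000/1001 fps yields a timebase of 1001/30000 seconds per tick. */
    vpx_rational_t framerate = { 30000, 1001 };  /* num, den */
    vpx_codec_enc_cfg_t cfg;
    cfg.g_timebase.den = framerate.num;  /* 30000 */
    cfg.g_timebase.num = framerate.den;  /* 1001  */
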
@@ -2100,8 +2057,7 @@ int main(int argc, const char **argv_) {
FOREACH_STREAM(initialize_encoder(stream, &global));
#if CONFIG_VP9_HIGHBITDEPTH
- if (strcmp(global.codec->name, "vp9") == 0 ||
- strcmp(global.codec->name, "vp10") == 0) {
+ if (strcmp(global.codec->name, "vp9") == 0) {
// Check to see if at least one stream uses 16 bit internal.
// Currently assume that the bit_depths for all streams using
// highbitdepth are the same.
diff --git a/libvpx/vpxstats.c b/libvpx/vpxstats.c
index 172d8937c..16728ce09 100644
--- a/libvpx/vpxstats.c
+++ b/libvpx/vpxstats.c
@@ -26,17 +26,6 @@ int stats_open_file(stats_io_t *stats, const char *fpf, int pass) {
stats->buf.buf = NULL;
res = (stats->file != NULL);
} else {
-#if USE_POSIX_MMAP
- struct stat stat_buf;
- int fd;
-
- fd = open(fpf, O_RDONLY);
- stats->file = fdopen(fd, "rb");
- fstat(fd, &stat_buf);
- stats->buf.sz = stat_buf.st_size;
- stats->buf.buf = mmap(NULL, stats->buf.sz, PROT_READ, MAP_PRIVATE, fd, 0);
- res = (stats->buf.buf != NULL);
-#else
size_t nbytes;
stats->file = fopen(fpf, "rb");
@@ -58,7 +47,6 @@ int stats_open_file(stats_io_t *stats, const char *fpf, int pass) {
nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file);
res = (nbytes == stats->buf.sz);
-#endif /* USE_POSIX_MMAP */
}
return res;
@@ -82,11 +70,7 @@ int stats_open_mem(stats_io_t *stats, int pass) {
void stats_close(stats_io_t *stats, int last_pass) {
if (stats->file) {
if (stats->pass == last_pass) {
-#if USE_POSIX_MMAP
- munmap(stats->buf.buf, stats->buf.sz);
-#else
free(stats->buf.buf);
-#endif /* USE_POSIX_MMAP */
}
fclose(stats->file);
diff --git a/libvpx/webmdec.cc b/libvpx/webmdec.cc
index f541cfecc..36dbd92bf 100644
--- a/libvpx/webmdec.cc
+++ b/libvpx/webmdec.cc
@@ -13,8 +13,8 @@
#include <cstring>
#include <cstdio>
-#include "third_party/libwebm/mkvparser.hpp"
-#include "third_party/libwebm/mkvreader.hpp"
+#include "third_party/libwebm/mkvparser/mkvparser.h"
+#include "third_party/libwebm/mkvparser/mkvreader.h"
namespace {
@@ -103,8 +103,6 @@ int file_is_webm(struct WebmInputContext *webm_ctx,
vpx_ctx->fourcc = VP8_FOURCC;
} else if (!strncmp(video_track->GetCodecId(), "V_VP9", 5)) {
vpx_ctx->fourcc = VP9_FOURCC;
- } else if (!strncmp(video_track->GetCodecId(), "V_VP10", 6)) {
- vpx_ctx->fourcc = VP10_FOURCC;
} else {
rewind_and_reset(webm_ctx, vpx_ctx);
return 0;
@@ -122,7 +120,6 @@ int file_is_webm(struct WebmInputContext *webm_ctx,
int webm_read_frame(struct WebmInputContext *webm_ctx,
uint8_t **buffer,
- size_t *bytes_in_buffer,
size_t *buffer_size) {
// This check is needed for frame parallel decoding, in which case this
// function could be called even after it has reached end of input stream.
@@ -147,7 +144,7 @@ int webm_read_frame(struct WebmInputContext *webm_ctx,
} else if (block_entry_eos || block_entry->EOS()) {
cluster = segment->GetNext(cluster);
if (cluster == NULL || cluster->EOS()) {
- *bytes_in_buffer = 0;
+ *buffer_size = 0;
webm_ctx->reached_eos = 1;
return 1;
}
@@ -164,7 +161,7 @@ int webm_read_frame(struct WebmInputContext *webm_ctx,
}
get_new_block = true;
}
- if (status) {
+ if (status || block_entry == NULL) {
return -1;
}
if (get_new_block) {
@@ -187,10 +184,9 @@ int webm_read_frame(struct WebmInputContext *webm_ctx,
if (*buffer == NULL) {
return -1;
}
- *buffer_size = frame.len;
webm_ctx->buffer = *buffer;
}
- *bytes_in_buffer = frame.len;
+ *buffer_size = frame.len;
webm_ctx->timestamp_ns = block->GetTime(cluster);
webm_ctx->is_key_frame = block->IsKey();
@@ -203,10 +199,9 @@ int webm_guess_framerate(struct WebmInputContext *webm_ctx,
struct VpxInputContext *vpx_ctx) {
uint32_t i = 0;
uint8_t *buffer = NULL;
- size_t bytes_in_buffer = 0;
size_t buffer_size = 0;
while (webm_ctx->timestamp_ns < 1000000000 && i < 50) {
- if (webm_read_frame(webm_ctx, &buffer, &bytes_in_buffer, &buffer_size)) {
+ if (webm_read_frame(webm_ctx, &buffer, &buffer_size)) {
break;
}
++i;
diff --git a/libvpx/webmdec.h b/libvpx/webmdec.h
index 7d1638035..aa371f321 100644
--- a/libvpx/webmdec.h
+++ b/libvpx/webmdec.h
@@ -42,22 +42,18 @@ int file_is_webm(struct WebmInputContext *webm_ctx,
// Reads a WebM Video Frame. Memory for the buffer is created, owned and managed
// by this function. For the first call, |buffer| should be NULL and
-// |*bytes_in_buffer| should be 0. Once all the frames are read and used,
+// |*buffer_size| should be 0. Once all the frames are read and used,
// webm_free() should be called, otherwise there will be a leak.
// Parameters:
// webm_ctx - WebmInputContext object
// buffer - pointer where the frame data will be filled.
-// bytes_in_buffer - pointer to buffer size.
-// buffer_size - unused TODO(vigneshv): remove this
+// buffer_size - pointer to buffer size.
// Return values:
// 0 - Success
// 1 - End of Stream
// -1 - Error
-// TODO(vigneshv): Make the return values consistent across all functions in
-// this file.
int webm_read_frame(struct WebmInputContext *webm_ctx,
uint8_t **buffer,
- size_t *bytes_in_buffer,
size_t *buffer_size);
// Guesses the frame rate of the input file based on the container timestamps.
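After this signature change the loop in webm_guess_framerate() above is the canonical caller; a minimal consumer of the two-argument API would look like the following sketch (decode_frame is a hypothetical placeholder):

    /* Sketch of the updated calling convention: one size out-parameter.
     * The function owns the buffer; finish with webm_free(). */
    uint8_t *buffer = NULL;  /* must start as NULL */
    size_t buffer_size = 0;  /* must start as 0 */
    int status;
    while ((status = webm_read_frame(webm_ctx, &buffer, &buffer_size)) == 0) {
      decode_frame(buffer, buffer_size);  /* hypothetical consumer */
    }
    /* status: 1 = end of stream, -1 = error. */
    webm_free(webm_ctx);
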
diff --git a/libvpx/webmenc.cc b/libvpx/webmenc.cc
index d41e70044..9929969a0 100644
--- a/libvpx/webmenc.cc
+++ b/libvpx/webmenc.cc
@@ -11,22 +11,22 @@
#include <string>
-#include "third_party/libwebm/mkvmuxer.hpp"
-#include "third_party/libwebm/mkvmuxerutil.hpp"
-#include "third_party/libwebm/mkvwriter.hpp"
+#include "third_party/libwebm/mkvmuxer/mkvmuxer.h"
+#include "third_party/libwebm/mkvmuxer/mkvmuxerutil.h"
+#include "third_party/libwebm/mkvmuxer/mkvwriter.h"
namespace {
const uint64_t kDebugTrackUid = 0xDEADBEEF;
const int kVideoTrackNumber = 1;
} // namespace
-void write_webm_file_header(struct EbmlGlobal *glob,
+void write_webm_file_header(struct WebmOutputContext *webm_ctx,
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
stereo_format_t stereo_fmt,
unsigned int fourcc,
const struct VpxRational *par) {
- mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(glob->stream);
+ mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(webm_ctx->stream);
mkvmuxer::Segment *const segment = new mkvmuxer::Segment();
segment->Init(writer);
segment->set_mode(mkvmuxer::Segment::kFile);
@@ -36,7 +36,7 @@ void write_webm_file_header(struct EbmlGlobal *glob,
const uint64_t kTimecodeScale = 1000000;
info->set_timecode_scale(kTimecodeScale);
std::string version = "vpxenc";
- if (!glob->debug) {
+ if (!webm_ctx->debug) {
version.append(std::string(" ") + vpx_codec_version_str());
}
info->set_writing_app(version.c_str());
@@ -55,13 +55,8 @@ void write_webm_file_header(struct EbmlGlobal *glob,
codec_id = "V_VP8";
break;
case VP9_FOURCC:
- codec_id = "V_VP9";
- break;
- case VP10_FOURCC:
- codec_id = "V_VP10";
- break;
default:
- codec_id = "V_VP10";
+ codec_id = "V_VP9";
break;
}
video_track->set_codec_id(codec_id);
@@ -74,23 +69,23 @@ void write_webm_file_header(struct EbmlGlobal *glob,
video_track->set_display_width(display_width);
video_track->set_display_height(cfg->g_h);
}
- if (glob->debug) {
+ if (webm_ctx->debug) {
video_track->set_uid(kDebugTrackUid);
}
- glob->writer = writer;
- glob->segment = segment;
+ webm_ctx->writer = writer;
+ webm_ctx->segment = segment;
}
-void write_webm_block(struct EbmlGlobal *glob,
+void write_webm_block(struct WebmOutputContext *webm_ctx,
const vpx_codec_enc_cfg_t *cfg,
const vpx_codec_cx_pkt_t *pkt) {
mkvmuxer::Segment *const segment =
- reinterpret_cast<mkvmuxer::Segment*>(glob->segment);
+ reinterpret_cast<mkvmuxer::Segment*>(webm_ctx->segment);
int64_t pts_ns = pkt->data.frame.pts * 1000000000ll *
cfg->g_timebase.num / cfg->g_timebase.den;
- if (pts_ns <= glob->last_pts_ns)
- pts_ns = glob->last_pts_ns + 1000000;
- glob->last_pts_ns = pts_ns;
+ if (pts_ns <= webm_ctx->last_pts_ns)
+ pts_ns = webm_ctx->last_pts_ns + 1000000;
+ webm_ctx->last_pts_ns = pts_ns;
segment->AddFrame(static_cast<uint8_t*>(pkt->data.frame.buf),
pkt->data.frame.sz,
@@ -99,14 +94,14 @@ void write_webm_block(struct EbmlGlobal *glob,
pkt->data.frame.flags & VPX_FRAME_IS_KEY);
}
-void write_webm_file_footer(struct EbmlGlobal *glob) {
+void write_webm_file_footer(struct WebmOutputContext *webm_ctx) {
mkvmuxer::MkvWriter *const writer =
- reinterpret_cast<mkvmuxer::MkvWriter*>(glob->writer);
+ reinterpret_cast<mkvmuxer::MkvWriter*>(webm_ctx->writer);
mkvmuxer::Segment *const segment =
- reinterpret_cast<mkvmuxer::Segment*>(glob->segment);
+ reinterpret_cast<mkvmuxer::Segment*>(webm_ctx->segment);
segment->Finalize();
delete segment;
delete writer;
- glob->writer = NULL;
- glob->segment = NULL;
+ webm_ctx->writer = NULL;
+ webm_ctx->segment = NULL;
}
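The pts handling renamed above converts encoder PTS (timebase ticks) to nanoseconds and nudges any non-increasing timestamp forward by 1 ms so WebM block times stay strictly increasing. A worked sketch of the arithmetic:

    /* Worked example of the pts_ns computation in write_webm_block():
     * with a 1001/30000 timebase, frame pts = 3 lands at 100.1 ms. */
    const int64_t pts = 3;
    const int64_t num = 1001, den = 30000;  /* cfg->g_timebase */
    int64_t pts_ns = pts * 1000000000ll * num / den;  /* 100100000 ns */
    /* If pts_ns <= last_pts_ns, it is bumped to last_pts_ns + 1000000
     * (1 ms) before being handed to mkvmuxer. */
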
diff --git a/libvpx/webmenc.h b/libvpx/webmenc.h
index c255d3de6..ad30664e3 100644
--- a/libvpx/webmenc.h
+++ b/libvpx/webmenc.h
@@ -20,8 +20,7 @@
extern "C" {
#endif
-/* TODO(vigneshv): Rename this struct */
-struct EbmlGlobal {
+struct WebmOutputContext {
int debug;
FILE *stream;
int64_t last_pts_ns;
@@ -38,18 +37,18 @@ typedef enum stereo_format {
STEREO_FORMAT_RIGHT_LEFT = 11
} stereo_format_t;
-void write_webm_file_header(struct EbmlGlobal *glob,
+void write_webm_file_header(struct WebmOutputContext *webm_ctx,
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
stereo_format_t stereo_fmt,
unsigned int fourcc,
const struct VpxRational *par);
-void write_webm_block(struct EbmlGlobal *glob,
+void write_webm_block(struct WebmOutputContext *webm_ctx,
const vpx_codec_enc_cfg_t *cfg,
const vpx_codec_cx_pkt_t *pkt);
-void write_webm_file_footer(struct EbmlGlobal *glob);
+void write_webm_file_footer(struct WebmOutputContext *webm_ctx);
#ifdef __cplusplus
} // extern "C"