diff options
author | johannkoenig@chromium.org <johannkoenig@chromium.org@4ff67af0-8c30-449e-8e8b-ad334ec8d88c> | 2014-04-10 17:14:25 +0000 |
---|---|---|
committer | johannkoenig@chromium.org <johannkoenig@chromium.org@4ff67af0-8c30-449e-8e8b-ad334ec8d88c> | 2014-04-10 17:14:25 +0000 |
commit | 93a74791c8e808ea76001ee07693aa2a5fdd3500 (patch) | |
tree | 88c3a21369388876dccedda352d254b16007ba22 /source | |
parent | ef98d99073c8ddc400dac9bd4a1b31fb7240d861 (diff) | |
download | libvpx-93a74791c8e808ea76001ee07693aa2a5fdd3500.tar.gz |
libvpx: Pull from upstream
Update grep pattern in update_libvpx.sh to account for
alternative locales.
Update generate_gypi.sh to use the new perl rtcd scripts.
Current HEAD: 23ccf7192434399e5db3a981fbfde40e1712ed5f
git log from upstream:
23ccf71 Merge "Fix encoder uninitialized read errors reported by drmemory"
4b8ad4a Merge "Fix coding format in vp9_rc_regulate_q"
675d95f Merge "Prevent the usage of invalid best_mode_index"
9034094 Merge "Remove duplicate code"
81056e2 Merge "Minor code cleanup"
65e650e Merge "Revert "Converting set_prev_mi() to get_prev_mi().""
3dff8aa Merge "Moving q_trans[] table to vp9_quantize.{c, h}."
f10c173 Merge "Removing unused code from vp9_onyx_if.c."
1dcc1aa Prevent the usage of invalid best_mode_index
41ea9ef Merge "Removing redundant assignments."
71ffc7d Merge "Remove unused tile arguments from vp_rdopt."
1eee13c Merge "Cleanup vp9_rd_pick_inter_mode_sub8x8()."
2255085 Fix coding format in vp9_rc_regulate_q
60def47 Revert "Converting set_prev_mi() to get_prev_mi()."
2dc9248 Merge "Fix the setting of mode_skip_mask"
b60d23f Removing unused code from vp9_onyx_if.c.
d1a396d Moving q_trans[] table to vp9_quantize.{c, h}.
4fffefe Merge "Fix avx builds on macosx with clang 5.0."
585e01b Remove duplicate code
7cc78c0 Merge "Adding vp9_inc_frame_in_layer() function."
7c891ed Minor code cleanup
3ab4d57 Remove unused tile arguments from vp_rdopt.
<...>
TBR=tomfinegan@chromium.org
Review URL: https://codereview.chromium.org/232133009
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/libvpx@263021 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
Diffstat (limited to 'source')
257 files changed, 28426 insertions, 11112 deletions
diff --git a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h index d1f0da4..77cc0b7 100644 --- a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h @@ -422,6 +422,7 @@ void vp8_yv12_copy_partial_frame_neon(struct yv12_buffer_config *src_ybc, struct RTCD_EXTERN void (*vp8_yv12_copy_partial_frame)(struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -435,253 +436,155 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; if (flags & HAS_MEDIA) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_armv6; if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MEDIA) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_armv6; if (flags & HAS_NEON) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_neon; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MEDIA) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_armv6; if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MEDIA) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_armv6; if (flags & HAS_NEON) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon; - - - - - - - - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MEDIA) vp8_copy_mem16x16 = vp8_copy_mem16x16_v6; if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MEDIA) vp8_copy_mem8x4 = vp8_copy_mem8x4_v6; if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MEDIA) vp8_copy_mem8x8 = vp8_copy_mem8x8_v6; if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon; - 
vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MEDIA) vp8_dc_only_idct_add = vp8_dc_only_idct_add_v6; if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_NEON) vp8_denoiser_filter = vp8_denoiser_filter_neon; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MEDIA) vp8_dequant_idct_add = vp8_dequant_idct_add_v6; if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MEDIA) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MEDIA) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_v6; if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MEDIA) vp8_dequantize_b = vp8_dequantize_b_v6; if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; - - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_MEDIA) vp8_fast_quantize_b = vp8_fast_quantize_b_armv6; if (flags & HAS_NEON) vp8_fast_quantize_b = vp8_fast_quantize_b_neon; - vp8_fast_quantize_b_pair = vp8_fast_quantize_b_pair_c; if (flags & HAS_NEON) vp8_fast_quantize_b_pair = vp8_fast_quantize_b_pair_neon; - - - - - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_NEON) vp8_get4x4sse_cs = vp8_get4x4sse_cs_neon; - - vp8_intra4x4_predict = vp8_intra4x4_predict_c; if (flags & HAS_MEDIA) vp8_intra4x4_predict = vp8_intra4x4_predict_armv6; - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MEDIA) vp8_loop_filter_bh = vp8_loop_filter_bh_armv6; if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MEDIA) vp8_loop_filter_bv = 
vp8_loop_filter_bv_armv6; if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MEDIA) vp8_loop_filter_mbh = vp8_loop_filter_mbh_armv6; if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MEDIA) vp8_loop_filter_mbv = vp8_loop_filter_mbv_armv6; if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; - - - - - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MEDIA) vp8_mse16x16 = vp8_mse16x16_armv6; if (flags & HAS_NEON) vp8_mse16x16 = vp8_mse16x16_neon; - - - vp8_quantize_mb = vp8_quantize_mb_c; if (flags & HAS_NEON) vp8_quantize_mb = vp8_quantize_mb_neon; - vp8_quantize_mbuv = vp8_quantize_mbuv_c; if (flags & HAS_NEON) vp8_quantize_mbuv = vp8_quantize_mbuv_neon; - vp8_quantize_mby = vp8_quantize_mby_c; if (flags & HAS_NEON) vp8_quantize_mby = vp8_quantize_mby_neon; - - - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MEDIA) vp8_sad16x16 = vp8_sad16x16_armv6; if (flags & HAS_NEON) vp8_sad16x16 = 
vp8_sad16x16_neon; - - - - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_NEON) vp8_sad16x8 = vp8_sad16x8_neon; - - - - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_NEON) vp8_sad4x4 = vp8_sad4x4_neon; - - - - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_NEON) vp8_sad8x16 = vp8_sad8x16_neon; - - - - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_NEON) vp8_sad8x8 = vp8_sad8x8_neon; - - - - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MEDIA) vp8_short_fdct4x4 = vp8_short_fdct4x4_armv6; if (flags & HAS_NEON) vp8_short_fdct4x4 = vp8_short_fdct4x4_neon; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MEDIA) vp8_short_fdct8x4 = vp8_short_fdct8x4_armv6; if (flags & HAS_NEON) vp8_short_fdct8x4 = vp8_short_fdct8x4_neon; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MEDIA) vp8_short_idct4x4llm = vp8_short_idct4x4llm_v6_dual; if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MEDIA) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_v6; if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_MEDIA) vp8_short_walsh4x4 = vp8_short_walsh4x4_armv6; if (flags & HAS_NEON) vp8_short_walsh4x4 = vp8_short_walsh4x4_neon; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MEDIA) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_armv6; if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MEDIA) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_armv6; if (flags & HAS_NEON) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_neon; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MEDIA) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_armv6; if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & 
HAS_MEDIA) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_armv6; if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MEDIA) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_armv6; if (flags & HAS_NEON) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_neon; - - - - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MEDIA) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_armv6; if (flags & HAS_NEON) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_neon; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MEDIA) vp8_subtract_b = vp8_subtract_b_armv6; if (flags & HAS_NEON) vp8_subtract_b = vp8_subtract_b_neon; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MEDIA) vp8_subtract_mbuv = vp8_subtract_mbuv_armv6; if (flags & HAS_NEON) vp8_subtract_mbuv = vp8_subtract_mbuv_neon; - vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MEDIA) vp8_subtract_mby = vp8_subtract_mby_armv6; if (flags & HAS_NEON) vp8_subtract_mby = vp8_subtract_mby_neon; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MEDIA) vp8_variance16x16 = vp8_variance16x16_armv6; if (flags & HAS_NEON) vp8_variance16x16 = vp8_variance16x16_neon; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_NEON) vp8_variance16x8 = vp8_variance16x8_neon; - - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_NEON) vp8_variance8x16 = vp8_variance8x16_neon; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MEDIA) vp8_variance8x8 = vp8_variance8x8_armv6; if (flags & HAS_NEON) vp8_variance8x8 = vp8_variance8x8_neon; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MEDIA) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if 
(flags & HAS_MEDIA) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MEDIA) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon; - vp8_yv12_copy_partial_frame = vp8_yv12_copy_partial_frame_c; if (flags & HAS_NEON) vp8_yv12_copy_partial_frame = vp8_yv12_copy_partial_frame_neon; } diff --git a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h index e6eb470..127c325 100644 --- a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h @@ -232,6 +232,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -731,6 +734,7 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -741,288 +745,92 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_NEON) vp9_convolve8 = vp9_convolve8_neon; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_NEON) vp9_convolve8_avg = vp9_convolve8_avg_neon; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if 
(flags & HAS_NEON) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_neon; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_NEON) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_neon; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_NEON) vp9_convolve8_horiz = vp9_convolve8_horiz_neon; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_NEON) vp9_convolve8_vert = vp9_convolve8_vert_neon; - vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_NEON) vp9_convolve_avg = vp9_convolve_avg_neon; - vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_NEON) vp9_convolve_copy = vp9_convolve_copy_neon; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_NEON) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_neon; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_NEON) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_neon; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_NEON) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_neon; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_NEON) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_neon; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_NEON) vp9_idct16x16_10_add = vp9_idct16x16_10_add_neon; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_NEON) vp9_idct16x16_1_add = vp9_idct16x16_1_add_neon; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_NEON) vp9_idct16x16_256_add = vp9_idct16x16_256_add_neon; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_NEON) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_neon; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_NEON) vp9_idct32x32_1_add = vp9_idct32x32_1_add_neon; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_NEON) vp9_idct32x32_34_add = vp9_idct32x32_1024_add_neon; - vp9_idct4x4_16_add = 
vp9_idct4x4_16_add_c; if (flags & HAS_NEON) vp9_idct4x4_16_add = vp9_idct4x4_16_add_neon; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_NEON) vp9_idct4x4_1_add = vp9_idct4x4_1_add_neon; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_NEON) vp9_idct8x8_10_add = vp9_idct8x8_10_add_neon; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_NEON) vp9_idct8x8_1_add = vp9_idct8x8_1_add_neon; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_NEON) vp9_idct8x8_64_add = vp9_idct8x8_64_add_neon; - - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_NEON) vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_NEON) vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_NEON) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_neon; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_NEON) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_neon; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_NEON) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_neon; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_NEON) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_neon; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_NEON) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_neon; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_NEON) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_neon; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_NEON) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_neon; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if (flags & HAS_NEON) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_neon; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_NEON) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_neon; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_NEON) vp9_lpf_vertical_8 = 
vp9_lpf_vertical_8_neon; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_NEON) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_neon; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_c; if (flags & HAS_NEON) vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_neon; - vp9_tm_predictor_32x32 = vp9_tm_predictor_32x32_c; if (flags & HAS_NEON) vp9_tm_predictor_32x32 = vp9_tm_predictor_32x32_neon; - vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_c; if (flags & HAS_NEON) vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_neon; - vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_c; if (flags & HAS_NEON) vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_neon; - vp9_v_predictor_16x16 = vp9_v_predictor_16x16_c; if (flags & HAS_NEON) vp9_v_predictor_16x16 = vp9_v_predictor_16x16_neon; - vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c; if (flags & HAS_NEON) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_neon; - vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c; if (flags & HAS_NEON) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_neon; - vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_NEON) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_neon; } diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index 83869bc..7b66017 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section 
.note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/source/config/linux/arm-neon-cpu-detect/vpx_config.h index a6a6152..609fcab 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h index 7ec35db..33bed21 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h @@ -49,10 +49,10 @@ void vp9_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); #define vp9_extend_frame_inner_borders vp9_extend_frame_inner_borders_c void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -void vpx_yv12_copy_y_neon(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -RTCD_EXTERN void (*vpx_yv12_copy_y)(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -63,23 +63,10 @@ static void setup_rtcd_internal(void) (void)flags; - - - - - - - vp8_yv12_copy_frame = vp8_yv12_copy_frame_c; if (flags & HAS_NEON) vp8_yv12_copy_frame = vp8_yv12_copy_frame_neon; - vp8_yv12_extend_frame_borders = vp8_yv12_extend_frame_borders_c; if (flags & HAS_NEON) vp8_yv12_extend_frame_borders = vp8_yv12_extend_frame_borders_neon; - - - - vpx_yv12_copy_y = vpx_yv12_copy_y_c; - if (flags 
& HAS_NEON) vpx_yv12_copy_y = vpx_yv12_copy_y_neon; } #endif diff --git a/source/config/linux/arm-neon/vp8_rtcd.h b/source/config/linux/arm-neon/vp8_rtcd.h index 34661bd..a52d575 100644 --- a/source/config/linux/arm-neon/vp8_rtcd.h +++ b/source/config/linux/arm-neon/vp8_rtcd.h @@ -422,6 +422,7 @@ void vp8_yv12_copy_partial_frame_neon(struct yv12_buffer_config *src_ybc, struct #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_neon void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -432,7 +433,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm-neon/vp9_rtcd.h b/source/config/linux/arm-neon/vp9_rtcd.h index dbb9fc3..b757556 100644 --- a/source/config/linux/arm-neon/vp9_rtcd.h +++ b/source/config/linux/arm-neon/vp9_rtcd.h @@ -232,6 +232,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -731,6 +734,7 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -741,7 +745,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm-neon/vpx_config.asm b/source/config/linux/arm-neon/vpx_config.asm index 007f243..c42b4ed 100644 --- a/source/config/linux/arm-neon/vpx_config.asm +++ b/source/config/linux/arm-neon/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ 
CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm-neon/vpx_config.h b/source/config/linux/arm-neon/vpx_config.h index f9da536..7d95fee 100644 --- a/source/config/linux/arm-neon/vpx_config.h +++ b/source/config/linux/arm-neon/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm-neon/vpx_scale_rtcd.h b/source/config/linux/arm-neon/vpx_scale_rtcd.h index dc9ab6f..58a946b 100644 --- a/source/config/linux/arm-neon/vpx_scale_rtcd.h +++ b/source/config/linux/arm-neon/vpx_scale_rtcd.h @@ -49,10 +49,10 @@ void vp9_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); #define vp9_extend_frame_inner_borders vp9_extend_frame_inner_borders_c void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -void vpx_yv12_copy_y_neon(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -#define vpx_yv12_copy_y vpx_yv12_copy_y_neon +#define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -63,7 +63,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm/vp8_rtcd.h b/source/config/linux/arm/vp8_rtcd.h index 5a48adc..ab5fa41 100644 --- a/source/config/linux/arm/vp8_rtcd.h +++ 
b/source/config/linux/arm/vp8_rtcd.h @@ -367,6 +367,7 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -377,7 +378,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm/vp9_rtcd.h b/source/config/linux/arm/vp9_rtcd.h index 9c7e204..4d3884c 100644 --- a/source/config/linux/arm/vp9_rtcd.h +++ b/source/config/linux/arm/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -687,6 +690,7 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -697,7 +701,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm/vpx_config.asm b/source/config/linux/arm/vpx_config.asm index 08822e2..4e652ce 100644 --- a/source/config/linux/arm/vpx_config.asm +++ b/source/config/linux/arm/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ 
CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm/vpx_config.h b/source/config/linux/arm/vpx_config.h index f7d694b..1b7367b 100644 --- a/source/config/linux/arm/vpx_config.h +++ b/source/config/linux/arm/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm/vpx_scale_rtcd.h b/source/config/linux/arm/vpx_scale_rtcd.h index 4fb918a..0a6d790 100644 --- a/source/config/linux/arm/vpx_scale_rtcd.h +++ b/source/config/linux/arm/vpx_scale_rtcd.h @@ -50,6 +50,7 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -60,7 +61,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/generic/vp8_rtcd.h b/source/config/linux/generic/vp8_rtcd.h index 9564cfc..d6de728 100644 --- a/source/config/linux/generic/vp8_rtcd.h +++ b/source/config/linux/generic/vp8_rtcd.h @@ -324,12 +324,12 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/generic/vp9_rtcd.h b/source/config/linux/generic/vp9_rtcd.h index 652aa08..03e7181 100644 --- a/source/config/linux/generic/vp9_rtcd.h +++ b/source/config/linux/generic/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int 
vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -687,12 +690,12 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/generic/vpx_config.asm b/source/config/linux/generic/vpx_config.asm index c612876..1d1039e 100644 --- a/source/config/linux/generic/vpx_config.asm +++ b/source/config/linux/generic/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/generic/vpx_config.h b/source/config/linux/generic/vpx_config.h index e910000..2d5e208 100644 --- a/source/config/linux/generic/vpx_config.h +++ b/source/config/linux/generic/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* 
VPX_CONFIG_H */ diff --git a/source/config/linux/generic/vpx_scale_rtcd.h b/source/config/linux/generic/vpx_scale_rtcd.h index 4b0a213..f5e6caa 100644 --- a/source/config/linux/generic/vpx_scale_rtcd.h +++ b/source/config/linux/generic/vpx_scale_rtcd.h @@ -50,12 +50,12 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/ia32/vp8_rtcd.h b/source/config/linux/ia32/vp8_rtcd.h index 7a3e0f4..7e90462 100644 --- a/source/config/linux/ia32/vp8_rtcd.h +++ b/source/config/linux/ia32/vp8_rtcd.h @@ -492,337 +492,239 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - vp8_block_error = vp8_block_error_c; if (flags & HAS_MMX) vp8_block_error = vp8_block_error_mmx; if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_xmm; - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = 
vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy32xn = vp8_copy32xn_c; if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = 
vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_c; if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2; - - vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c; if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2; - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; - - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; if (flags & 
HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; - vp8_mbblock_error = vp8_mbblock_error_c; if (flags & HAS_MMX) vp8_mbblock_error = vp8_mbblock_error_mmx; if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_xmm; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; if (flags & HAS_SSE2) vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_xmm; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; if (flags & HAS_MMX) vp8_mbpost_proc_down = vp8_mbpost_proc_down_mmx; if (flags & HAS_SSE2) vp8_mbpost_proc_down = vp8_mbpost_proc_down_xmm; - vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; - vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; if (flags & HAS_SSE2) vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_sse2; - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = 
vp8_regular_quantize_b_sse2; - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; if (flags & HAS_SSE2) 
vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MMX) vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) 
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; if (flags & HAS_MMX) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MMX) vp8_subtract_b = vp8_subtract_b_mmx; if (flags & HAS_SSE2) vp8_subtract_b = vp8_subtract_b_sse2; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MMX) vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; if (flags & HAS_SSE2) vp8_subtract_mbuv = vp8_subtract_mbuv_sse2; - vp8_subtract_mby = vp8_subtract_mby_c; if 
(flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; diff --git a/source/config/linux/ia32/vp9_rtcd.h b/source/config/linux/ia32/vp9_rtcd.h index b4674d2..33c6064 100644 --- a/source/config/linux/ia32/vp9_rtcd.h +++ b/source/config/linux/ia32/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int 
vp9_get_mb_ss_sse2(const int16_t *); RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vp9_get_sse_sum_16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vp9_get_sse_sum_8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); @@ -909,595 +913,385 @@ static void setup_rtcd_internal(void) (void)flags; - - - vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = 
vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2; - vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - 
vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_sse2; - vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; if (flags & HAS_SSE2) vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_sse2; - vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c; if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; - vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; - vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; - vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; - vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; - vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2; - vp9_fht16x16 = vp9_fht16x16_c; if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2; - vp9_fht4x4 = vp9_fht4x4_c; if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2; - vp9_fht8x8 = vp9_fht8x8_c; if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2; - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if 
(flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; - + vp9_get_sse_sum_16x16 = vp9_get_sse_sum_16x16_c; + if (flags & HAS_SSE2) vp9_get_sse_sum_16x16 = vp9_get16x16var_sse2; vp9_get_sse_sum_8x8 = vp9_get_sse_sum_8x8_c; if (flags & HAS_SSE2) vp9_get_sse_sum_8x8 = vp9_get8x8var_sse2; - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1_add = vp9_idct32x32_1_add_sse2; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_SSE2) vp9_idct32x32_34_add = vp9_idct32x32_34_add_sse2; - vp9_idct4x4_16_add = vp9_idct4x4_16_add_c; if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_SSE2) vp9_idct8x8_10_add = 
vp9_idct8x8_10_add_sse2; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_SSE2) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_sse2; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_sse2; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if (flags & HAS_MMX) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_mmx; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = 
vp9_mse16x16_c; if (flags & HAS_MMX) vp9_mse16x16 = vp9_mse16x16_mmx; if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - - - - - - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - vp9_sad16x16 = vp9_sad16x16_c; if (flags & HAS_MMX) vp9_sad16x16 = vp9_sad16x16_mmx; if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - vp9_sad16x16_avg = vp9_sad16x16_avg_c; if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - - vp9_sad16x32 = vp9_sad16x32_c; if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; if (flags & HAS_MMX) vp9_sad16x8 = vp9_sad16x8_mmx; if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - - vp9_sad32x16 = vp9_sad32x16_c; if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2; - vp9_sad32x32_avg = vp9_sad32x32_avg_c; if (flags & 
HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - - vp9_sad32x32x4d = vp9_sad32x32x4d_c; if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - - vp9_sad32x64 = vp9_sad32x64_c; if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; if (flags & HAS_MMX) vp9_sad4x4 = vp9_sad4x4_mmx; if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - - vp9_sad4x8 = vp9_sad4x8_c; if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - - vp9_sad64x32 = vp9_sad64x32_c; if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - - vp9_sad64x64x4d = vp9_sad64x64x4d_c; if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - - vp9_sad8x16 = vp9_sad8x16_c; if (flags & HAS_MMX) vp9_sad8x16 = vp9_sad8x16_mmx; if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; if 
(flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - - vp9_sad8x4 = vp9_sad8x4_c; if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - - vp9_sad8x8 = vp9_sad8x8_c; if (flags & HAS_MMX) vp9_sad8x8 = vp9_sad8x8_mmx; if (flags & HAS_SSE2) vp9_sad8x8 = vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; if (flags & HAS_SSE2) 
vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = 
vp9_sub_pixel_avg_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if 
(flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_subtract_block = vp9_subtract_block_c; if (flags & HAS_SSE2) vp9_subtract_block = vp9_subtract_block_sse2; - vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; - vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_c; if (flags & HAS_SSE2) vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_sse2; - - vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_c; if (flags & HAS_SSE) vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_sse; - vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_c; if (flags & HAS_SSE2) vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_sse2; - vp9_v_predictor_16x16 = vp9_v_predictor_16x16_c; if (flags & HAS_SSE2) vp9_v_predictor_16x16 = vp9_v_predictor_16x16_sse2; - 
vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c; if (flags & HAS_SSE2) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_sse2; - vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c; if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; - vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; if (flags & HAS_MMX) vp9_variance16x16 = vp9_variance16x16_mmx; if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - vp9_variance16x32 = vp9_variance16x32_c; if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; if (flags & HAS_MMX) vp9_variance16x8 = vp9_variance16x8_mmx; if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - vp9_variance32x32 = vp9_variance32x32_c; if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - vp9_variance32x64 = vp9_variance32x64_c; if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; if (flags & HAS_MMX) vp9_variance4x4 = vp9_variance4x4_mmx; if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - vp9_variance64x64 = vp9_variance64x64_c; if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - vp9_variance8x16 = vp9_variance8x16_c; if (flags & HAS_MMX) vp9_variance8x16 = vp9_variance8x16_mmx; if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx; if (flags & HAS_SSE2) 
vp9_variance8x8 = vp9_variance8x8_sse2; - vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_sse2; - vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_sse2; - vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_sse2; } diff --git a/source/config/linux/ia32/vpx_config.asm b/source/config/linux/ia32/vpx_config.asm index 9446c60..6f5cff0 100644 --- a/source/config/linux/ia32/vpx_config.asm +++ b/source/config/linux/ia32/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/linux/ia32/vpx_config.h b/source/config/linux/ia32/vpx_config.h index b1da19e..2e170eb 100644 --- a/source/config/linux/ia32/vpx_config.h +++ b/source/config/linux/ia32/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/ia32/vpx_scale_rtcd.h b/source/config/linux/ia32/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/linux/ia32/vpx_scale_rtcd.h +++ b/source/config/linux/ia32/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) 
(void)flags; - } #endif diff --git a/source/config/linux/mipsel/vp8_rtcd.h b/source/config/linux/mipsel/vp8_rtcd.h index e46242f..72a7d9e 100644 --- a/source/config/linux/mipsel/vp8_rtcd.h +++ b/source/config/linux/mipsel/vp8_rtcd.h @@ -327,12 +327,12 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/mipsel/vp9_rtcd.h b/source/config/linux/mipsel/vp9_rtcd.h index 652aa08..03e7181 100644 --- a/source/config/linux/mipsel/vp9_rtcd.h +++ b/source/config/linux/mipsel/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -687,12 +690,12 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/mipsel/vpx_config.h b/source/config/linux/mipsel/vpx_config.h index 21353ea..32bd922 100644 --- a/source/config/linux/mipsel/vpx_config.h +++ b/source/config/linux/mipsel/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define 
CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/mipsel/vpx_scale_rtcd.h b/source/config/linux/mipsel/vpx_scale_rtcd.h index 4b0a213..f5e6caa 100644 --- a/source/config/linux/mipsel/vpx_scale_rtcd.h +++ b/source/config/linux/mipsel/vpx_scale_rtcd.h @@ -50,12 +50,12 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/x64/vp8_rtcd.h b/source/config/linux/x64/vp8_rtcd.h index f7b58ac..9653130 100644 --- a/source/config/linux/x64/vp8_rtcd.h +++ b/source/config/linux/x64/vp8_rtcd.h @@ -490,151 +490,67 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - - - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - - vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - - - - - - - - - - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - - - - 
vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - - - - - - - - - - - - - - - - - - - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - - - vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - - - - 
- - - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; } diff --git a/source/config/linux/x64/vp9_rtcd.h b/source/config/linux/x64/vp9_rtcd.h index 08003f5..e6a0520 100644 --- a/source/config/linux/x64/vp9_rtcd.h +++ b/source/config/linux/x64/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_sse2 +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get16x16var_sse2 + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get8x8var_sse2 @@ -912,315 +916,129 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) 
vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = 
vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - - - - - - - - - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; - vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3; - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if 
(flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - - - - - - - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - - - - - - - - - - - - - - - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - - - - - - - - - - - - - - - - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - - - - - - - - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - 
vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) 
vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; } diff --git a/source/config/linux/x64/vpx_config.asm b/source/config/linux/x64/vpx_config.asm index 7a9cf0b..c045d4d 100644 --- a/source/config/linux/x64/vpx_config.asm +++ b/source/config/linux/x64/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/linux/x64/vpx_config.h b/source/config/linux/x64/vpx_config.h index c34ce89..494d1f4 100644 --- a/source/config/linux/x64/vpx_config.h +++ b/source/config/linux/x64/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git 
a/source/config/linux/x64/vpx_scale_rtcd.h b/source/config/linux/x64/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/linux/x64/vpx_scale_rtcd.h +++ b/source/config/linux/x64/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/mac/ia32/vp8_rtcd.h b/source/config/mac/ia32/vp8_rtcd.h index 7a3e0f4..7e90462 100644 --- a/source/config/mac/ia32/vp8_rtcd.h +++ b/source/config/mac/ia32/vp8_rtcd.h @@ -492,337 +492,239 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - vp8_block_error = vp8_block_error_c; if (flags & HAS_MMX) vp8_block_error = vp8_block_error_mmx; if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_xmm; - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) 
vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy32xn = vp8_copy32xn_c; if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - vp8_filter_by_weight16x16 = 
vp8_filter_by_weight16x16_c; if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2; - - vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c; if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2; - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; - - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; if (flags & 
HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; - vp8_mbblock_error = vp8_mbblock_error_c; if (flags & HAS_MMX) vp8_mbblock_error = vp8_mbblock_error_mmx; if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_xmm; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; if (flags & HAS_SSE2) vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_xmm; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; if (flags & HAS_MMX) vp8_mbpost_proc_down = vp8_mbpost_proc_down_mmx; if (flags & HAS_SSE2) vp8_mbpost_proc_down = vp8_mbpost_proc_down_xmm; - vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; - vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; if (flags & HAS_SSE2) vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_sse2; - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = 
vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags 
& HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MMX) vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; if 
(flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; if (flags & HAS_MMX) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MMX) vp8_subtract_b = vp8_subtract_b_mmx; if (flags & HAS_SSE2) vp8_subtract_b = vp8_subtract_b_sse2; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MMX) vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; if (flags & HAS_SSE2) vp8_subtract_mbuv = vp8_subtract_mbuv_sse2; - vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; if (flags & HAS_SSE2) 
vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; diff --git a/source/config/mac/ia32/vp9_rtcd.h b/source/config/mac/ia32/vp9_rtcd.h index 4f25ab3..1489a7e 100644 --- a/source/config/mac/ia32/vp9_rtcd.h +++ b/source/config/mac/ia32/vp9_rtcd.h @@ -249,6 +249,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vp9_get_sse_sum_16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vp9_get_sse_sum_8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); @@ -780,371 +784,164 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; - vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; - vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; - vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; - vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2; - vp9_fht16x16 = vp9_fht16x16_c; if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2; - vp9_fht4x4 = vp9_fht4x4_c; if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2; - vp9_fht8x8 = vp9_fht8x8_c; if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2; - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; - + vp9_get_sse_sum_16x16 = vp9_get_sse_sum_16x16_c; + if (flags & HAS_SSE2) vp9_get_sse_sum_16x16 = vp9_get16x16var_sse2; vp9_get_sse_sum_8x8 = vp9_get_sse_sum_8x8_c; if (flags & HAS_SSE2) vp9_get_sse_sum_8x8 = vp9_get8x8var_sse2; - - - - - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1_add = 
vp9_idct32x32_1_add_sse2; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_SSE2) vp9_idct32x32_34_add = vp9_idct32x32_34_add_sse2; - vp9_idct4x4_16_add = vp9_idct4x4_16_add_c; if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_SSE2) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_sse2; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_sse2; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if 
(flags & HAS_MMX) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_mmx; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; if (flags & HAS_MMX) vp9_mse16x16 = vp9_mse16x16_mmx; - - - - - - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - vp9_sad16x16 = vp9_sad16x16_c; if (flags & HAS_MMX) vp9_sad16x16 = vp9_sad16x16_mmx; - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - - - - vp9_sad16x32x4d = vp9_sad16x32x4d_c; if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; if (flags & HAS_MMX) vp9_sad16x8 = vp9_sad16x8_mmx; - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - - - - vp9_sad32x16x4d = vp9_sad32x16x4d_c; if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - - - - vp9_sad32x32x4d = vp9_sad32x32x4d_c; if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - - - - vp9_sad32x64x4d = vp9_sad32x64x4d_c; if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; if (flags & HAS_MMX) vp9_sad4x4 = vp9_sad4x4_mmx; - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; if (flags & 
HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - - - - vp9_sad4x8x4d = vp9_sad4x8x4d_c; if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - - - - vp9_sad64x32x4d = vp9_sad64x32x4d_c; if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - - - - vp9_sad64x64x4d = vp9_sad64x64x4d_c; if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - - vp9_sad8x16 = vp9_sad8x16_c; if (flags & HAS_MMX) vp9_sad8x16 = vp9_sad8x16_mmx; - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - - - - vp9_sad8x4x4d = vp9_sad8x4x4d_c; if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - - vp9_sad8x8 = vp9_sad8x8_c; if (flags & HAS_MMX) vp9_sad8x8 = vp9_sad8x8_mmx; - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; - - - - - - - - - vp9_variance16x16 = vp9_variance16x16_c; if (flags & HAS_MMX) vp9_variance16x16 = vp9_variance16x16_mmx; - - vp9_variance16x8 = vp9_variance16x8_c; if (flags & HAS_MMX) vp9_variance16x8 = vp9_variance16x8_mmx; - - - - vp9_variance4x4 = vp9_variance4x4_c; if (flags & HAS_MMX) vp9_variance4x4 = vp9_variance4x4_mmx; - - - - vp9_variance8x16 = vp9_variance8x16_c; if (flags & HAS_MMX) vp9_variance8x16 = vp9_variance8x16_mmx; - - vp9_variance8x8 = vp9_variance8x8_c; if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx; } diff --git a/source/config/mac/ia32/vpx_config.asm b/source/config/mac/ia32/vpx_config.asm index 33eed91..f296bc3 100644 --- a/source/config/mac/ia32/vpx_config.asm +++ b/source/config/mac/ia32/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 
CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/mac/ia32/vpx_config.h b/source/config/mac/ia32/vpx_config.h index 3e80541..7c2bcce 100644 --- a/source/config/mac/ia32/vpx_config.h +++ b/source/config/mac/ia32/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/mac/ia32/vpx_scale_rtcd.h b/source/config/mac/ia32/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/mac/ia32/vpx_scale_rtcd.h +++ b/source/config/mac/ia32/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/mac/x64/vp8_rtcd.h b/source/config/mac/x64/vp8_rtcd.h index f7b58ac..9653130 100644 --- a/source/config/mac/x64/vp8_rtcd.h +++ b/source/config/mac/x64/vp8_rtcd.h @@ -490,151 +490,67 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - - - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = 
vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - - vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - - - - - - - - - - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - - - - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - - - - - - - - - - - - - - - - - - - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - - - vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) 
vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - - - - - - - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; } diff --git a/source/config/mac/x64/vp9_rtcd.h b/source/config/mac/x64/vp9_rtcd.h index 08003f5..e6a0520 100644 --- a/source/config/mac/x64/vp9_rtcd.h +++ b/source/config/mac/x64/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_sse2 +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define 
vp9_get_sse_sum_16x16 vp9_get16x16var_sse2 + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get8x8var_sse2 @@ -912,315 +916,129 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = 
vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - - - - - - - - - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if 
(flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; - vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3; - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - - - - - - - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - - - - - - - - - - - - - - - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - - - - - - - - - - - - - - - - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - - - - - - - - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = 
vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - 
vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; } diff --git a/source/config/mac/x64/vpx_config.asm b/source/config/mac/x64/vpx_config.asm index 7a9cf0b..c045d4d 100644 --- a/source/config/mac/x64/vpx_config.asm +++ b/source/config/mac/x64/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/mac/x64/vpx_config.h b/source/config/mac/x64/vpx_config.h index 
c34ce89..494d1f4 100644 --- a/source/config/mac/x64/vpx_config.h +++ b/source/config/mac/x64/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/mac/x64/vpx_scale_rtcd.h b/source/config/mac/x64/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/mac/x64/vpx_scale_rtcd.h +++ b/source/config/mac/x64/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/nacl/vp8_rtcd.h b/source/config/nacl/vp8_rtcd.h index 9564cfc..d6de728 100644 --- a/source/config/nacl/vp8_rtcd.h +++ b/source/config/nacl/vp8_rtcd.h @@ -324,12 +324,12 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/nacl/vp9_rtcd.h b/source/config/nacl/vp9_rtcd.h index 652aa08..03e7181 100644 --- a/source/config/nacl/vp9_rtcd.h +++ b/source/config/nacl/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 
vp9_get_sse_sum_8x8_c @@ -687,12 +690,12 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/nacl/vpx_config.asm b/source/config/nacl/vpx_config.asm index c612876..1d1039e 100644 --- a/source/config/nacl/vpx_config.asm +++ b/source/config/nacl/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/nacl/vpx_config.h b/source/config/nacl/vpx_config.h index e910000..2d5e208 100644 --- a/source/config/nacl/vpx_config.h +++ b/source/config/nacl/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/nacl/vpx_scale_rtcd.h b/source/config/nacl/vpx_scale_rtcd.h index 4b0a213..f5e6caa 100644 --- a/source/config/nacl/vpx_scale_rtcd.h +++ b/source/config/nacl/vpx_scale_rtcd.h @@ -50,12 +50,12 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git 
a/source/config/win/ia32/vp8_rtcd.h b/source/config/win/ia32/vp8_rtcd.h index 7a3e0f4..7e90462 100644 --- a/source/config/win/ia32/vp8_rtcd.h +++ b/source/config/win/ia32/vp8_rtcd.h @@ -492,337 +492,239 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - vp8_block_error = vp8_block_error_c; if (flags & HAS_MMX) vp8_block_error = vp8_block_error_mmx; if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_xmm; - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy32xn = vp8_copy32xn_c; if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = 
vp8_copy32xn_sse3; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_c; if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2; - - vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c; if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2; - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) 
vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; - - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = 
vp8_loop_filter_simple_vertical_edge_sse2; - vp8_mbblock_error = vp8_mbblock_error_c; if (flags & HAS_MMX) vp8_mbblock_error = vp8_mbblock_error_mmx; if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_xmm; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; if (flags & HAS_SSE2) vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_xmm; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; if (flags & HAS_MMX) vp8_mbpost_proc_down = vp8_mbpost_proc_down_mmx; if (flags & HAS_SSE2) vp8_mbpost_proc_down = vp8_mbpost_proc_down_xmm; - vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; - vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; if (flags & HAS_SSE2) vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_sse2; - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) 
vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MMX) 
vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = 
vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; if (flags & HAS_MMX) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MMX) vp8_subtract_b = vp8_subtract_b_mmx; if (flags & HAS_SSE2) vp8_subtract_b = vp8_subtract_b_sse2; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MMX) vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; if (flags & HAS_SSE2) vp8_subtract_mbuv = vp8_subtract_mbuv_sse2; - vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; if (flags & HAS_MMX) 
vp8_variance4x4 = vp8_variance4x4_mmx; if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; diff --git a/source/config/win/ia32/vp9_rtcd.h b/source/config/win/ia32/vp9_rtcd.h index b4674d2..33c6064 100644 --- a/source/config/win/ia32/vp9_rtcd.h +++ b/source/config/win/ia32/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vp9_get_sse_sum_16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + 
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vp9_get_sse_sum_8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); @@ -909,595 +913,385 @@ static void setup_rtcd_internal(void) (void)flags; - - - vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2; - vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; - - - - - - - - - vp9_d153_predictor_16x16 = 
vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - 
vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_sse2; - vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; if (flags & HAS_SSE2) vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_sse2; - vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c; if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; - vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; - vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; - vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; - vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; - vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2; - vp9_fht16x16 = vp9_fht16x16_c; if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2; - vp9_fht4x4 = vp9_fht4x4_c; if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2; - vp9_fht8x8 = vp9_fht8x8_c; if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2; - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; - + vp9_get_sse_sum_16x16 = vp9_get_sse_sum_16x16_c; + if (flags & HAS_SSE2) vp9_get_sse_sum_16x16 = vp9_get16x16var_sse2; vp9_get_sse_sum_8x8 = vp9_get_sse_sum_8x8_c; if (flags & HAS_SSE2) vp9_get_sse_sum_8x8 = vp9_get8x8var_sse2; - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) 
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1_add = vp9_idct32x32_1_add_sse2; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_SSE2) vp9_idct32x32_34_add = vp9_idct32x32_34_add_sse2; - vp9_idct4x4_16_add = vp9_idct4x4_16_add_c; if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_SSE2) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_SSE2) 
vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_sse2; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_sse2; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if (flags & HAS_MMX) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_mmx; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; if (flags & HAS_MMX) vp9_mse16x16 = vp9_mse16x16_mmx; if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - - - - - - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - vp9_sad16x16 = vp9_sad16x16_c; if (flags & HAS_MMX) vp9_sad16x16 = vp9_sad16x16_mmx; if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - vp9_sad16x16_avg = vp9_sad16x16_avg_c; if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = 
vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - - vp9_sad16x32 = vp9_sad16x32_c; if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; if (flags & HAS_MMX) vp9_sad16x8 = vp9_sad16x8_mmx; if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - - vp9_sad32x16 = vp9_sad32x16_c; if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2; - vp9_sad32x32_avg = vp9_sad32x32_avg_c; if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - - vp9_sad32x32x4d = vp9_sad32x32x4d_c; if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - - vp9_sad32x64 = vp9_sad32x64_c; if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; if (flags & HAS_MMX) vp9_sad4x4 = vp9_sad4x4_mmx; if 
(flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - - vp9_sad4x8 = vp9_sad4x8_c; if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - - vp9_sad64x32 = vp9_sad64x32_c; if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - - vp9_sad64x64x4d = vp9_sad64x64x4d_c; if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - - vp9_sad8x16 = vp9_sad8x16_c; if (flags & HAS_MMX) vp9_sad8x16 = vp9_sad8x16_mmx; if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - - vp9_sad8x4 = vp9_sad8x4_c; if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - - vp9_sad8x8 = vp9_sad8x8_c; if (flags & HAS_MMX) vp9_sad8x8 = vp9_sad8x8_mmx; if (flags & HAS_SSE2) vp9_sad8x8 
= vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x4 = 
vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_c; if (flags & HAS_SSE2) 
vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = 
vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_subtract_block = vp9_subtract_block_c; if (flags & HAS_SSE2) vp9_subtract_block = vp9_subtract_block_sse2; - vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; - vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_c; if (flags & HAS_SSE2) vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_sse2; - - vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_c; if (flags & HAS_SSE) vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_sse; - vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_c; if (flags & HAS_SSE2) vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_sse2; - vp9_v_predictor_16x16 = vp9_v_predictor_16x16_c; if (flags & HAS_SSE2) vp9_v_predictor_16x16 = vp9_v_predictor_16x16_sse2; - vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c; if (flags & HAS_SSE2) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_sse2; - vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c; if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; - vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; if (flags & HAS_MMX) vp9_variance16x16 = vp9_variance16x16_mmx; if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - 
vp9_variance16x32 = vp9_variance16x32_c; if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; if (flags & HAS_MMX) vp9_variance16x8 = vp9_variance16x8_mmx; if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - vp9_variance32x32 = vp9_variance32x32_c; if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - vp9_variance32x64 = vp9_variance32x64_c; if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; if (flags & HAS_MMX) vp9_variance4x4 = vp9_variance4x4_mmx; if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - vp9_variance64x64 = vp9_variance64x64_c; if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - vp9_variance8x16 = vp9_variance8x16_c; if (flags & HAS_MMX) vp9_variance8x16 = vp9_variance8x16_mmx; if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx; if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; - vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_sse2; - vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_sse2; - vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_sse2; } 
diff --git a/source/config/win/ia32/vpx_config.asm b/source/config/win/ia32/vpx_config.asm index 7907235..cc0de0d 100644 --- a/source/config/win/ia32/vpx_config.asm +++ b/source/config/win/ia32/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/win/ia32/vpx_config.h b/source/config/win/ia32/vpx_config.h index a1e25bd..7aaa14d 100644 --- a/source/config/win/ia32/vpx_config.h +++ b/source/config/win/ia32/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/win/ia32/vpx_scale_rtcd.h b/source/config/win/ia32/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/win/ia32/vpx_scale_rtcd.h +++ b/source/config/win/ia32/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/win/x64/vp8_rtcd.h b/source/config/win/x64/vp8_rtcd.h index f7b58ac..9653130 100644 --- a/source/config/win/x64/vp8_rtcd.h +++ b/source/config/win/x64/vp8_rtcd.h @@ -490,151 +490,67 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - - - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - 
- - - - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - - vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - - - - - - - - - - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - - - - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - - - - - - - - - - - - - - - - - - - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - - - vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = 
vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - - - - - - - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; } diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h index 08003f5..e6a0520 100644 --- a/source/config/win/x64/vp9_rtcd.h +++ b/source/config/win/x64/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_sse2 +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get16x16var_sse2 + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get8x8var_sse2 @@ -912,315 +916,129 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = 
vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - - - - - - - - - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) 
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; - vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3; - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - - - - - - - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - - - - - - - - - - - - - - - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - - - - - - - - - - - - - - - - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - - - - - - - - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) 
vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = 
vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; } diff --git a/source/config/win/x64/vpx_config.asm b/source/config/win/x64/vpx_config.asm index f502251..2bfd490 100644 --- a/source/config/win/x64/vpx_config.asm +++ b/source/config/win/x64/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL 
equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/win/x64/vpx_config.h b/source/config/win/x64/vpx_config.h index c8874e1..e7570ce 100644 --- a/source/config/win/x64/vpx_config.h +++ b/source/config/win/x64/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/win/x64/vpx_scale_rtcd.h b/source/config/win/x64/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/win/x64/vpx_scale_rtcd.h +++ b/source/config/win/x64/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/libvpx/.gitignore b/source/libvpx/.gitignore index aa95d57..bb9e518 100644 --- a/source/libvpx/.gitignore +++ b/source/libvpx/.gitignore @@ -28,15 +28,13 @@ /examples/decode_to_md5 /examples/decode_with_drops /examples/decode_with_partial_drops -/examples/error_resilient /examples/example_xma -/examples/force_keyframe /examples/postproc +/examples/set_maps /examples/simple_decoder /examples/simple_encoder /examples/twopass_encoder /examples/vp8_multi_resolution_encoder -/examples/vp8_set_maps /examples/vp8cx_set_ref /examples/vp9_spatial_scalable_encoder /examples/vpx_temporal_scalable_patterns diff --git a/source/libvpx/build/make/Makefile b/source/libvpx/build/make/Makefile index 6894d6d..dd7fb4a 100644 --- a/source/libvpx/build/make/Makefile +++ b/source/libvpx/build/make/Makefile @@ -94,6 +94,16 @@ clean:: rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s) rm -f $(CLEAN-OBJS) +.PHONY: clean +distclean: clean + if [ -z "$(target)" ]; then \ + rm -f Makefile; \ 
+ rm -f config.log config.mk; \ + rm -f vpx_config.[hc] vpx_config.asm; \ + else \ + rm -f $(target)-$(TOOLCHAIN).mk; \ + fi + .PHONY: dist dist: .PHONY: install @@ -307,7 +317,7 @@ endef ifneq ($(target),) include $(SRC_PATH_BARE)/$(target:-$(TOOLCHAIN)=).mk endif -ifeq ($(filter clean,$(MAKECMDGOALS)),) +ifeq ($(filter %clean,$(MAKECMDGOALS)),) # Older versions of make don't like -include directives with no arguments ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),) -include $(filter %.d,$(OBJS-yes:.o=.d)) diff --git a/source/libvpx/build/make/ads2armasm_ms.pl b/source/libvpx/build/make/ads2armasm_ms.pl index 95c8084..2a2c470 100755 --- a/source/libvpx/build/make/ads2armasm_ms.pl +++ b/source/libvpx/build/make/ads2armasm_ms.pl @@ -32,6 +32,7 @@ while (<STDIN>) s/ldrneb/ldrbne/i; s/ldrneh/ldrhne/i; + s/^(\s*)ENDP.*/$&\n$1ALIGN 4/; print; } diff --git a/source/libvpx/build/make/configure.sh b/source/libvpx/build/make/configure.sh index c379c74..514c442 100755 --- a/source/libvpx/build/make/configure.sh +++ b/source/libvpx/build/make/configure.sh @@ -41,7 +41,7 @@ log(){ log_file(){ log BEGIN $1 - pr -n -t $1 >>$logfile + cat -n $1 >>$logfile log END $1 } @@ -274,6 +274,7 @@ TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm" clean_temp_files() { rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM} + enabled gcov && rm -f ${TMP_C%.c}.gcno ${TMP_CC%.cc}.gcno } # @@ -327,7 +328,7 @@ EOF check_cflags() { log check_cflags "$@" - check_cc "$@" <<EOF + check_cc -Werror "$@" <<EOF int x; EOF } @@ -341,7 +342,7 @@ check_cxxflags() { int x; EOF ;; - *) check_cxx "$@" <<EOF + *) check_cxx -Werror "$@" <<EOF int x; EOF ;; @@ -378,6 +379,19 @@ EOF fi } +# tests for -m$1 toggling the feature given in $2. If $2 is empty $1 is used. +check_gcc_machine_option() { + local opt="$1" + local feature="$2" + [ -n "$feature" ] || feature="$opt" + + if enabled gcc && ! disabled "$feature" && ! 
check_cflags "-m$opt"; then + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-$feature " + else + soft_enable "$feature" + fi +} + write_common_config_banner() { print_webm_license config.mk "##" "" echo '# This file automatically generated by configure. Do not edit!' >> config.mk @@ -405,8 +419,8 @@ true } write_common_target_config_mk() { - local CC=${CC} - local CXX=${CXX} + local CC="${CC}" + local CXX="${CXX}" enabled ccache && CC="ccache ${CC}" enabled ccache && CXX="ccache ${CXX}" print_webm_license $1 "##" "" @@ -1089,30 +1103,16 @@ EOF esac soft_enable runtime_cpu_detect - soft_enable mmx - soft_enable sse - soft_enable sse2 - soft_enable sse3 - soft_enable ssse3 # We can't use 'check_cflags' until the compiler is configured and CC is # populated. - if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then - RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 " - else - soft_enable sse4_1 - fi - - if enabled gcc && ! disabled avx && ! check_cflags -mavx; then - RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx " - else - soft_enable avx - fi - - if enabled gcc && ! disabled avx2 && ! 
check_cflags -mavx2; then - RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx2 " - else - soft_enable avx2 - fi + check_gcc_machine_option mmx + check_gcc_machine_option sse + check_gcc_machine_option sse2 + check_gcc_machine_option sse3 + check_gcc_machine_option ssse3 + check_gcc_machine_option sse4 sse4_1 + check_gcc_machine_option avx + check_gcc_machine_option avx2 case "${AS}" in auto|"") diff --git a/source/libvpx/build/make/gen_msvs_proj.sh b/source/libvpx/build/make/gen_msvs_proj.sh index 5936370..df91435 100755 --- a/source/libvpx/build/make/gen_msvs_proj.sh +++ b/source/libvpx/build/make/gen_msvs_proj.sh @@ -162,7 +162,8 @@ generate_filter() { done done fi - if [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then + if [ "$pat" == "c" ] || \ + [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then for plat in "${platforms[@]}"; do for cfg in Debug Release; do open_tag FileConfiguration \ @@ -561,7 +562,7 @@ generate_vcproj() { close_tag Configurations open_tag Files - generate_filter srcs "Source Files" "c;cc;def;odl;idl;hpj;bat;asm;asmx" + generate_filter srcs "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx" generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd" generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav" generate_filter resrcs "Build Files" "mk" diff --git a/source/libvpx/build/make/gen_msvs_vcxproj.sh b/source/libvpx/build/make/gen_msvs_vcxproj.sh index 4558aa1..23990a4 100755 --- a/source/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/source/libvpx/build/make/gen_msvs_vcxproj.sh @@ -28,6 +28,7 @@ Options: --lib Generate a project for creating a static library --dll Generate a project for creating a dll --static-crt Use the static C runtime (/MT) + --enable-werror Treat warnings as errors (/WX) --target=isa-os-cc Target specifier (required) --out=filename Write output to a file [stdout] --name=project_name Name of the project (required) @@ -173,7 +174,8 @@ generate_filter() { done done close_tag 
CustomBuild - elif [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then + elif [ "$pat" == "c" ] || \ + [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then open_tag ClCompile \ Include=".\\$f" # Separate file names with Condition? @@ -233,6 +235,8 @@ for opt in "$@"; do ;; --static-crt) use_static_runtime=true ;; + --enable-werror) werror=true + ;; --ver=*) vs_ver="$optval" case "$optval" in @@ -492,7 +496,9 @@ generate_vcxproj() { tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)" tag_content RuntimeLibrary $runtime tag_content WarningLevel Level3 - # DebugInformationFormat + if ${werror:-false}; then + tag_content TreatWarningAsError true + fi close_tag ClCompile case "$proj_kind" in exe) @@ -519,7 +525,7 @@ generate_vcxproj() { done open_tag ItemGroup - generate_filter "Source Files" "c;cc;def;odl;idl;hpj;bat;asm;asmx;s" + generate_filter "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s" close_tag ItemGroup open_tag ItemGroup generate_filter "Header Files" "h;hm;inl;inc;xsd" diff --git a/source/libvpx/build/make/rtcd.pl b/source/libvpx/build/make/rtcd.pl new file mode 100755 index 0000000..18ee80d --- /dev/null +++ b/source/libvpx/build/make/rtcd.pl @@ -0,0 +1,414 @@ +#!/usr/bin/env perl + +no strict 'refs'; +use warnings; +use Getopt::Long; +Getopt::Long::Configure("auto_help"); + +my %ALL_FUNCS = (); +my @ALL_ARCHS; +my @ALL_FORWARD_DECLS; +my @REQUIRES; + +my %opts = (); +my %disabled = (); +my %required = (); + +my @argv; +foreach (@ARGV) { + $disabled{$1} = 1, next if /--disable-(.*)/; + $required{$1} = 1, next if /--require-(.*)/; + push @argv, $_; +} + +# NB: use GetOptions() instead of GetOptionsFromArray() for compatibility. 
+@ARGV = @argv; +GetOptions( + \%opts, + 'arch=s', + 'sym=s', + 'config=s', +); + +foreach my $opt (qw/arch config/) { + if (!defined($opts{$opt})) { + warn "--$opt is required!\n"; + Getopt::Long::HelpMessage('-exit' => 1); + } +} + +foreach my $defs_file (@ARGV) { + if (!-f $defs_file) { + warn "$defs_file: $!\n"; + Getopt::Long::HelpMessage('-exit' => 1); + } +} + +open CONFIG_FILE, $opts{config} or + die "Error opening config file '$opts{config}': $!\n"; + +my %config = (); +while (<CONFIG_FILE>) { + next if !/^CONFIG_/; + chomp; + my @pair = split /=/; + $config{$pair[0]} = $pair[1]; +} +close CONFIG_FILE; + +# +# Routines for the RTCD DSL to call +# +sub vpx_config($) { + return (defined $config{$_[0]}) ? $config{$_[0]} : ""; +} + +sub specialize { + my $fn=$_[0]; + shift; + foreach my $opt (@_) { + eval "\$${fn}_${opt}=${fn}_${opt}"; + } +} + +sub add_proto { + my $fn = splice(@_, -2, 1); + $ALL_FUNCS{$fn} = \@_; + specialize $fn, "c"; +} + +sub require { + foreach my $fn (keys %ALL_FUNCS) { + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + + # if we already have a default, then we can disable it, as we know + # we can do better. 
+ my $best = eval "\$${fn}_default"; + if ($best) { + my $best_ofn = eval "\$${best}"; + if ($best_ofn && "$best_ofn" ne "$ofn") { + eval "\$${best}_link = 'false'"; + } + } + eval "\$${fn}_default=${fn}_${opt}"; + eval "\$${fn}_${opt}_link='true'"; + } + } +} + +sub forward_decls { + push @ALL_FORWARD_DECLS, @_; +} + +# +# Include the user's directives +# +foreach my $f (@ARGV) { + open FILE, "<", $f or die "cannot open $f: $!\n"; + my $contents = join('', <FILE>); + close FILE; + eval $contents or warn "eval failed: $@\n"; +} + +# +# Process the directives according to the command line +# +sub process_forward_decls() { + foreach (@ALL_FORWARD_DECLS) { + $_->(); + } +} + +sub determine_indirection { + vpx_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS); + foreach my $fn (keys %ALL_FUNCS) { + my $n = ""; + my @val = @{$ALL_FUNCS{$fn}}; + my $args = pop @val; + my $rtyp = "@val"; + my $dfn = eval "\$${fn}_default"; + $dfn = eval "\$${dfn}"; + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + my $link = eval "\$${fn}_${opt}_link"; + next if $link && $link eq "false"; + $n .= "x"; + } + if ($n eq "x") { + eval "\$${fn}_indirect = 'false'"; + } else { + eval "\$${fn}_indirect = 'true'"; + } + } +} + +sub declare_function_pointers { + foreach my $fn (sort keys %ALL_FUNCS) { + my @val = @{$ALL_FUNCS{$fn}}; + my $args = pop @val; + my $rtyp = "@val"; + my $dfn = eval "\$${fn}_default"; + $dfn = eval "\$${dfn}"; + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + print "$rtyp ${ofn}($args);\n"; + } + if (eval "\$${fn}_indirect" eq "false") { + print "#define ${fn} ${dfn}\n"; + } else { + print "RTCD_EXTERN $rtyp (*${fn})($args);\n"; + } + print "\n"; + } +} + +sub set_function_pointers { + foreach my $fn (sort keys %ALL_FUNCS) { + my @val = @{$ALL_FUNCS{$fn}}; + my $args = pop @val; + my $rtyp = "@val"; + my $dfn = eval "\$${fn}_default"; + $dfn = eval "\$${dfn}"; + if (eval "\$${fn}_indirect" eq 
"true") { + print " $fn = $dfn;\n"; + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + next if "$ofn" eq "$dfn"; + my $link = eval "\$${fn}_${opt}_link"; + next if $link && $link eq "false"; + my $cond = eval "\$have_${opt}"; + print " if (${cond}) $fn = $ofn;\n" + } + } + } +} + +sub filter { + my @filtered; + foreach (@_) { push @filtered, $_ unless $disabled{$_}; } + return @filtered; +} + +# +# Helper functions for generating the arch specific RTCD files +# +sub common_top() { + my $include_guard = uc($opts{sym})."_H_"; + print <<EOF; +#ifndef ${include_guard} +#define ${include_guard} + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +EOF + +process_forward_decls(); +print "\n"; +declare_function_pointers("c", @ALL_ARCHS); + +print <<EOF; +void $opts{sym}(void); + +EOF +} + +sub common_bottom() { + print <<EOF; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif +EOF +} + +sub x86() { + determine_indirection("c", @ALL_ARCHS); + + # Assign the helper variable for each enabled extension + foreach my $opt (@ALL_ARCHS) { + my $opt_uc = uc $opt; + eval "\$have_${opt}=\"flags & HAS_${opt_uc}\""; + } + + common_top; + print <<EOF; +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + +EOF + + set_function_pointers("c", @ALL_ARCHS); + + print <<EOF; +} +#endif +EOF + common_bottom; +} + +sub arm() { + determine_indirection("c", @ALL_ARCHS); + + # Assign the helper variable for each enabled extension + foreach my $opt (@ALL_ARCHS) { + my $opt_uc = uc $opt; + eval "\$have_${opt}=\"flags & HAS_${opt_uc}\""; + } + + common_top; + print <<EOF; +#include "vpx_config.h" + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + (void)flags; + +EOF + + set_function_pointers("c", @ALL_ARCHS); + + print <<EOF; +} +#endif +EOF + 
common_bottom; +} + +sub mips() { + determine_indirection("c", @ALL_ARCHS); + common_top; + + print <<EOF; +#include "vpx_config.h" + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +EOF + + set_function_pointers("c", @ALL_ARCHS); + + print <<EOF; +#if HAVE_DSPR2 +#if CONFIG_VP8 +void dsputil_static_init(); +dsputil_static_init(); +#endif +#if CONFIG_VP9 +void vp9_dsputil_static_init(); +vp9_dsputil_static_init(); +#endif +#endif +} +#endif +EOF + common_bottom; +} + +sub unoptimized() { + determine_indirection "c"; + common_top; + print <<EOF; +#include "vpx_config.h" + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +EOF + + set_function_pointers "c"; + + print <<EOF; +} +#endif +EOF + common_bottom; +} + +# +# Main Driver +# + +&require("c"); +if ($opts{arch} eq 'x86') { + @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/); + x86; +} elsif ($opts{arch} eq 'x86_64') { + @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/); + @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/); + &require(@REQUIRES); + x86; +} elsif ($opts{arch} eq 'mips32') { + @ALL_ARCHS = filter(qw/mips32/); + open CONFIG_FILE, $opts{config} or + die "Error opening config file '$opts{config}': $!\n"; + while (<CONFIG_FILE>) { + if (/HAVE_DSPR2=yes/) { + @ALL_ARCHS = filter(qw/mips32 dspr2/); + last; + } + } + close CONFIG_FILE; + mips; +} elsif ($opts{arch} eq 'armv5te') { + @ALL_ARCHS = filter(qw/edsp/); + arm; +} elsif ($opts{arch} eq 'armv6') { + @ALL_ARCHS = filter(qw/edsp media/); + arm; +} elsif ($opts{arch} eq 'armv7') { + @ALL_ARCHS = filter(qw/edsp media neon/); + arm; +} else { + unoptimized; +} + +__END__ + +=head1 NAME + +rtcd - + +=head1 SYNOPSIS + +Usage: rtcd.pl [options] FILE + +See 'perldoc rtcd.pl' for more details. + +=head1 DESCRIPTION + +Reads the Run Time CPU Detections definitions from FILE and generates a +C header file on stdout. 
+ +=head1 OPTIONS + +Options: + --arch=ARCH Architecture to generate defs for (required) + --disable-EXT Disable support for EXT extensions + --require-EXT Require support for EXT extensions + --sym=SYMBOL Unique symbol to use for RTCD initialization function + --config=FILE File with CONFIG_FOO=yes lines to parse diff --git a/source/libvpx/build/make/rtcd.sh b/source/libvpx/build/make/rtcd.sh deleted file mode 100755 index 93c9adc..0000000 --- a/source/libvpx/build/make/rtcd.sh +++ /dev/null @@ -1,373 +0,0 @@ -#!/bin/sh -self=$0 - -usage() { - cat <<EOF >&2 -Usage: $self [options] FILE - -Reads the Run Time CPU Detections definitions from FILE and generates a -C header file on stdout. - -Options: - --arch=ARCH Architecture to generate defs for (required) - --disable-EXT Disable support for EXT extensions - --require-EXT Require support for EXT extensions - --sym=SYMBOL Unique symbol to use for RTCD initialization function - --config=FILE File with CONFIG_FOO=yes lines to parse -EOF - exit 1 -} - -die() { - echo "$@" >&2 - exit 1 -} - -die_argument_required() { - die "Option $opt requires argument" -} - -for opt; do - optval="${opt#*=}" - case "$opt" in - --arch) die_argument_required;; - --arch=*) arch=${optval};; - --disable-*) eval "disable_${opt#--disable-}=true";; - --require-*) REQUIRES="${REQUIRES}${opt#--require-} ";; - --sym) die_argument_required;; - --sym=*) symbol=${optval};; - --config=*) config_file=${optval};; - -h|--help) - usage - ;; - -*) - die "Unrecognized option: ${opt%%=*}" - ;; - *) - defs_file="$defs_file $opt" - ;; - esac - shift -done -for f in $defs_file; do [ -f "$f" ] || usage; done -[ -n "$arch" ] || usage - -# Import the configuration -[ -f "$config_file" ] && eval $(grep CONFIG_ "$config_file") - -# -# Routines for the RTCD DSL to call -# -prototype() { - rtyp="" - case "$1" in - unsigned) rtyp="$1 "; shift;; - esac - rtyp="${rtyp}$1" - fn="$2" - args="$3" - - eval "${2}_rtyp='$rtyp'" - eval "${2}_args='$3'" - ALL_FUNCS="$ALL_FUNCS 
$fn" - specialize $fn c -} - -specialize() { - fn="$1" - shift - for opt in "$@"; do - eval "${fn}_${opt}=${fn}_${opt}" - done -} - -require() { - for fn in $ALL_FUNCS; do - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - - # if we already have a default, then we can disable it, as we know - # we can do better. - best=$(eval "echo \$${fn}_default") - best_ofn=$(eval "echo \$${best}") - [ -n "$best" ] && [ "$best_ofn" != "$ofn" ] && eval "${best}_link=false" - eval "${fn}_default=${fn}_${opt}" - eval "${fn}_${opt}_link=true" - done - done -} - -forward_decls() { - ALL_FORWARD_DECLS="$ALL_FORWARD_DECLS $1" -} - -# -# Include the user's directives -# -for f in $defs_file; do - . $f -done - -# -# Process the directives according to the command line -# -process_forward_decls() { - for fn in $ALL_FORWARD_DECLS; do - eval $fn - done -} - -determine_indirection() { - [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS - for fn in $ALL_FUNCS; do - n="" - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - link=$(eval "echo \$${fn}_${opt}_link") - [ "$link" = "false" ] && continue - n="${n}x" - done - if [ "$n" = "x" ]; then - eval "${fn}_indirect=false" - else - eval "${fn}_indirect=true" - fi - done -} - -declare_function_pointers() { - for fn in $ALL_FUNCS; do - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - echo "$rtyp ${ofn}($args);" - done - if [ "$(eval "echo \$${fn}_indirect")" = "false" ]; then - echo "#define ${fn} ${dfn}" - else - echo "RTCD_EXTERN $rtyp (*${fn})($args);" - fi - echo - done -} - -set_function_pointers() { - for fn in $ALL_FUNCS; do - n="" 
- rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - if $(eval "echo \$${fn}_indirect"); then - echo " $fn = $dfn;" - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - [ "$ofn" = "$dfn" ] && continue; - link=$(eval "echo \$${fn}_${opt}_link") - [ "$link" = "false" ] && continue - cond="$(eval "echo \$have_${opt}")" - echo " if (${cond}) $fn = $ofn;" - done - fi - echo - done -} - -filter() { - filtered="" - for opt in "$@"; do - [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt" - done - echo $filtered -} - -# -# Helper functions for generating the arch specific RTCD files -# -common_top() { - outfile_basename=$(basename ${symbol:-rtcd}) - include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | \ - tr -c '[A-Z0-9]' _)H_ - cat <<EOF -#ifndef ${include_guard} -#define ${include_guard} - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -$(process_forward_decls) - -$(declare_function_pointers c $ALL_ARCHS) - -void ${symbol:-rtcd}(void); -EOF -} - -common_bottom() { - cat <<EOF - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif -EOF -} - -x86() { - determine_indirection c $ALL_ARCHS - - # Assign the helper variable for each enabled extension - for opt in $ALL_ARCHS; do - uc=$(echo $opt | tr '[a-z]' '[A-Z]') - eval "have_${opt}=\"flags & HAS_${uc}\"" - done - - cat <<EOF -$(common_top) - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - (void)flags; - -$(set_function_pointers c $ALL_ARCHS) -} -#endif -$(common_bottom) -EOF -} - -arm() { - determine_indirection c $ALL_ARCHS - - # Assign the helper variable for each enabled extension - for opt in $ALL_ARCHS; do - uc=$(echo $opt | tr '[a-z]' '[A-Z]') - eval "have_${opt}=\"flags & HAS_${uc}\"" - done - - cat <<EOF 
-$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - (void)flags; - -$(set_function_pointers c $ALL_ARCHS) -} -#endif -$(common_bottom) -EOF -} - - -mips() { - determine_indirection c $ALL_ARCHS - cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -$(set_function_pointers c $ALL_ARCHS) -#if HAVE_DSPR2 -#if CONFIG_VP8 -void dsputil_static_init(); -dsputil_static_init(); -#endif -#if CONFIG_VP9 -void vp9_dsputil_static_init(); -vp9_dsputil_static_init(); -#endif -#endif -} -#endif -$(common_bottom) -EOF -} - -unoptimized() { - determine_indirection c - cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -$(set_function_pointers c) -} -#endif -$(common_bottom) -EOF - -} -# -# Main Driver -# -ALL_FUNCS=$(export LC_ALL=C; echo $ALL_FUNCS | tr ' ' '\n' | sort |tr '\n' ' ') -require c -case $arch in - x86) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) - x86 - ;; - x86_64) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) - REQUIRES=${REQUIRES:-mmx sse sse2} - require $(filter $REQUIRES) - x86 - ;; - mips32) - ALL_ARCHS=$(filter mips32) - dspr2=$([ -f "$config_file" ] && eval echo $(grep HAVE_DSPR2 "$config_file")) - HAVE_DSPR2="${dspr2#*=}" - if [ "$HAVE_DSPR2" = "yes" ]; then - ALL_ARCHS=$(filter mips32 dspr2) - fi - mips - ;; - armv5te) - ALL_ARCHS=$(filter edsp) - arm - ;; - armv6) - ALL_ARCHS=$(filter edsp media) - arm - ;; - armv7) - ALL_ARCHS=$(filter edsp media neon) - arm - ;; - *) - unoptimized - ;; -esac diff --git a/source/libvpx/build/make/thumb.pm b/source/libvpx/build/make/thumb.pm index 9604c8e..483c253 100644 --- a/source/libvpx/build/make/thumb.pm +++ b/source/libvpx/build/make/thumb.pm @@ -51,7 +51,7 @@ sub FixThumbInstructions($$) # Convert register post indexing to a separate add instruction. 
# This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]", - # "add r0, r2". + # "addne r0, r0, r2". s/^(\s*)((ldr|str)(ne)?[bhd]?)(\s+)(\w+),(\s*\w+,)?\s*\[(\w+)\],\s*(\w+)/$1$2$5$6,$7 [$8]\n$1add$4$5$8, $8, $9/g; # Convert a conditional addition to the pc register into a series of diff --git a/source/libvpx/configure b/source/libvpx/configure index 9f5a435..ff350cc 100755 --- a/source/libvpx/configure +++ b/source/libvpx/configure @@ -51,6 +51,7 @@ Advanced options: ${toggle_postproc_visualizer} macro block / block level visualizers ${toggle_multi_res_encoding} enable multiple-resolution encoding ${toggle_temporal_denoising} enable temporal denoising and disable the spatial denoiser + ${toggle_webm_io} enable input from and output to WebM container Codecs: Codecs can be selectively enabled or disabled individually, or by family: @@ -160,6 +161,18 @@ for t in ${all_targets}; do [ -f ${source_path}/${t}.mk ] && enable_feature ${t} done +if ! perl --version >/dev/null; then + die "Perl is required to build" +fi + + +if [ "`cd ${source_path} && pwd`" != "`pwd`" ]; then + # test to see if source_path already configured + if [ -f ${source_path}/vpx_config.h ]; then + die "source directory already configured; run 'make distclean' there first" + fi +fi + # check installed doxygen version doxy_version=$(doxygen --version 2>/dev/null) doxy_major=${doxy_version%%.*} @@ -252,7 +265,6 @@ HAVE_LIST=" " EXPERIMENT_LIST=" multiple_arf - non420 alpha " CONFIG_LIST=" @@ -300,6 +312,7 @@ CONFIG_LIST=" postproc_visualizer os_support unit_tests + webm_io decode_perf_tests multi_res_encoding temporal_denoising @@ -353,6 +366,7 @@ CMDLINE_SELECT=" small postproc_visualizer unit_tests + webm_io decode_perf_tests multi_res_encoding temporal_denoising @@ -675,6 +689,7 @@ process_toolchain() { 10|11|12) VCPROJ_SFX=vcxproj gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh + enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror" ;; esac all_targets="${all_targets} 
solution" @@ -689,6 +704,9 @@ process_toolchain() { enabled postproc || die "postproc_visualizer requires postproc to be enabled" fi + # Enable WebM IO by default. + soft_enable webm_io + # Enable unit tests by default if we have a working C++ compiler. case "$toolchain" in *-vs*) diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk index aeb54ab..fa5d66c 100644 --- a/source/libvpx/examples.mk +++ b/source/libvpx/examples.mk @@ -26,16 +26,18 @@ vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h vpxdec.SRCS += ivfdec.c ivfdec.h vpxdec.SRCS += tools_common.c tools_common.h -vpxdec.SRCS += webmdec.c webmdec.h vpxdec.SRCS += y4menc.c y4menc.h -vpxdec.SRCS += third_party/nestegg/halloc/halloc.h -vpxdec.SRCS += third_party/nestegg/halloc/src/align.h -vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c -vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h -vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h -vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h -vpxdec.SRCS += third_party/nestegg/src/nestegg.c vpxdec.SRCS += $(LIBYUV_SRCS) +ifeq ($(CONFIG_WEBM_IO),yes) + vpxdec.SRCS += third_party/nestegg/halloc/halloc.h + vpxdec.SRCS += third_party/nestegg/halloc/src/align.h + vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c + vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h + vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h + vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h + vpxdec.SRCS += third_party/nestegg/src/nestegg.c + vpxdec.SRCS += webmdec.c webmdec.h +endif vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c @@ -45,15 +47,17 @@ vpxenc.SRCS += ivfenc.c ivfenc.h vpxenc.SRCS += rate_hist.c rate_hist.h vpxenc.SRCS += tools_common.c tools_common.h vpxenc.SRCS += warnings.c warnings.h -vpxenc.SRCS += webmenc.c webmenc.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h vpxenc.SRCS += 
vpx_ports/vpx_timer.h vpxenc.SRCS += vpxstats.c vpxstats.h -vpxenc.SRCS += third_party/libmkv/EbmlIDs.h -vpxenc.SRCS += third_party/libmkv/EbmlWriter.c -vpxenc.SRCS += third_party/libmkv/EbmlWriter.h vpxenc.SRCS += $(LIBYUV_SRCS) +ifeq ($(CONFIG_WEBM_IO),yes) + vpxenc.SRCS += third_party/libmkv/EbmlIDs.h + vpxenc.SRCS += third_party/libmkv/EbmlWriter.c + vpxenc.SRCS += third_party/libmkv/EbmlWriter.h + vpxenc.SRCS += webmenc.c webmenc.h +endif vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c @@ -62,11 +66,12 @@ vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h vp9_spatial_scalable_encoder.SRCS += video_common.h vp9_spatial_scalable_encoder.SRCS += video_writer.h video_writer.c +vp9_spatial_scalable_encoder.SRCS += vpxstats.c vpxstats.h vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder -ifeq ($(CONFIG_SHARED),no) -UTILS-$(CONFIG_VP9_ENCODER) += resize_util.c +ifneq ($(CONFIG_SHARED),yes) +EXAMPLES-$(CONFIG_VP9_ENCODER) += resize_util.c endif # XMA example disabled for now, not used in VP8 @@ -123,9 +128,6 @@ twopass_encoder.SRCS += video_common.h twopass_encoder.SRCS += video_writer.h video_writer.c twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8 twopass_encoder.DESCRIPTION = Two-pass encoder loop -EXAMPLES-$(CONFIG_VP8_ENCODER) += force_keyframe.c -force_keyframe.GUID = 3C67CADF-029F-4C86-81F5-D6D4F51177F0 -force_keyframe.DESCRIPTION = Force generation of keyframes ifeq ($(CONFIG_DECODERS),yes) EXAMPLES-$(CONFIG_VP8_ENCODER) += decode_with_drops.c decode_with_drops.SRCS += ivfdec.h ivfdec.c @@ -142,14 +144,18 @@ EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c endif decode_with_partial_drops.GUID = 61C2D026-5754-46AC-916F-1343ECC5537E 
decode_with_partial_drops.DESCRIPTION = Drops parts of frames while decoding -EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_set_maps.c -vp8_set_maps.SRCS += ivfenc.h ivfenc.c -vp8_set_maps.SRCS += tools_common.h tools_common.c -vp8_set_maps.SRCS += video_common.h -vp8_set_maps.SRCS += video_writer.h video_writer.c -vp8_set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F -vp8_set_maps.DESCRIPTION = VP8 set active and ROI maps +EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c +set_maps.SRCS += ivfenc.h ivfenc.c +set_maps.SRCS += tools_common.h tools_common.c +set_maps.SRCS += video_common.h +set_maps.SRCS += video_writer.h video_writer.c +set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F +set_maps.DESCRIPTION = Set active and ROI maps EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c +vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp8cx_set_ref.SRCS += tools_common.h tools_common.c +vp8cx_set_ref.SRCS += video_common.h +vp8cx_set_ref.SRCS += video_writer.h video_writer.c vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame diff --git a/source/libvpx/examples/force_keyframe.c b/source/libvpx/examples/force_keyframe.c deleted file mode 100644 index 6531e47..0000000 --- a/source/libvpx/examples/force_keyframe.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Forcing A Keyframe -// ================== -// -// This is an example demonstrating how to control placement of keyframes -// on a frame-by-frame basis. 
-// -// Configuration -// ------------- -// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the -// flags passed to `vpx_codec_control()`. In this example, we force a -// keyframe every 8 frames. -// -// Observing The Effects -// --------------------- -// The output of the encoder examples shows a 'K' rather than a dot '.' -// when the encoder generates a keyframe. Note that every 8 frames a 'K' -// is output. - -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <string.h> -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_encoder.h" -#include "vpx/vp8cx.h" -#define interface (vpx_codec_vp8_cx()) -#define fourcc 0x30385056 - -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) - -static void mem_put_le16(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; -} - -static void mem_put_le32(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; - mem[2] = val>>16; - mem[3] = val>>24; -} - -static void die(const char *fmt, ...) { - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - if(fmt[strlen(fmt)-1] != '\n') - printf("\n"); - exit(EXIT_FAILURE); -} - -static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - if(detail) - printf(" %s\n",detail); - exit(EXIT_FAILURE); -} - -static int read_frame(FILE *f, vpx_image_t *img) { - size_t nbytes, to_read; - int res = 1; - - to_read = img->w*img->h*3/2; - nbytes = fread(img->planes[0], 1, to_read, f); - if(nbytes != to_read) { - res = 0; - if(nbytes > 0) - printf("Warning: Read partial frame. 
Check your width & height!\n"); - } - return res; -} - -static void write_ivf_file_header(FILE *outfile, - const vpx_codec_enc_cfg_t *cfg, - int frame_cnt) { - char header[32]; - - if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) - return; - header[0] = 'D'; - header[1] = 'K'; - header[2] = 'I'; - header[3] = 'F'; - mem_put_le16(header+4, 0); /* version */ - mem_put_le16(header+6, 32); /* headersize */ - mem_put_le32(header+8, fourcc); /* headersize */ - mem_put_le16(header+12, cfg->g_w); /* width */ - mem_put_le16(header+14, cfg->g_h); /* height */ - mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - - (void) fwrite(header, 1, 32, outfile); -} - - -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) -{ - char header[12]; - vpx_codec_pts_t pts; - - if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) - return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, (unsigned int)pkt->data.frame.sz); - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - - (void) fwrite(header, 1, 12, outfile); -} - -int main(int argc, char **argv) { - FILE *infile, *outfile; - vpx_codec_ctx_t codec; - vpx_codec_enc_cfg_t cfg; - int frame_cnt = 0; - vpx_image_t raw; - vpx_codec_err_t res; - long width; - long height; - int frame_avail; - int got_data; - int flags = 0; - - /* Open files */ - if(argc!=5) - die("Usage: %s <width> <height> <infile> <outfile>\n", argv[0]); - width = strtol(argv[1], NULL, 0); - height = strtol(argv[2], NULL, 0); - if(width < 16 || width%2 || height <16 || height%2) - die("Invalid resolution: %ldx%ld", width, height); - if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1)) - die("Faile to allocate image", width, height); - if(!(outfile = fopen(argv[4], "wb"))) - die("Failed to open %s for writing", argv[4]); - - printf("Using 
%s\n",vpx_codec_iface_name(interface)); - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; - } - - /* Update the default configuration with our settings */ - cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate - / cfg.g_w / cfg.g_h; - cfg.g_w = width; - cfg.g_h = height; - - write_ivf_file_header(outfile, &cfg, 0); - - - /* Open input file for this encoding pass */ - if(!(infile = fopen(argv[3], "rb"))) - die("Failed to open %s for reading", argv[3]); - - /* Initialize codec */ - if(vpx_codec_enc_init(&codec, interface, &cfg, 0)) - die_codec(&codec, "Failed to initialize encoder"); - - frame_avail = 1; - got_data = 0; - while(frame_avail || got_data) { - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - - if(!(frame_cnt & 7)) - flags |= VPX_EFLAG_FORCE_KF; - else - flags &= ~VPX_EFLAG_FORCE_KF; - frame_avail = read_frame(infile, &raw); - if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - got_data = 0; - while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { - got_data = 1; - switch(pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile, pkt); - (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile); - break; - default: - break; - } - printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT - && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? 
"K":"."); - fflush(stdout); - } - frame_cnt++; - } - printf("\n"); - fclose(infile); - - printf("Processed %d frames.\n",frame_cnt-1); - vpx_img_free(&raw); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); - - /* Try to rewrite the file header with the actual frame count */ - if(!fseek(outfile, 0, SEEK_SET)) - write_ivf_file_header(outfile, &cfg, frame_cnt-1); - fclose(outfile); - return EXIT_SUCCESS; -} diff --git a/source/libvpx/resize_util.c b/source/libvpx/examples/resize_util.c index b068f55..b068f55 100644 --- a/source/libvpx/resize_util.c +++ b/source/libvpx/examples/resize_util.c diff --git a/source/libvpx/examples/vp8_set_maps.c b/source/libvpx/examples/set_maps.c index f3cc9a7..4343832 100644 --- a/source/libvpx/examples/vp8_set_maps.c +++ b/source/libvpx/examples/set_maps.c @@ -56,7 +56,8 @@ static const char *exec_name; void usage_exit() { - fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name); + fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", + exec_name); exit(EXIT_FAILURE); } @@ -65,8 +66,8 @@ static void set_roi_map(const vpx_codec_enc_cfg_t *cfg, unsigned int i; vpx_roi_map_t roi = {0}; - roi.rows = cfg->g_h / 16; - roi.cols = cfg->g_w / 16; + roi.rows = (cfg->g_h + 15) / 16; + roi.cols = (cfg->g_w + 15) / 16; roi.delta_q[0] = 0; roi.delta_q[1] = -2; @@ -98,8 +99,8 @@ static void set_active_map(const vpx_codec_enc_cfg_t *cfg, unsigned int i; vpx_active_map_t map = {0}; - map.rows = cfg->g_h / 16; - map.cols = cfg->g_w / 16; + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; map.active_map = (uint8_t *)malloc(map.rows * map.cols); for (i = 0; i < map.rows * map.cols; ++i) @@ -115,8 +116,8 @@ static void unset_active_map(const vpx_codec_enc_cfg_t *cfg, vpx_codec_ctx_t *codec) { vpx_active_map_t map = {0}; - map.rows = cfg->g_h / 16; - map.cols = cfg->g_w / 16; + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; map.active_map = 
NULL; if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map)) @@ -161,20 +162,20 @@ int main(int argc, char **argv) { VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; const int fps = 2; // TODO(dkovalev) add command line argument - const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + const double bits_per_pixel_per_frame = 0.067; exec_name = argv[0]; - if (argc != 5) + if (argc != 6) die("Invalid number of arguments"); - encoder = get_vpx_encoder_by_name("vp8"); // only vp8 for now + encoder = get_vpx_encoder_by_name(argv[1]); if (!encoder) die("Unsupported codec."); info.codec_fourcc = encoder->fourcc; - info.frame_width = strtol(argv[1], NULL, 0); - info.frame_height = strtol(argv[2], NULL, 0); + info.frame_width = strtol(argv[2], NULL, 0); + info.frame_height = strtol(argv[3], NULL, 0); info.time_base.numerator = 1; info.time_base.denominator = fps; @@ -200,14 +201,16 @@ int main(int argc, char **argv) { cfg.g_h = info.frame_height; cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; - cfg.rc_target_bitrate = bitrate; + cfg.rc_target_bitrate = (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * + cfg.g_h * fps / 1000); + cfg.g_lag_in_frames = 0; - writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); + writer = vpx_video_writer_open(argv[5], kContainerIVF, &info); if (!writer) - die("Failed to open %s for writing.", argv[4]); + die("Failed to open %s for writing.", argv[5]); - if (!(infile = fopen(argv[3], "rb"))) - die("Failed to open %s for reading.", argv[3]); + if (!(infile = fopen(argv[4], "rb"))) + die("Failed to open %s for reading.", argv[4]); if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); @@ -215,7 +218,7 @@ int main(int argc, char **argv) { while (vpx_img_read(&raw, infile)) { ++frame_count; - if (frame_count == 22) { + if (frame_count == 22 && encoder->fourcc == VP8_FOURCC) { 
set_roi_map(&cfg, &codec); } else if (frame_count == 33) { set_active_map(&cfg, &codec); diff --git a/source/libvpx/examples/simple_encoder.c b/source/libvpx/examples/simple_encoder.c index 6ecd498..af58091 100644 --- a/source/libvpx/examples/simple_encoder.c +++ b/source/libvpx/examples/simple_encoder.c @@ -64,6 +64,15 @@ // frame is shown for one frame-time in duration. The flags parameter is // unused in this example. The deadline is set to VPX_DL_REALTIME to // make the example run as quickly as possible. + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the +// flags passed to `vpx_codec_control()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// VPX_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. // // Processing The Encoded Data // --------------------------- @@ -103,8 +112,8 @@ static const char *exec_name; void usage_exit() { fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile> " - "[<error-resilient>]\nSee comments in simple_encoder.c for more " - "information.\n", + "<keyframe-interval> [<error-resilient>]\nSee comments in " + "simple_encoder.c for more information.\n", exec_name); exit(EXIT_FAILURE); } @@ -112,11 +121,12 @@ void usage_exit() { static void encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, int frame_index, + int flags, VpxVideoWriter *writer) { vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt = NULL; - const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0, - VPX_DL_GOOD_QUALITY); + const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, + flags, VPX_DL_GOOD_QUALITY); if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); @@ -148,15 +158,20 @@ int main(int argc, char **argv) { const VpxInterface *encoder = NULL; const int fps = 
30; // TODO(dkovalev) add command line argument const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + int keyframe_interval = 0; + + // TODO(dkovalev): Add some simple command line parsing code to make the + // command line more flexible. const char *codec_arg = NULL; const char *width_arg = NULL; const char *height_arg = NULL; const char *infile_arg = NULL; const char *outfile_arg = NULL; + const char *keyframe_interval_arg = NULL; exec_name = argv[0]; - if (argc < 6) + if (argc < 7) die("Invalid number of arguments"); codec_arg = argv[1]; @@ -164,6 +179,7 @@ int main(int argc, char **argv) { height_arg = argv[3]; infile_arg = argv[4]; outfile_arg = argv[5]; + keyframe_interval_arg = argv[6]; encoder = get_vpx_encoder_by_name(codec_arg); if (!encoder) @@ -187,6 +203,10 @@ int main(int argc, char **argv) { die("Failed to allocate image."); } + keyframe_interval = strtol(keyframe_interval_arg, NULL, 0); + if (keyframe_interval < 0) + die("Invalid keyframe interval value."); + printf("Using %s\n", vpx_codec_iface_name(encoder->interface())); res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0); @@ -198,7 +218,7 @@ int main(int argc, char **argv) { cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = bitrate; - cfg.g_error_resilient = argc > 6 ? strtol(argv[6], NULL, 0) : 0; + cfg.g_error_resilient = argc > 7 ? 
strtol(argv[7], NULL, 0) : 0; writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); if (!writer) @@ -210,9 +230,13 @@ int main(int argc, char **argv) { if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); - while (vpx_img_read(&raw, infile)) - encode_frame(&codec, &raw, frame_count++, writer); - encode_frame(&codec, NULL, -1, writer); // flush the encoder + while (vpx_img_read(&raw, infile)) { + int flags = 0; + if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) + flags |= VPX_EFLAG_FORCE_KF; + encode_frame(&codec, &raw, frame_count++, flags, writer); + } + encode_frame(&codec, NULL, -1, 0, writer); // flush the encoder printf("\n"); fclose(infile); diff --git a/source/libvpx/examples/vp8cx_set_ref.c b/source/libvpx/examples/vp8cx_set_ref.c index f87dd35..9b6d11b 100644 --- a/source/libvpx/examples/vp8cx_set_ref.c +++ b/source/libvpx/examples/vp8cx_set_ref.c @@ -48,212 +48,140 @@ #include <stdio.h> #include <stdlib.h> -#include <stdarg.h> #include <string.h> + #define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" -#define interface (vpx_codec_vp8_cx()) -#define fourcc 0x30385056 - -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) - -static void mem_put_le16(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; -} +#include "vpx/vpx_encoder.h" -static void mem_put_le32(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; - mem[2] = val>>16; - mem[3] = val>>24; -} +#include "./tools_common.h" +#include "./video_writer.h" -static void die(const char *fmt, ...) 
{ - va_list ap; +static const char *exec_name; - va_start(ap, fmt); - vprintf(fmt, ap); - if(fmt[strlen(fmt)-1] != '\n') - printf("\n"); - exit(EXIT_FAILURE); +void usage_exit() { + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n", + exec_name); + exit(EXIT_FAILURE); } -static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - if(detail) - printf(" %s\n",detail); - exit(EXIT_FAILURE); -} - -static int read_frame(FILE *f, vpx_image_t *img) { - size_t nbytes, to_read; - int res = 1; - - to_read = img->w*img->h*3/2; - nbytes = fread(img->planes[0], 1, to_read, f); - if(nbytes != to_read) { - res = 0; - if(nbytes > 0) - printf("Warning: Read partial frame. Check your width & height!\n"); +static void encode_frame(vpx_codec_ctx_t *codec, + vpx_image_t *img, + int frame_index, + VpxVideoWriter *writer) { + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0, + VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) + die_codec(codec, "Failed to encode frame"); + + while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + if (!vpx_video_writer_write_frame(writer, + pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + + printf(keyframe ? 
"K" : "."); + fflush(stdout); } - return res; -} - -static void write_ivf_file_header(FILE *outfile, - const vpx_codec_enc_cfg_t *cfg, - int frame_cnt) { - char header[32]; - - if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) - return; - header[0] = 'D'; - header[1] = 'K'; - header[2] = 'I'; - header[3] = 'F'; - mem_put_le16(header+4, 0); /* version */ - mem_put_le16(header+6, 32); /* headersize */ - mem_put_le32(header+8, fourcc); /* headersize */ - mem_put_le16(header+12, cfg->g_w); /* width */ - mem_put_le16(header+14, cfg->g_h); /* height */ - mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - - (void) fwrite(header, 1, 32, outfile); -} - - -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) -{ - char header[12]; - vpx_codec_pts_t pts; - - if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) - return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, (unsigned int)pkt->data.frame.sz); - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - - (void) fwrite(header, 1, 12, outfile); + } } int main(int argc, char **argv) { - FILE *infile, *outfile; - vpx_codec_ctx_t codec; - vpx_codec_enc_cfg_t cfg; - int frame_cnt = 0; - vpx_image_t raw; - vpx_codec_err_t res; - long width; - long height; - int frame_avail; - int got_data; - int flags = 0; - int update_frame_num = 0; - - /* Open files */ - if(argc!=6) - die("Usage: %s <width> <height> <infile> <outfile> <frame>\n", - argv[0]); - - update_frame_num = atoi(argv[5]); - if(!update_frame_num) - die("Couldn't parse frame number '%s'\n", argv[5]); - - width = strtol(argv[1], NULL, 0); - height = strtol(argv[2], NULL, 0); - if(width < 16 || width%2 || height <16 || height%2) - die("Invalid resolution: %ldx%ld", width, height); - if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1)) - 
die("Faile to allocate image", width, height); - if(!(outfile = fopen(argv[4], "wb"))) - die("Failed to open %s for writing", argv[4]); - - printf("Using %s\n",vpx_codec_iface_name(interface)); - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; + FILE *infile = NULL; + vpx_codec_ctx_t codec = {0}; + vpx_codec_enc_cfg_t cfg = {0}; + int frame_count = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info = {0}; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + int update_frame_num = 0; + const int fps = 30; // TODO(dkovalev) add command line argument + const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + + exec_name = argv[0]; + + if (argc != 6) + die("Invalid number of arguments"); + + // TODO(dkovalev): add vp9 support and rename the file accordingly + encoder = get_vpx_encoder_by_name("vp8"); + if (!encoder) + die("Unsupported codec."); + + update_frame_num = atoi(argv[5]); + if (!update_frame_num) + die("Couldn't parse frame number '%s'\n", argv[5]); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = strtol(argv[1], NULL, 0); + info.frame_height = strtol(argv[2], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || + info.frame_height <= 0 || + (info.frame_width % 2) != 0 || + (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", vpx_codec_iface_name(encoder->interface())); + + res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0); + if (res) + die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + 
cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + + writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); + if (!writer) + die("Failed to open %s for writing.", argv[4]); + + if (!(infile = fopen(argv[3], "rb"))) + die("Failed to open %s for reading.", argv[3]); + + if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + while (vpx_img_read(&raw, infile)) { + if (frame_count + 1 == update_frame_num) { + vpx_ref_frame_t ref; + ref.frame_type = VP8_LAST_FRAME; + ref.img = raw; + if (vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref)) + die_codec(&codec, "Failed to set reference frame"); } - /* Update the default configuration with our settings */ - cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate - / cfg.g_w / cfg.g_h; - cfg.g_w = width; - cfg.g_h = height; - - write_ivf_file_header(outfile, &cfg, 0); - - - /* Open input file for this encoding pass */ - if(!(infile = fopen(argv[3], "rb"))) - die("Failed to open %s for reading", argv[3]); - - /* Initialize codec */ - if(vpx_codec_enc_init(&codec, interface, &cfg, 0)) - die_codec(&codec, "Failed to initialize encoder"); - - frame_avail = 1; - got_data = 0; - while(frame_avail || got_data) { - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - - frame_avail = read_frame(infile, &raw); - - if(frame_cnt + 1 == update_frame_num) { - vpx_ref_frame_t ref; - - ref.frame_type = VP8_LAST_FRAME; - ref.img = raw; + encode_frame(&codec, &raw, frame_count++, writer); + } + encode_frame(&codec, NULL, -1, writer); - if(vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref)) - die_codec(&codec, "Failed to set reference frame"); - } + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); - if(vpx_codec_encode(&codec, frame_avail? 
&raw : NULL, frame_cnt, - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - got_data = 0; - while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { - got_data = 1; - switch(pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile, pkt); - (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile); - break; - default: - break; - } - printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT - && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"."); - fflush(stdout); - } - frame_cnt++; - } - printf("\n"); - fclose(infile); + vpx_img_free(&raw); + if (vpx_codec_destroy(&codec)) + die_codec(&codec, "Failed to destroy codec."); - printf("Processed %d frames.\n",frame_cnt-1); - vpx_img_free(&raw); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); + vpx_video_writer_close(writer); - /* Try to rewrite the file header with the actual frame count */ - if(!fseek(outfile, 0, SEEK_SET)) - write_ivf_file_header(outfile, &cfg, frame_cnt-1); - fclose(outfile); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/source/libvpx/examples/vp9_spatial_scalable_encoder.c b/source/libvpx/examples/vp9_spatial_scalable_encoder.c index 98dc3f5..64e62ef 100644 --- a/source/libvpx/examples/vp9_spatial_scalable_encoder.c +++ b/source/libvpx/examples/vp9_spatial_scalable_encoder.c @@ -26,6 +26,7 @@ #include "vpx/svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" +#include "./vpxstats.h" static const struct arg_enum_list encoding_mode_enum[] = { {"i", INTER_LAYER_PREDICTION_I}, @@ -60,12 +61,28 @@ static const arg_def_t quantizers_arg = static const arg_def_t quantizers_keyframe_arg = ARG_DEF("qn", "quantizers-keyframe", 1, "quantizers for key frames (lowest " "to highest layer)"); +static const arg_def_t passes_arg = + ARG_DEF("p", "passes", 1, "Number of passes (1/2)"); +static const arg_def_t pass_arg = + ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)"); +static const arg_def_t fpf_name_arg = + 
ARG_DEF(NULL, "fpf", 1, "First pass statistics file name"); +static const arg_def_t min_q_arg = + ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); +static const arg_def_t max_q_arg = + ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); +static const arg_def_t min_bitrate_arg = + ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate"); +static const arg_def_t max_bitrate_arg = + ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate"); static const arg_def_t *svc_args[] = { &encoding_mode_arg, &frames_arg, &width_arg, &height_arg, &timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg, &kf_dist_arg, &scale_factors_arg, &quantizers_arg, - &quantizers_keyframe_arg, NULL + &quantizers_keyframe_arg, &passes_arg, &pass_arg, + &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, + &max_bitrate_arg, NULL }; static const SVC_ENCODING_MODE default_encoding_mode = @@ -85,6 +102,10 @@ typedef struct { const char *output_filename; uint32_t frames_to_code; uint32_t frames_to_skip; + struct VpxInputContext input_ctx; + stats_io_t rc_stats; + int passes; + int pass; } AppInput; static const char *exec_name; @@ -105,6 +126,11 @@ static void parse_command_line(int argc, const char **argv_, char **argi = NULL; char **argj = NULL; vpx_codec_err_t res; + int passes = 0; + int pass = 0; + const char *fpf_file_name = NULL; + unsigned int min_bitrate = 0; + unsigned int max_bitrate = 0; // initialize SvcContext with parameters that will be passed to vpx_svc_init svc_ctx->log_level = SVC_LOG_DEBUG; @@ -159,11 +185,72 @@ static void parse_command_line(int argc, const char **argv_, vpx_svc_set_quantizers(svc_ctx, arg.val, 0); } else if (arg_match(&arg, &quantizers_keyframe_arg, argi)) { vpx_svc_set_quantizers(svc_ctx, arg.val, 1); + } else if (arg_match(&arg, &passes_arg, argi)) { + passes = arg_parse_uint(&arg); + if (passes < 1 || passes > 2) { + die("Error: Invalid number of passes (%d)\n", passes); + } + } else if (arg_match(&arg, &pass_arg, argi)) { + pass = arg_parse_uint(&arg); + if (pass < 
1 || pass > 2) { + die("Error: Invalid pass selected (%d)\n", pass); + } + } else if (arg_match(&arg, &fpf_name_arg, argi)) { + fpf_file_name = arg.val; + } else if (arg_match(&arg, &min_q_arg, argi)) { + enc_cfg->rc_min_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_q_arg, argi)) { + enc_cfg->rc_max_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &min_bitrate_arg, argi)) { + min_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_bitrate_arg, argi)) { + max_bitrate = arg_parse_uint(&arg); } else { ++argj; } } + if (passes == 0 || passes == 1) { + if (pass) { + fprintf(stderr, "pass is ignored since there's only one pass\n"); + } + enc_cfg->g_pass = VPX_RC_ONE_PASS; + } else { + if (pass == 0) { + die("pass must be specified when passes is 2\n"); + } + + if (fpf_file_name == NULL) { + die("fpf must be specified when passes is 2\n"); + } + + if (pass == 1) { + enc_cfg->g_pass = VPX_RC_FIRST_PASS; + if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) { + fatal("Failed to open statistics store"); + } + } else { + enc_cfg->g_pass = VPX_RC_LAST_PASS; + if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) { + fatal("Failed to open statistics store"); + } + enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats); + } + app_input->passes = passes; + app_input->pass = pass; + } + + if (enc_cfg->rc_target_bitrate > 0) { + if (min_bitrate > 0) { + enc_cfg->rc_2pass_vbr_minsection_pct = + min_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + if (max_bitrate > 0) { + enc_cfg->rc_2pass_vbr_maxsection_pct = + max_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + } + // Check for unrecognized options for (argi = argv; *argi; ++argi) if (argi[0][0] == '-' && strlen(argi[0]) > 1) @@ -207,6 +294,7 @@ int main(int argc, const char **argv) { int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ FILE *infile = NULL; + int end_of_stream = 0; memset(&svc_ctx, 0, 
sizeof(svc_ctx)); svc_ctx.log_print = 1; @@ -234,34 +322,50 @@ int main(int argc, const char **argv) { VPX_CODEC_OK) { die("Failed to get output resolution"); } - writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF, - &info); - if (!writer) - die("Failed to open %s for writing\n", app_input.output_filename); + + if (!(app_input.passes == 2 && app_input.pass == 1)) { + // We don't save the bitstream for the 1st pass on two pass rate control + writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF, + &info); + if (!writer) + die("Failed to open %s for writing\n", app_input.output_filename); + } // skip initial frames for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); // Encode frames - while (frame_cnt < app_input.frames_to_code) { - if (!vpx_img_read(&raw, infile)) - break; + while (!end_of_stream) { + if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { + // We need one extra vpx_svc_encode call at end of stream to flush + // encoder and get remaining data + end_of_stream = 1; + } - res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration, - VPX_DL_REALTIME); + res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? 
NULL : &raw), + pts, frame_duration, VPX_DL_REALTIME); printf("%s", vpx_svc_get_message(&svc_ctx)); if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); } - if (vpx_svc_get_frame_size(&svc_ctx) > 0) { - vpx_video_writer_write_frame(writer, - vpx_svc_get_buffer(&svc_ctx), - vpx_svc_get_frame_size(&svc_ctx), - pts); + if (!(app_input.passes == 2 && app_input.pass == 1)) { + if (vpx_svc_get_frame_size(&svc_ctx) > 0) { + vpx_video_writer_write_frame(writer, + vpx_svc_get_buffer(&svc_ctx), + vpx_svc_get_frame_size(&svc_ctx), + pts); + } + } + if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) { + stats_write(&app_input.rc_stats, + vpx_svc_get_rc_stats_buffer(&svc_ctx), + vpx_svc_get_rc_stats_buffer_size(&svc_ctx)); + } + if (!end_of_stream) { + ++frame_cnt; + pts += frame_duration; } - ++frame_cnt; - pts += frame_duration; } printf("Processed %d frames\n", frame_cnt); @@ -269,7 +373,12 @@ int main(int argc, const char **argv) { fclose(infile); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); - vpx_video_writer_close(writer); + if (app_input.passes == 2) + stats_close(&app_input.rc_stats, 1); + + if (writer) { + vpx_video_writer_close(writer); + } vpx_img_free(&raw); diff --git a/source/libvpx/examples/vpx_temporal_scalable_patterns.c b/source/libvpx/examples/vpx_temporal_scalable_patterns.c index 6ec1b62..5cb4ee9 100644 --- a/source/libvpx/examples/vpx_temporal_scalable_patterns.c +++ b/source/libvpx/examples/vpx_temporal_scalable_patterns.c @@ -18,6 +18,8 @@ #include <string.h> #define VPX_CODEC_DISABLE_COMPAT 1 +#include "./vpx_config.h" +#include "vpx_ports/vpx_timer.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" @@ -435,6 +437,7 @@ int main(int argc, char **argv) { vpx_codec_err_t res; unsigned int width; unsigned int height; + int speed; int frame_avail; int got_data; int flags = 0; @@ -449,12 +452,13 @@ int main(int argc, char **argv) { const VpxInterface *encoder = NULL; FILE *infile = NULL; struct 
RateControlMetrics rc; + int64_t cx_time = 0; exec_name = argv[0]; // Check usage and arguments. if (argc < 11) { die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> " - "<rate_num> <rate_den> <frame_drop_threshold> <mode> " + "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> " "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]); } @@ -470,12 +474,12 @@ int main(int argc, char **argv) { die("Invalid resolution: %d x %d", width, height); } - layering_mode = strtol(argv[9], NULL, 0); + layering_mode = strtol(argv[10], NULL, 0); if (layering_mode < 0 || layering_mode > 12) { - die("Invalid mode (0..12) %s", argv[9]); + die("Invalid layering mode (0..12) %s", argv[10]); } - if (argc != 10 + mode_to_num_layers[layering_mode]) { + if (argc != 11 + mode_to_num_layers[layering_mode]) { die("Invalid number of arguments"); } @@ -498,12 +502,17 @@ int main(int argc, char **argv) { cfg.g_timebase.num = strtol(argv[6], NULL, 0); cfg.g_timebase.den = strtol(argv[7], NULL, 0); - for (i = 10; (int)i < 10 + mode_to_num_layers[layering_mode]; ++i) { - cfg.ts_target_bitrate[i - 10] = strtol(argv[i], NULL, 0); + speed = strtol(argv[8], NULL, 0); + if (speed < 0) { + die("Invalid speed setting: must be positive"); + } + + for (i = 11; (int)i < 11 + mode_to_num_layers[layering_mode]; ++i) { + cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); } // Real time parameters. 
- cfg.rc_dropframe_thresh = strtol(argv[8], NULL, 0); + cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0); cfg.rc_end_usage = VPX_CBR; cfg.rc_resize_allowed = 0; cfg.rc_min_quantizer = 2; @@ -560,13 +569,16 @@ int main(int argc, char **argv) { if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); - vpx_codec_control(&codec, VP8E_SET_CPUUSED, -6); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); - if (strncmp(encoder->name, "vp9", 3) == 0) { - vpx_codec_control(&codec, VP8E_SET_CPUUSED, 3); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); - if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { - die_codec(&codec, "Failed to set SVC"); + if (strncmp(encoder->name, "vp8", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); + } else if (strncmp(encoder->name, "vp9", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); + if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { + die_codec(&codec, "Failed to set SVC"); } } vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); @@ -576,10 +588,13 @@ int main(int argc, char **argv) { // value, like 100 or 200. max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5) * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0); + // For low-quality key frame. + max_intra_size_pct = 200; vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); frame_avail = 1; while (frame_avail || got_data) { + struct vpx_usec_timer timer; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt; // Update the temporal layer_id. No spatial layers in this test. 
@@ -593,10 +608,13 @@ int main(int argc, char **argv) { frame_avail = vpx_img_read(&raw, infile); if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; + vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags, VPX_DL_REALTIME)) { die_codec(&codec, "Failed to encode frame"); } + vpx_usec_timer_mark(&timer); + cx_time += vpx_usec_timer_elapsed(&timer); // Reset KF flag. if (layering_mode != 7) { layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; @@ -632,6 +650,11 @@ int main(int argc, char **argv) { } fclose(infile); printout_rate_control_summary(&rc, &cfg, frame_cnt); + printf("\n"); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", + frame_cnt, + 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); diff --git a/source/libvpx/libs.mk b/source/libvpx/libs.mk index 302d2af..a5c4b76 100644 --- a/source/libvpx/libs.mk +++ b/source/libvpx/libs.mk @@ -49,7 +49,7 @@ endif # !gcc define rtcd_h_template $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2) @echo " [CREATE] $$@" - $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \ + $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \ --sym=$(1) \ --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \ $$(RTCD_OPTIONS) $$^ > $$@ @@ -162,7 +162,7 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) endif CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh -CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh +CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h @@ -236,6 +236,13 @@ vpx.def: $(call enabled,CODEC_EXPORTS) --out=$@ $^ CLEAN-OBJS += vpx.def +# Assembly files that are included, but don't define 
symbols themselves. +# Filtered out to avoid Visual Studio build warnings. +ASM_INCLUDES := \ + third_party/x86inc/x86inc.asm \ + vpx_config.asm \ + vpx_ports/x86_abi_support.asm \ + vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX) @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ @@ -246,7 +253,8 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX) --proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \ --module-def=vpx.def \ --ver=$(CONFIG_VS_VERSION) \ - --out=$@ $(CFLAGS) $^ \ + --out=$@ $(CFLAGS) \ + $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \ --src-path-bare="$(SRC_PATH_BARE)" \ PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX) diff --git a/source/libvpx/test/active_map_test.cc b/source/libvpx/test/active_map_test.cc new file mode 100644 index 0000000..6377e72 --- /dev/null +++ b/source/libvpx/test/active_map_test.cc @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <climits> +#include <vector> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class ActiveMapTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { + protected: + static const int kWidth = 208; + static const int kHeight = 144; + + ActiveMapTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ActiveMapTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + cpu_used_ = GET_PARAM(2); + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, cpu_used_); + } else if (video->frame() == 3) { + vpx_active_map_t map = {0}; + uint8_t active_map[9 * 13] = { + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, + 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, + }; + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + ASSERT_EQ(map.cols, 13u); + ASSERT_EQ(map.rows, 9u); + map.active_map = active_map; + encoder->Control(VP8E_SET_ACTIVEMAP, &map); + } else if (video->frame() == 15) { + vpx_active_map_t map = {0}; + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + map.active_map = NULL; + encoder->Control(VP8E_SET_ACTIVEMAP, &map); + } + } + + int cpu_used_; +}; + +TEST_P(ActiveMapTest, Test) { + // Validate that this non multiple of 64 wide clip encodes + cfg_.g_lag_in_frames = 0; + cfg_.rc_target_bitrate = 400; + cfg_.rc_resize_allowed = 0; + cfg_.g_pass = VPX_RC_ONE_PASS; + cfg_.rc_end_usage = VPX_CBR; + 
cfg_.kf_max_dist = 90000; + + ::libvpx_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30, + 1, 0, 20); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +#define VP9_FACTORY \ + static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9) + +VP9_INSTANTIATE_TEST_CASE(ActiveMapTest, + ::testing::Values(::libvpx_test::kRealTime), + ::testing::Range(0, 6)); +} // namespace diff --git a/source/libvpx/test/android/scrape_gtest_log.py b/source/libvpx/test/android/scrape_gtest_log.py new file mode 100644 index 0000000..487845c --- /dev/null +++ b/source/libvpx/test/android/scrape_gtest_log.py @@ -0,0 +1,57 @@ +# Copyright (c) 2014 The WebM project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Standalone script which parses a gtest log for json. + +Json is returned returns as an array. This script is used by the libvpx +waterfall to gather json results mixed in with gtest logs. This is +dubious software engineering. +""" + +import getopt +import json +import os +import re +import sys + + +def main(): + if len(sys.argv) != 3: + print "Expects a file to write json to!" 
+ exit(1) + + try: + opts, _ = \ + getopt.getopt(sys.argv[1:], \ + 'o:', ['output-json=']) + except getopt.GetOptError: + print 'scrape_gtest_log.py -o <output_json>' + sys.exit(2) + + output_json = '' + for opt, arg in opts: + if opt in ('-o', '--output-json'): + output_json = os.path.join(arg) + + blob = sys.stdin.read() + json_string = '[' + ','.join('{' + x + '}' for x in + re.findall(r'{([^}]*.?)}', blob)) + ']' + print blob + + output = json.dumps(json.loads(json_string), indent=4, sort_keys=True) + print output + + path = os.path.dirname(output_json) + if path and not os.path.exists(path): + os.makedirs(path) + + outfile = open(output_json, 'w') + outfile.write(output) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/source/libvpx/test/aq_segment_test.cc b/source/libvpx/test/aq_segment_test.cc new file mode 100644 index 0000000..2f88b53 --- /dev/null +++ b/source/libvpx/test/aq_segment_test.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <climits> +#include <vector> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class AqSegmentTest : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params< + libvpx_test::TestMode, int> { + protected: + AqSegmentTest() : EncoderTest(GET_PARAM(0)) {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = 0; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); + encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); + encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100); + } + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + } + } + int set_cpu_used_; + int aq_mode_; +}; + +// Validate that this AQ segmentation mode (AQ=1, variance_ap) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ1) { + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + + aq_mode_ = 1; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// Validate that this AQ segmentation mode (AQ=2, complexity_aq) +// encodes and decodes without a mismatch. 
+TEST_P(AqSegmentTest, TestNoMisMatchAQ2) { + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + + aq_mode_ = 2; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// Validate that this AQ segmentation mode (AQ=3, cyclic_refresh_aq) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ3) { + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + + aq_mode_ = 3; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +using std::tr1::make_tuple; + +#define VP9_FACTORY \ + static_cast<const libvpx_test::CodecFactory*> (&libvpx_test::kVP9) + +VP9_INSTANTIATE_TEST_CASE(AqSegmentTest, + ::testing::Values(::libvpx_test::kRealTime, + ::libvpx_test::kOnePassGood), + ::testing::Range(3, 9)); +} // namespace diff --git a/source/libvpx/test/borders_test.cc b/source/libvpx/test/borders_test.cc index 5071541..b30be45 100644 --- a/source/libvpx/test/borders_test.cc +++ b/source/libvpx/test/borders_test.cc @@ -21,6 +21,7 @@ class BordersTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: BordersTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~BordersTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/config_test.cc b/source/libvpx/test/config_test.cc index 36c6330..0493110 100644 --- a/source/libvpx/test/config_test.cc +++ b/source/libvpx/test/config_test.cc @@ -20,6 +20,7 @@ class ConfigTest : 
public ::libvpx_test::EncoderTest, protected: ConfigTest() : EncoderTest(GET_PARAM(0)), frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {} + virtual ~ConfigTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/cpu_speed_test.cc b/source/libvpx/test/cpu_speed_test.cc index c92e723..be651b4 100644 --- a/source/libvpx/test/cpu_speed_test.cc +++ b/source/libvpx/test/cpu_speed_test.cc @@ -22,6 +22,7 @@ class CpuSpeedTest : public ::libvpx_test::EncoderTest, libvpx_test::TestMode, int> { protected: CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~CpuSpeedTest() {} virtual void SetUp() { InitializeConfig(); @@ -79,7 +80,7 @@ TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { cfg_.rc_min_quantizer = 0; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, - 40); + 20); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } @@ -95,7 +96,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) { cfg_.rc_min_quantizer = 40; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, - 40); + 20); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } @@ -107,6 +108,6 @@ using std::tr1::make_tuple; VP9_INSTANTIATE_TEST_CASE( CpuSpeedTest, - ::testing::Values(::libvpx_test::kTwoPassGood), - ::testing::Range(0, 5)); + ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), + ::testing::Range(0, 8)); } // namespace diff --git a/source/libvpx/test/datarate_test.cc b/source/libvpx/test/datarate_test.cc index 39c9a5a..e8604a6 100644 --- a/source/libvpx/test/datarate_test.cc +++ b/source/libvpx/test/datarate_test.cc @@ -17,10 +17,12 @@ namespace { -class DatarateTest : public ::libvpx_test::EncoderTest, +class DatarateTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { public: - DatarateTest() : EncoderTest(GET_PARAM(0)) {} + DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {} + + virtual ~DatarateTestLarge() {} protected: virtual void SetUp() { @@ -120,7 
+122,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest, size_t bits_in_last_frame_; }; -TEST_P(DatarateTest, BasicBufferModel) { +TEST_P(DatarateTestLarge, BasicBufferModel) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_dropframe_thresh = 1; cfg_.rc_max_quantizer = 56; @@ -151,7 +153,7 @@ TEST_P(DatarateTest, BasicBufferModel) { } } -TEST_P(DatarateTest, ChangingDropFrameThresh) { +TEST_P(DatarateTestLarge, ChangingDropFrameThresh) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_max_quantizer = 36; cfg_.rc_end_usage = VPX_CBR; @@ -179,13 +181,13 @@ TEST_P(DatarateTest, ChangingDropFrameThresh) { } } -class DatarateTestVP9 : public ::libvpx_test::EncoderTest, +class DatarateTestVP9Large : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { public: - DatarateTestVP9() : EncoderTest(GET_PARAM(0)) {} + DatarateTestVP9Large() : EncoderTest(GET_PARAM(0)) {} protected: - virtual ~DatarateTestVP9() {} + virtual ~DatarateTestVP9Large() {} virtual void SetUp() { InitializeConfig(); @@ -358,7 +360,7 @@ class DatarateTestVP9 : public ::libvpx_test::EncoderTest, }; // Check basic rate targeting, -TEST_P(DatarateTestVP9, BasicRateTargeting) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -382,7 +384,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting) { } // Check basic rate targeting, -TEST_P(DatarateTestVP9, BasicRateTargeting444) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting444) { ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); cfg_.g_profile = 1; @@ -414,7 +416,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting444) { // as the drop frame threshold is increased, and (2) that the total number of // frame drops does not decrease as we increase frame drop threshold. // Use a lower qp-max to force some frame drops. 
-TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { +TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -455,7 +457,7 @@ TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { } // Check basic rate targeting for 2 temporal layers. -TEST_P(DatarateTestVP9, BasicRateTargeting2TemporalLayers) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -492,7 +494,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting2TemporalLayers) { } // Check basic rate targeting for 3 temporal layers. -TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -533,7 +535,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) { // Check basic rate targeting for 3 temporal layers, with frame dropping. // Only for one (low) bitrate with lower max_quantizer, and somewhat higher // frame drop threshold, to force frame dropping. -TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayersFrameDropping) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -568,14 +570,15 @@ TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayersFrameDropping) { << " The datarate for the file is greater than target by too much, " "for layer: " << j; // Expect some frame drops in this test: for this 200 frames test, - // expect at least 10% and not more than 50% drops. + // expect at least 10% and not more than 60% drops. 
ASSERT_GE(num_drops_, 20); - ASSERT_LE(num_drops_, 100); + ASSERT_LE(num_drops_, 120); } } -VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES); -VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9, - ::testing::Values(::libvpx_test::kOnePassGood), - ::testing::Range(2, 5)); +VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large, + ::testing::Values(::libvpx_test::kOnePassGood, + ::libvpx_test::kRealTime), + ::testing::Range(2, 7)); } // namespace diff --git a/source/libvpx/test/encode_test_driver.h b/source/libvpx/test/encode_test_driver.h index 8017a2a..9526068 100644 --- a/source/libvpx/test/encode_test_driver.h +++ b/source/libvpx/test/encode_test_driver.h @@ -16,6 +16,9 @@ #include "./vpx_config.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "vpx/vpx_encoder.h" +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER +#include "vpx/vp8cx.h" +#endif namespace libvpx_test { @@ -128,6 +131,13 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER + void Control(int ctrl_id, vpx_active_map_t *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } +#endif + void set_deadline(unsigned long deadline) { deadline_ = deadline; } diff --git a/source/libvpx/test/error_resilience_test.cc b/source/libvpx/test/error_resilience_test.cc index 4cd9efb..89684f8 100644 --- a/source/libvpx/test/error_resilience_test.cc +++ b/source/libvpx/test/error_resilience_test.cc @@ -19,19 +19,20 @@ namespace { const int kMaxErrorFrames = 12; const int kMaxDroppableFrames = 12; -class ErrorResilienceTest : public ::libvpx_test::EncoderTest, +class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: - ErrorResilienceTest() : EncoderTest(GET_PARAM(0)), - psnr_(0.0), - nframes_(0), - mismatch_psnr_(0.0), - 
mismatch_nframes_(0), - encoding_mode_(GET_PARAM(1)) { + ErrorResilienceTestLarge() + : EncoderTest(GET_PARAM(0)), + psnr_(0.0), + nframes_(0), + mismatch_psnr_(0.0), + mismatch_nframes_(0), + encoding_mode_(GET_PARAM(1)) { Reset(); } - virtual ~ErrorResilienceTest() {} + virtual ~ErrorResilienceTestLarge() {} void Reset() { error_nframes_ = 0; @@ -144,7 +145,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest, libvpx_test::TestMode encoding_mode_; }; -TEST_P(ErrorResilienceTest, OnVersusOff) { +TEST_P(ErrorResilienceTestLarge, OnVersusOff) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; @@ -179,7 +180,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) { // if we lose (i.e., drop before decoding) a set of droppable // frames (i.e., frames that don't update any reference buffers). // Check both isolated and consecutive loss. -TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { +TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; @@ -235,7 +236,7 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { #endif } -VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES); -VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES); +VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); } // namespace diff --git a/source/libvpx/test/external_frame_buffer_test.cc b/source/libvpx/test/external_frame_buffer_test.cc index 2e7adc1..54c79e9 100644 --- a/source/libvpx/test/external_frame_buffer_test.cc +++ b/source/libvpx/test/external_frame_buffer_test.cc @@ -210,7 +210,7 @@ class ExternalFrameBufferMD5Test ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_)); ASSERT_EQ(VPX_CODEC_OK, decoder->SetFrameBufferFunctions( - GetVp9FrameBuffer, 
ReleaseVP9FrameBuffer, this)); + GetVP9FrameBuffer, ReleaseVP9FrameBuffer, this)); } } @@ -242,7 +242,7 @@ class ExternalFrameBufferMD5Test // Callback to get a free external frame buffer. Return value < 0 is an // error. - static int GetVp9FrameBuffer(void *user_priv, size_t min_size, + static int GetVP9FrameBuffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferMD5Test *const md5Test = reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv); @@ -462,5 +462,7 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) { } VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test, - ::testing::ValuesIn(libvpx_test::kVP9TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)); } // namespace diff --git a/source/libvpx/test/intrapred_test.cc b/source/libvpx/test/intrapred_test.cc index b28f5fb..cefe192 100644 --- a/source/libvpx/test/intrapred_test.cc +++ b/source/libvpx/test/intrapred_test.cc @@ -26,11 +26,7 @@ using libvpx_test::ACMRandom; class IntraPredBase { public: - virtual ~IntraPredBase() {} - - virtual void TearDown() { - libvpx_test::ClearSystemState(); - } + virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); } protected: void SetupMacroblock(MACROBLOCKD *mbptr, @@ -227,8 +223,9 @@ typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x, uint8_t *ypred_ptr, int y_stride); -class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>, - protected IntraPredBase { +class IntraPredYTest + : public IntraPredBase, + public ::testing::TestWithParam<intra_pred_y_fn_t> { public: static void SetUpTestCase() { mb_ = reinterpret_cast<MACROBLOCKD*>( @@ -308,8 +305,9 @@ typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x, uint8_t *vpred_ptr, int pred_stride); -class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>, - protected IntraPredBase { +class IntraPredUVTest + : public IntraPredBase, + public 
::testing::TestWithParam<intra_pred_uv_fn_t> { public: static void SetUpTestCase() { mb_ = reinterpret_cast<MACROBLOCKD*>( diff --git a/source/libvpx/test/keyframe_test.cc b/source/libvpx/test/keyframe_test.cc index 7ee2898..d8b21a1 100644 --- a/source/libvpx/test/keyframe_test.cc +++ b/source/libvpx/test/keyframe_test.cc @@ -21,6 +21,7 @@ class KeyframeTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: KeyframeTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~KeyframeTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/pp_filter_test.cc b/source/libvpx/test/pp_filter_test.cc index ff7bb08..86c2b0e 100644 --- a/source/libvpx/test/pp_filter_test.cc +++ b/source/libvpx/test/pp_filter_test.cc @@ -25,7 +25,7 @@ typedef void (*post_proc_func_t)(unsigned char *src_ptr, namespace { -class Vp8PostProcessingFilterTest +class VP8PostProcessingFilterTest : public ::testing::TestWithParam<post_proc_func_t> { public: virtual void TearDown() { @@ -36,7 +36,7 @@ class Vp8PostProcessingFilterTest // Test routine for the VP8 post-processing function // vp8_post_proc_down_and_across_mb_row_c. -TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { +TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) { // Size of the underlying data block that will be filtered. 
const int block_width = 16; const int block_height = 16; @@ -91,7 +91,7 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { for (int i = 0; i < block_height; ++i) { for (int j = 0; j < block_width; ++j) { EXPECT_EQ(expected_data[i], pixel_ptr[j]) - << "Vp8PostProcessingFilterTest failed with invalid filter output"; + << "VP8PostProcessingFilterTest failed with invalid filter output"; } pixel_ptr += output_stride; } @@ -101,11 +101,11 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { vpx_free(flimits); }; -INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest, +INSTANTIATE_TEST_CASE_P(C, VP8PostProcessingFilterTest, ::testing::Values(vp8_post_proc_down_and_across_mb_row_c)); #if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest, +INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest, ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2)); #endif diff --git a/source/libvpx/test/register_state_check.h b/source/libvpx/test/register_state_check.h index 479a42d..7e3d053 100644 --- a/source/libvpx/test/register_state_check.h +++ b/source/libvpx/test/register_state_check.h @@ -11,14 +11,15 @@ #ifndef TEST_REGISTER_STATE_CHECK_H_ #define TEST_REGISTER_STATE_CHECK_H_ -#ifdef _WIN64 +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_config.h" + +#if defined(_WIN64) #define _WIN32_LEAN_AND_MEAN #include <windows.h> #include <winnt.h> -#include "third_party/googletest/src/include/gtest/gtest.h" - namespace testing { namespace internal { @@ -81,7 +82,61 @@ class RegisterStateCheck { } // namespace libvpx_test -#else // !_WIN64 +#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) \ + && !CONFIG_SHARED && HAVE_NEON + +#include "vpx/vpx_integer.h" + +extern "C" { +// Save the d8-d15 registers into store. +void vp9_push_neon(int64_t *store); +} + +namespace libvpx_test { + +// Compares the state of d8-d15 at construction with their state at +// destruction. 
These registers should be preserved by the callee on +// arm platform. +// Usage: +// { +// RegisterStateCheck reg_check; +// FunctionToVerify(); +// } +class RegisterStateCheck { + public: + RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); } + ~RegisterStateCheck() { EXPECT_TRUE(Check()); } + + private: + static bool StoreRegisters(int64_t store[8]) { + vp9_push_neon(store); + return true; + } + + // Compares the register state. Returns true if the states match. + bool Check() const { + if (!initialized_) return false; + int64_t post_store[8]; + vp9_push_neon(post_store); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(pre_store_[i], post_store[i]) << "d" + << i + 8 << " has been modified"; + } + return !testing::Test::HasNonfatalFailure(); + } + + bool initialized_; + int64_t pre_store_[8]; +}; + +#define REGISTER_STATE_CHECK(statement) do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ +} while (false) + +} // namespace libvpx_test + +#else namespace libvpx_test { diff --git a/source/libvpx/test/set_roi.cc b/source/libvpx/test/set_roi.cc index e28f511..5b054f4 100644 --- a/source/libvpx/test/set_roi.cc +++ b/source/libvpx/test/set_roi.cc @@ -26,7 +26,7 @@ using libvpx_test::ACMRandom; namespace { -TEST(Vp8RoiMapTest, ParameterCheck) { +TEST(VP8RoiMapTest, ParameterCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; diff --git a/source/libvpx/test/sixtap_predict_test.cc b/source/libvpx/test/sixtap_predict_test.cc index 1b2f03f..0c600f4 100644 --- a/source/libvpx/test/sixtap_predict_test.cc +++ b/source/libvpx/test/sixtap_predict_test.cc @@ -198,7 +198,7 @@ const sixtap_predict_fn_t sixtap_16x16_neon = vp8_sixtap_predict16x16_neon; const sixtap_predict_fn_t sixtap_8x8_neon = vp8_sixtap_predict8x8_neon; const sixtap_predict_fn_t sixtap_8x4_neon = vp8_sixtap_predict8x4_neon; INSTANTIATE_TEST_CASE_P( - NEON, SixtapPredictTest, 
::testing::Values( + DISABLED_NEON, SixtapPredictTest, ::testing::Values( make_tuple(16, 16, sixtap_16x16_neon), make_tuple(8, 8, sixtap_8x8_neon), make_tuple(8, 4, sixtap_8x4_neon))); diff --git a/source/libvpx/test/superframe_test.cc b/source/libvpx/test/superframe_test.cc index d91e7b1..c0f542d 100644 --- a/source/libvpx/test/superframe_test.cc +++ b/source/libvpx/test/superframe_test.cc @@ -21,6 +21,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, protected: SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL), last_sf_pts_(0) {} + virtual ~SuperframeTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/svc_test.cc b/source/libvpx/test/svc_test.cc index 2e56534..fb9277b 100644 --- a/source/libvpx/test/svc_test.cc +++ b/source/libvpx/test/svc_test.cc @@ -31,6 +31,7 @@ class SvcTest : public ::testing::Test { SvcTest() : codec_iface_(0), test_file_name_("hantro_collage_w352h288.yuv"), + stats_file_name_("hantro_collage_w352h288.stat"), codec_initialized_(false), decoder_(0) { memset(&svc_, 0, sizeof(svc_)); @@ -73,6 +74,7 @@ class SvcTest : public ::testing::Test { struct vpx_codec_enc_cfg codec_enc_; vpx_codec_iface_t *codec_iface_; std::string test_file_name_; + std::string stats_file_name_; bool codec_initialized_; Decoder *decoder_; }; @@ -362,4 +364,109 @@ TEST_F(SvcTest, GetLayerResolution) { EXPECT_EQ(kHeight * 8 / 16, layer_height); } +TEST_F(SvcTest, FirstPassEncode) { + svc_.spatial_layers = 2; + codec_enc_.g_pass = VPX_RC_FIRST_PASS; + vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); + vpx_svc_set_quantizers(&svc_, "40,30", 0); + + vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + ASSERT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + // FRAME 0 + video.Begin(); + res = vpx_svc_encode(&svc_, &codec_, video.img(), 
video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); + + // FRAME 1 + video.Next(); + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); + + // Flush encoder and test EOS packet + res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); +} + +TEST_F(SvcTest, SecondPassEncode) { + svc_.spatial_layers = 2; + codec_enc_.g_pass = VPX_RC_LAST_PASS; + + FILE *const stats_file = libvpx_test::OpenTestDataFile(stats_file_name_); + ASSERT_TRUE(stats_file != NULL) << "Stats file open failed. Filename: " + << stats_file; + + struct vpx_fixed_buf stats_buf; + fseek(stats_file, 0, SEEK_END); + stats_buf.sz = static_cast<size_t>(ftell(stats_file)); + fseek(stats_file, 0, SEEK_SET); + + stats_buf.buf = malloc(stats_buf.sz); + ASSERT_TRUE(stats_buf.buf != NULL); + const size_t bytes_read = fread(stats_buf.buf, 1, stats_buf.sz, stats_file); + ASSERT_EQ(bytes_read, stats_buf.sz); + fclose(stats_file); + codec_enc_.rc_twopass_stats_in = stats_buf; + + vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + ASSERT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + // FRAME 0 + video.Begin(); + // This frame is a keyframe. 
+ res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); + + vpx_codec_err_t res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + // FRAME 1 + video.Next(); + // This is a P-frame. + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); + + res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + // FRAME 2 + video.Next(); + // This is a P-frame. + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); + + res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + free(stats_buf.buf); +} + } // namespace diff --git a/source/libvpx/test/test-data.sha1 b/source/libvpx/test/test-data.sha1 index 6f718ef..cf2ad1e 100644 --- a/source/libvpx/test/test-data.sha1 +++ b/source/libvpx/test/test-data.sha1 @@ -1,4 +1,5 @@ d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv +998cec53307c94aa5835aaf8d5731f6a3c7c2e5a hantro_collage_w352h288.stat b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m 5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf @@ -588,3 +589,49 @@ b3c48382cf7d0454e83a02497c229d27720f9e20 vp90-2-11-size-351x287.webm.md5 92a756469fa438220524e7fa6ac1d38c89514d17 vp90-2-12-droppable_2.ivf.md5 
c21e97e4ba486520118d78b01a5cb6e6dc33e190 vp90-2-12-droppable_3.ivf 601abc9e4176c70f82ac0381365e9b151fdd24cd vp90-2-12-droppable_3.ivf.md5 +61c640dad23cd4f7ad811b867e7b7e3521f4e3ba vp90-2-13-largescaling.webm +bca1b02eebdb088fa3f389fe0e7571e75a71f523 vp90-2-13-largescaling.webm.md5 +c740708fa390806eebaf669909c1285ab464f886 vp90-2-14-resize-fp-tiles-1-2.webm +c7b85ffd8e11500f73f52e7dc5a47f57c393d47f vp90-2-14-resize-fp-tiles-1-2.webm.md5 +ec8faa352a08f7033c60f29f80d505e2d7daa103 vp90-2-14-resize-fp-tiles-1-4.webm +6852c783fb421bda5ded3d4c5a3ffc46de03fbc1 vp90-2-14-resize-fp-tiles-1-4.webm.md5 +8af61853ac0d07c4cb5bf7c2016661ba350b3497 vp90-2-14-resize-fp-tiles-1-8.webm +571353bac89fea60b5706073409aa3c0d42aefe9 vp90-2-14-resize-fp-tiles-1-8.webm.md5 +b1c187ed69931496b82ec194017a79831bafceef vp90-2-14-resize-fp-tiles-1-16.webm +1c199a41afe42ce303944d70089eaaa2263b4a09 vp90-2-14-resize-fp-tiles-1-16.webm.md5 +8eaae5a6f2dff934610b0c7a917d7f583ba74aa5 vp90-2-14-resize-fp-tiles-2-1.webm +db18fcf915f7ffaea6c39feab8bda6c1688af011 vp90-2-14-resize-fp-tiles-2-1.webm.md5 +bc3046d138941e2a20e9ceec0ff6d25c25d12af3 vp90-2-14-resize-fp-tiles-4-1.webm +393211b808030d09a79927b17a4374b2f68a60ae vp90-2-14-resize-fp-tiles-4-1.webm.md5 +6e8f8e31721a0f7f68a2964e36e0e698c2e276b1 vp90-2-14-resize-fp-tiles-8-1.webm +491fd3cd78fb0577bfe905bb64bbf64bd7d29140 vp90-2-14-resize-fp-tiles-8-1.webm.md5 +cc5958da2a7edf739cd2cfeb18bd05e77903087e vp90-2-14-resize-fp-tiles-16-1.webm +0b58daf55aaf9063bf5b4fb33393d18b417dc428 vp90-2-14-resize-fp-tiles-16-1.webm.md5 +821eeecc9d8c6a316134dd42d1ff057787d8047b vp90-2-14-resize-fp-tiles-2-4.webm +374c549f2839a3d0b732c4e3650700144037e76c vp90-2-14-resize-fp-tiles-2-4.webm.md5 +dff8c8e49aacea9f4c7f22cb882da984e2a1b405 vp90-2-14-resize-fp-tiles-2-8.webm +e5b8820a7c823b21297d6e889e57ec401882c210 vp90-2-14-resize-fp-tiles-2-8.webm.md5 +77629e4b23e32896aadf6e994c78bd4ffa1c7797 vp90-2-14-resize-fp-tiles-2-16.webm +1937f5df032664ac345d4613ad4417b4967b1230 
vp90-2-14-resize-fp-tiles-2-16.webm.md5 +380ba5702bb1ec7947697314ab0300b5c56a1665 vp90-2-14-resize-fp-tiles-4-2.webm +fde7b30d2aa64c1e851a4852f655d79fc542cf66 vp90-2-14-resize-fp-tiles-4-2.webm.md5 +dc784b258ffa2abc2ae693d11792acf0bb9cb74f vp90-2-14-resize-fp-tiles-8-2.webm +edf26f0130aeee8342d49c2c8f0793ad008782d9 vp90-2-14-resize-fp-tiles-8-2.webm.md5 +8e575789fd63ebf69e8eff1b9a4351a249a73bee vp90-2-14-resize-fp-tiles-16-2.webm +b6415318c1c589a1f64b9d569ce3cabbec2e0d52 vp90-2-14-resize-fp-tiles-16-2.webm.md5 +e3adc944a11c4c5517e63664c84ebb0847b64d81 vp90-2-14-resize-fp-tiles-4-8.webm +03cba0532bc90a05b1990db830bf5701e24e7982 vp90-2-14-resize-fp-tiles-4-8.webm.md5 +3b27a991eb6d78dce38efab35b7db682e8cbbee3 vp90-2-14-resize-fp-tiles-4-16.webm +5d16b7f82bf59f802724ddfd97abb487150b1c9d vp90-2-14-resize-fp-tiles-4-16.webm.md5 +d5fed8c28c1d4c7e232ebbd25cf758757313ed96 vp90-2-14-resize-fp-tiles-8-4.webm +5a8ff8a52cbbde7bfab569beb6d971c5f8b904f7 vp90-2-14-resize-fp-tiles-8-4.webm.md5 +17a5faa023d77ee9dad423a4e0d3145796bbc500 vp90-2-14-resize-fp-tiles-16-4.webm +2ef8daa3c3e750fd745130d0a76a39fe86f0448f vp90-2-14-resize-fp-tiles-16-4.webm.md5 +9361e031f5cc990d8740863e310abb5167ae351e vp90-2-14-resize-fp-tiles-8-16.webm +57f13a2197486584f4e1a4f82ad969f3abc5a1a2 vp90-2-14-resize-fp-tiles-8-16.webm.md5 +5803fc6fcbfb47b7661f3fcc6499158a32b56675 vp90-2-14-resize-fp-tiles-16-8.webm +be0fe64a1a4933696ff92d93f9bdecdbd886dc13 vp90-2-14-resize-fp-tiles-16-8.webm.md5 +0ac0f6d20a0afed77f742a3b9acb59fd7b9cb093 vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +1765315acccfe6cd12230e731369fcb15325ebfa vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +1ef480392112b3509cb190afbb96f9a38dd9fbac vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 diff --git a/source/libvpx/test/test.mk b/source/libvpx/test/test.mk index bf6d055..92664e2 100644 --- a/source/libvpx/test/test.mk +++ b/source/libvpx/test/test.mk @@ -18,6 +18,7 @@ 
LIBVPX_TEST_SRCS-yes += video_source.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h @@ -29,6 +30,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc @@ -120,6 +122,7 @@ endif # CONFIG_SHARED ## TEST DATA ## LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.stat LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m @@ -691,8 +694,54 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-14-resize-fp-tiles-1-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm 
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/source/libvpx/test/test_vector_test.cc b/source/libvpx/test/test_vector_test.cc index 53b7636..9ba18da 100644 --- a/source/libvpx/test/test_vector_test.cc +++ b/source/libvpx/test/test_vector_test.cc @@ -89,8 +89,12 @@ TEST_P(TestVectorTest, MD5Match) { } VP8_INSTANTIATE_TEST_CASE(TestVectorTest, - ::testing::ValuesIn(libvpx_test::kVP8TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP8TestVectors, + libvpx_test::kVP8TestVectors + + libvpx_test::kNumVP8TestVectors)); VP9_INSTANTIATE_TEST_CASE(TestVectorTest, - 
::testing::ValuesIn(libvpx_test::kVP9TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)); } // namespace diff --git a/source/libvpx/test/test_vectors.cc b/source/libvpx/test/test_vectors.cc index aba8a3c..ff3c389 100644 --- a/source/libvpx/test/test_vectors.cc +++ b/source/libvpx/test/test_vectors.cc @@ -12,8 +12,10 @@ namespace libvpx_test { +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + #if CONFIG_VP8_DECODER -const char *kVP8TestVectors[kNumVp8TestVectors] = { +const char *const kVP8TestVectors[] = { "vp80-00-comprehensive-001.ivf", "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", @@ -47,9 +49,10 @@ const char *kVP8TestVectors[kNumVp8TestVectors] = { "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf", "vp80-06-smallsize.ivf" }; +const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER -const char *kVP9TestVectors[kNumVp9TestVectors] = { +const char *const kVP9TestVectors[] = { "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm", "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm", "vp90-2-00-quantizer-04.webm", "vp90-2-00-quantizer-05.webm", @@ -161,8 +164,22 @@ const char *kVP9TestVectors[kNumVp9TestVectors] = { "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm", "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf", "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf", - "vp91-2-04-yv444.webm" + "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm", + "vp90-2-14-resize-fp-tiles-1-16.webm", + "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm", + "vp90-2-14-resize-fp-tiles-16-1.webm", "vp90-2-14-resize-fp-tiles-16-2.webm", + "vp90-2-14-resize-fp-tiles-16-4.webm", + "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + 
"vp90-2-14-resize-fp-tiles-16-8.webm", "vp90-2-14-resize-fp-tiles-1-8.webm", + "vp90-2-14-resize-fp-tiles-2-16.webm", "vp90-2-14-resize-fp-tiles-2-1.webm", + "vp90-2-14-resize-fp-tiles-2-4.webm", "vp90-2-14-resize-fp-tiles-2-8.webm", + "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm", + "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", + "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", + "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm" }; +const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/source/libvpx/test/test_vectors.h b/source/libvpx/test/test_vectors.h index d5ecc96..8e1aabb 100644 --- a/source/libvpx/test/test_vectors.h +++ b/source/libvpx/test/test_vectors.h @@ -16,14 +16,13 @@ namespace libvpx_test { #if CONFIG_VP8_DECODER -const int kNumVp8TestVectors = 62; -extern const char *kVP8TestVectors[kNumVp8TestVectors]; +extern const int kNumVP8TestVectors; +extern const char *const kVP8TestVectors[]; #endif #if CONFIG_VP9_DECODER -const int kNumVp9TestVectors = 223; - -extern const char *kVP9TestVectors[kNumVp9TestVectors]; +extern const int kNumVP9TestVectors; +extern const char *const kVP9TestVectors[]; #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/source/libvpx/test/tools_common.sh b/source/libvpx/test/tools_common.sh new file mode 100755 index 0000000..cd79771 --- /dev/null +++ b/source/libvpx/test/tools_common.sh @@ -0,0 +1,437 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## +## This file contains shell code shared by test scripts for libvpx tools. +set -e + +# Sets $VPX_TOOL_TEST to the name specified by positional parameter one. +test_begin() { + VPX_TOOL_TEST="${1}" +} + +# Clears the VPX_TOOL_TEST variable after confirming that $VPX_TOOL_TEST matches +# positional parameter one. +test_end() { + if [ "$1" != "${VPX_TOOL_TEST}" ]; then + echo "FAIL completed test mismatch!." + echo " completed test: ${1}" + echo " active test: ${VPX_TOOL_TEST}." + return 1 + fi + VPX_TOOL_TEST='<unset>' +} + +# Echoes the target configuration being tested. +test_configuration_target() { + vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" + # Find the TOOLCHAIN line, split it using ':=' as the field separator, and + # print the last field to get the value. Then pipe the value to tr to consume + # any leading/trailing spaces while allowing tr to echo the output to stdout. + awk -F ':=' '/TOOLCHAIN/ { print $NF }' "${vpx_config_mk}" | tr -d ' ' +} + +# Trap function used for failure reports and tool output directory removal. +# When the contents of $VPX_TOOL_TEST do not match the string '<unset>', reports +# failure of test stored in $VPX_TOOL_TEST. +cleanup() { + if [ -n "${VPX_TOOL_TEST}" ] && [ "${VPX_TOOL_TEST}" != '<unset>' ]; then + echo "FAIL: $VPX_TOOL_TEST" + fi + if [ -n "${VPX_TEST_OUTPUT_DIR}" ] && [ -d "${VPX_TEST_OUTPUT_DIR}" ]; then + rm -rf "${VPX_TEST_OUTPUT_DIR}" + fi +} + +# Echoes the git hash portion of the VERSION_STRING variable defined in +# $LIBVPX_CONFIG_PATH/config.mk to stdout, or the version number string when +# no git hash is contained in VERSION_STRING. +config_hash() { + vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" + # Find VERSION_STRING line, split it with "-g" and print the last field to + # output the git hash to stdout. + vpx_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${vpx_config_mk}") + # Handle two situations here: + # 1. The default case: $vpx_version is a git hash, so echo it unchanged. + # 2. 
When being run a non-dev tree, the -g portion is not present in the + # version string: It's only the version number. + # In this case $vpx_version is something like 'VERSION_STRING=v1.3.0', so + # we echo only what is after the '='. + echo "${vpx_version##*=}" +} + +# Echoes the short form of the current git hash. +current_hash() { + if git --version > /dev/null 2>&1; then + (cd "$(dirname "${0}")" + git rev-parse --short HEAD) + else + # Return the config hash if git is unavailable: Fail silently, git hashes + # are used only for warnings. + config_hash + fi +} + +# Echoes warnings to stdout when git hash in vpx_config.h does not match the +# current git hash. +check_git_hashes() { + hash_at_configure_time=$(config_hash) + hash_now=$(current_hash) + + if [ "${hash_at_configure_time}" != "${hash_now}" ]; then + echo "Warning: git hash has changed since last configure." + fi +} + +# This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and +# LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that +# the variables are set and that they all evaluate to directory paths. +verify_vpx_test_environment() { + if [ ! -d "${LIBVPX_BIN_PATH}" ]; then + echo "The LIBVPX_BIN_PATH environment variable must be set." + return 1 + fi + if [ ! -d "${LIBVPX_CONFIG_PATH}" ]; then + echo "The LIBVPX_CONFIG_PATH environment variable must be set." + return 1 + fi + if [ ! -d "${LIBVPX_TEST_DATA_PATH}" ]; then + echo "The LIBVPX_TEST_DATA_PATH environment variable must be set." + return 1 + fi +} + +# Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which +# should be a LIBVPX preprocessor flag. Echoes yes to stdout when the feature +# is available. 
+vpx_config_option_enabled() { + vpx_config_option="${1}" + vpx_config_file="${LIBVPX_CONFIG_PATH}/vpx_config.h" + config_line=$(grep "${vpx_config_option}" "${vpx_config_file}") + if echo "${config_line}" | egrep -q '1$'; then + echo yes + fi +} + +# Echoes yes when output of test_configuration_target() contains win32 or win64. +is_windows_target() { + if test_configuration_target \ + | grep -q -e win32 -e win64 > /dev/null 2>&1; then + echo yes + fi +} + +# Echoes yes to stdout when the file named by positional parameter one exists +# in LIBVPX_BIN_PATH, and is executable. +vpx_tool_available() { + tool_name="${1}" + if [ "$(is_windows_target)" = "yes" ]; then + tool_name="${tool_name}.exe" + fi + [ -x "${LIBVPX_BIN_PATH}/${1}" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP8_DECODER. +vp8_decode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP8_DECODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP8_ENCODER. +vp8_encode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP8_ENCODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP9_DECODER. +vp9_decode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP9_DECODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP9_ENCODER. +vp9_encode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP9_ENCODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_WEBM_IO. +webm_io_available() { + [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpxdec exists according to vpx_tool_available(). +vpxdec_available() { + [ -n $(vpx_tool_available vpxdec) ] && echo yes +} + +# Wrapper function for running vpxdec in noblit mode. 
Requires that +# LIBVPX_BIN_PATH points to the directory containing vpxdec. Positional +# parameter one is used as the input file path. Positional parameter two, when +# present, is interpreted as a boolean flag that means the input should be sent +# to vpxdec via pipe from cat instead of directly. +vpxdec() { + input="${1}" + pipe_input=${2} + + if [ $# -gt 2 ]; then + # shift away $1 and $2 so the remaining arguments can be passed to vpxdec + # via $@. + shift 2 + fi + + decoder="${LIBVPX_BIN_PATH}/vpxdec" + + if [ "$(is_windows_target)" = "yes" ]; then + decoder="${decoder}.exe" + fi + + if [ -z "${pipe_input}" ]; then + "${decoder}" "$input" --summary --noblit "$@" > /dev/null 2>&1 + else + cat "${input}" | "${decoder}" - --summary --noblit "$@" > /dev/null 2>&1 + fi +} + +# Echoes yes to stdout when vpxenc exists according to vpx_tool_available(). +vpxenc_available() { + [ -n $(vpx_tool_available vpxenc) ] && echo yes +} + +# Wrapper function for running vpxenc. Positional parameters are interpreted as +# follows: +# 1 - codec name +# 2 - input width +# 3 - input height +# 4 - number of frames to encode +# 5 - path to input file +# 6 - path to output file +# Note: The output file path must end in .ivf to output an IVF file. +# 7 - extra flags +# Note: Extra flags currently supports a special case: when set to "-" +# input is piped to vpxenc via cat. +vpxenc() { + encoder="${LIBVPX_BIN_PATH}/vpxenc" + codec="${1}" + width=${2} + height=${3} + frames=${4} + input=${5} + output="${VPX_TEST_OUTPUT_DIR}/${6}" + extra_flags=${7} + + if [ "$(is_windows_target)" = "yes" ]; then + encoder="${encoder}.exe" + fi + + # Because --ivf must be within the command line to get IVF from vpxenc. 
+ if echo "${output}" | egrep -q 'ivf$'; then + use_ivf=--ivf + else + unset use_ivf + fi + + if [ "${extra_flags}" = "-" ]; then + pipe_input=yes + extra_flags=${8} + else + unset pipe_input + fi + + if [ -z "${pipe_input}" ]; then + "${encoder}" --codec=${codec} --width=${width} --height=${height} \ + --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" \ + "${input}" > /dev/null 2>&1 + else + cat "${input}" \ + | "${encoder}" --codec=${codec} --width=${width} --height=${height} \ + --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" - \ + > /dev/null 2>&1 + fi + + if [ ! -e "${output}" ]; then + # Return non-zero exit status: output file doesn't exist, so something + # definitely went wrong. + return 1 + fi +} + +# Filters strings from positional parameter one using the filter specified by +# positional parameter two. Filter behavior depends on the presence of a third +# positional parameter. When parameter three is present, strings that match the +# filter are excluded. When omitted, strings matching the filter are included. +# The filtered string is echoed to stdout. +filter_strings() { + strings=${1} + filter=${2} + exclude=${3} + + if [ -n "${exclude}" ]; then + # When positional parameter three exists the caller wants to remove strings. + # Tell grep to invert matches using the -v argument. + exclude='-v' + else + unset exclude + fi + + if [ -n "${filter}" ]; then + for s in ${strings}; do + if echo "${s}" | egrep -q ${exclude} "${filter}" > /dev/null 2>&1; then + filtered_strings="${filtered_strings} ${s}" + fi + done + else + filtered_strings="${strings}" + fi + echo "${filtered_strings}" +} + +# Runs user test functions passed via positional parameters one and two. +# Functions in positional parameter one are treated as environment verification +# functions and are run unconditionally. Functions in positional parameter two +# are run according to the rules specified in vpx_test_usage(). 
+run_tests() { + env_tests="verify_vpx_test_environment ${1}" + tests_to_filter="${2}" + + if [ "${VPX_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then + # Filter out DISABLED tests. + tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude) + fi + + if [ -n "${VPX_TEST_FILTER}" ]; then + # Remove tests not matching the user's filter. + tests_to_filter=$(filter_strings "${tests_to_filter}" ${VPX_TEST_FILTER}) + fi + + tests_to_run="${env_tests} ${tests_to_filter}" + + check_git_hashes + + # Run tests. + for test in ${tests_to_run}; do + test_begin "${test}" + "${test}" + [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo " PASS ${test}" + test_end "${test}" + done + + tested_config="$(test_configuration_target) @ $(current_hash)" + echo $(basename "${0%.*}"): Done, all tests pass for ${tested_config}. +} + +vpx_test_usage() { +cat << EOF + Usage: ${0##*/} [arguments] + --bin-path <path to libvpx binaries directory> + --config-path <path to libvpx config directory> + --filter <filter>: User test filter. Only tests matching filter are run. + --run-disabled-tests: Run disabled tests. + --help: Display this message and exit. + --test-data-path <path to libvpx test data directory> + --verbose: Verbose output. + + When the --bin-path option is not specified the script attempts to use + \$LIBVPX_BIN_PATH and then the current directory. + + When the --config-path option is not specified the script attempts to use + \$LIBVPX_CONFIG_PATH and then the current directory. + + When the -test-data-path option is not specified the script attempts to use + \$LIBVPX_TEST_DATA_PATH and then the current directory. +EOF +} + +# Returns non-zero (failure) when required environment variables are empty +# strings. +vpx_test_check_environment() { + if [ -z "${LIBVPX_BIN_PATH}" ] || \ + [ -z "${LIBVPX_CONFIG_PATH}" ] || \ + [ -z "${LIBVPX_TEST_DATA_PATH}" ]; then + return 1 + fi +} + +# Parse the command line. 
+while [ -n "$1" ]; do + case "$1" in + --bin-path) + LIBVPX_BIN_PATH="$2" + shift + ;; + --config-path) + LIBVPX_CONFIG_PATH="$2" + shift + ;; + --filter) + VPX_TEST_FILTER="$2" + shift + ;; + --run-disabled-tests) + VPX_TEST_RUN_DISABLED_TESTS=yes + ;; + --help) + vpx_test_usage + exit + ;; + --test-data-path) + LIBVPX_TEST_DATA_PATH="$2" + shift + ;; + --verbose) + VPX_TEST_VERBOSE_OUTPUT=yes + ;; + *) + vpx_test_usage + exit 1 + ;; + esac + shift +done + +# Handle running the tests from a build directory without arguments when running +# the tests on *nix/macosx. +LIBVPX_BIN_PATH="${LIBVPX_BIN_PATH:-.}" +LIBVPX_CONFIG_PATH="${LIBVPX_CONFIG_PATH:-.}" +LIBVPX_TEST_DATA_PATH="${LIBVPX_TEST_DATA_PATH:-.}" + +# Create a temporary directory for output files, and a trap to clean it up. +if [ -n "${TMPDIR}" ]; then + VPX_TEST_TEMP_ROOT="${TMPDIR}" +elif [ -n "${TEMPDIR}" ]; then + VPX_TEST_TEMP_ROOT="${TEMPDIR}" +else + VPX_TEST_TEMP_ROOT=/tmp +fi + +VPX_TEST_RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}') +VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_${VPX_TEST_RAND}" + +if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \ + [ ! -d "${VPX_TEST_OUTPUT_DIR}" ]; then + echo "${0##*/}: Cannot create output directory, giving up." 
+ echo "${0##*/}: VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}" + exit 1 +fi + +trap cleanup EXIT + +if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then +cat << EOF +$(basename "${0%.*}") test configuration: + LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH} + LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH} + LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH} + VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR} + VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT} + VPX_TEST_FILTER=${VPX_TEST_FILTER} + VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS} +EOF +fi diff --git a/source/libvpx/test/vp8_boolcoder_test.cc b/source/libvpx/test/vp8_boolcoder_test.cc index 7c6c601..9cd1987 100644 --- a/source/libvpx/test/vp8_boolcoder_test.cc +++ b/source/libvpx/test/vp8_boolcoder_test.cc @@ -35,14 +35,14 @@ const uint8_t secret_key[16] = { 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 }; -void encrypt_buffer(uint8_t *buffer, int size) { - for (int i = 0; i < size; ++i) { +void encrypt_buffer(uint8_t *buffer, size_t size) { + for (size_t i = 0; i < size; ++i) { buffer[i] ^= secret_key[i & 15]; } } void test_decrypt_cb(void *decrypt_state, const uint8_t *input, - uint8_t *output, int count) { + uint8_t *output, int count) { const size_t offset = input - reinterpret_cast<uint8_t*>(decrypt_state); for (int i = 0; i < count; i++) { output[i] = input[i] ^ secret_key[(offset + i) & 15]; diff --git a/source/libvpx/test/vp8_decrypt_test.cc b/source/libvpx/test/vp8_decrypt_test.cc index b092509..1b5b083 100644 --- a/source/libvpx/test/vp8_decrypt_test.cc +++ b/source/libvpx/test/vp8_decrypt_test.cc @@ -26,9 +26,9 @@ const uint8_t test_key[16] = { 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 }; -void encrypt_buffer(const uint8_t *src, uint8_t *dst, - int size, int offset = 0) { - for (int i = 0; i < size; ++i) { +void encrypt_buffer(const uint8_t *src, uint8_t *dst, size_t size, + ptrdiff_t offset) { + for (size_t i = 0; i < size; ++i) { dst[i] = src[i] ^ test_key[(offset + i) & 15]; } } @@ -61,7 +61,7 @@ 
TEST(TestDecrypt, DecryptWorks) { #if CONFIG_DECRYPT std::vector<uint8_t> encrypted(video.frame_size()); - encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size()); + encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0); vp8_decrypt_init di = { test_decrypt_cb, &encrypted[0] }; decoder.Control(VP8D_SET_DECRYPTOR, &di); #endif // CONFIG_DECRYPT diff --git a/source/libvpx/test/vp8_fdct4x4_test.cc b/source/libvpx/test/vp8_fdct4x4_test.cc index e3c292e..bdbf74e 100644 --- a/source/libvpx/test/vp8_fdct4x4_test.cc +++ b/source/libvpx/test/vp8_fdct4x4_test.cc @@ -68,7 +68,7 @@ void reference_idct4x4(const int16_t *input, int16_t *output) { using libvpx_test::ACMRandom; -TEST(Vp8FdctTest, SignBiasCheck) { +TEST(VP8FdctTest, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int16_t test_input_block[16]; int16_t test_output_block[16]; @@ -127,7 +127,7 @@ TEST(Vp8FdctTest, SignBiasCheck) { << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; }; -TEST(Vp8FdctTest, RoundTripErrorCheck) { +TEST(VP8FdctTest, RoundTripErrorCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; diff --git a/source/libvpx/test/vp9_lossless_test.cc b/source/libvpx/test/vp9_lossless_test.cc index ad7ba44..7c3ba9f 100644 --- a/source/libvpx/test/vp9_lossless_test.cc +++ b/source/libvpx/test/vp9_lossless_test.cc @@ -19,16 +19,17 @@ namespace { const int kMaxPsnr = 100; -class LossLessTest : public ::libvpx_test::EncoderTest, +class LosslessTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: - LossLessTest() : EncoderTest(GET_PARAM(0)), - psnr_(kMaxPsnr), - nframes_(0), - encoding_mode_(GET_PARAM(1)) { + LosslessTestLarge() + : EncoderTest(GET_PARAM(0)), + psnr_(kMaxPsnr), + nframes_(0), + encoding_mode_(GET_PARAM(1)) { } - virtual ~LossLessTest() {} + virtual ~LosslessTestLarge() {} virtual void SetUp() { InitializeConfig(); @@ -55,7 
+56,7 @@ class LossLessTest : public ::libvpx_test::EncoderTest, libvpx_test::TestMode encoding_mode_; }; -TEST_P(LossLessTest, TestLossLessEncoding) { +TEST_P(LosslessTestLarge, TestLossLessEncoding) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; @@ -73,7 +74,7 @@ TEST_P(LossLessTest, TestLossLessEncoding) { EXPECT_GE(psnr_lossless, kMaxPsnr); } -TEST_P(LossLessTest, TestLossLessEncoding444) { +TEST_P(LosslessTestLarge, TestLossLessEncoding444) { libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 10); cfg_.g_profile = 1; @@ -90,5 +91,5 @@ TEST_P(LossLessTest, TestLossLessEncoding444) { EXPECT_GE(psnr_lossless, kMaxPsnr); } -VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(LosslessTestLarge, ALL_TEST_MODES); } // namespace diff --git a/source/libvpx/test/vp9_thread_test.cc b/source/libvpx/test/vp9_thread_test.cc index a78cdea..5523f20 100644 --- a/source/libvpx/test/vp9_thread_test.cc +++ b/source/libvpx/test/vp9_thread_test.cc @@ -153,6 +153,66 @@ TEST(VP9DecodeMTTest, MTDecode2) { } } +// Test tile quantity changes within one file. 
+TEST(VP9DecodeMTTest, MTDecode3) { + static const struct { + const char *name; + const char *expected_md5; + } files[] = { + { "vp90-2-14-resize-fp-tiles-1-16.webm", + "0cd5e632c326297e975f38949c31ea94" }, + { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "5c78a96a42e7f4a4f6b2edcdb791e44c" }, + { "vp90-2-14-resize-fp-tiles-1-2.webm", + "e030450ae85c3277be2a418769df98e2" }, + { "vp90-2-14-resize-fp-tiles-1-4.webm", + "312eed4e2b64eb7a4e7f18916606a430" }, + { "vp90-2-14-resize-fp-tiles-16-1.webm", + "1755c16d8af16a9cb3fe7338d90abe52" }, + { "vp90-2-14-resize-fp-tiles-16-2.webm", + "500300592d3fcb6f12fab25e48aaf4df" }, + { "vp90-2-14-resize-fp-tiles-16-4.webm", + "47c48379fa6331215d91c67648e1af6e" }, + { "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + "eecf17290739bc708506fa4827665989" }, + { "vp90-2-14-resize-fp-tiles-16-8.webm", + "29b6bb54e4c26b5ca85d5de5fed94e76" }, + { "vp90-2-14-resize-fp-tiles-1-8.webm", + "1b6f175e08cd82cf84bb800ac6d1caa3" }, + { "vp90-2-14-resize-fp-tiles-2-16.webm", + "ca3b03e4197995d8d5444ede7a6c0804" }, + { "vp90-2-14-resize-fp-tiles-2-1.webm", + "99aec065369d70bbb78ccdff65afed3f" }, + { "vp90-2-14-resize-fp-tiles-2-4.webm", + "22d0ebdb49b87d2920a85aea32e1afd5" }, + { "vp90-2-14-resize-fp-tiles-2-8.webm", + "c2115cf051c62e0f7db1d4a783831541" }, + { "vp90-2-14-resize-fp-tiles-4-16.webm", + "c690d7e1719b31367564cac0af0939cb" }, + { "vp90-2-14-resize-fp-tiles-4-1.webm", + "a926020b2cc3e15ad4cc271853a0ff26" }, + { "vp90-2-14-resize-fp-tiles-4-2.webm", + "42699063d9e581f1993d0cf890c2be78" }, + { "vp90-2-14-resize-fp-tiles-4-8.webm", + "7f76d96036382f45121e3d5aa6f8ec52" }, + { "vp90-2-14-resize-fp-tiles-8-16.webm", + "76a43fcdd7e658542913ea43216ec55d" }, + { "vp90-2-14-resize-fp-tiles-8-1.webm", + "8e3fbe89486ca60a59299dea9da91378" }, + { "vp90-2-14-resize-fp-tiles-8-2.webm", + "ae96f21f21b6370cc0125621b441fc52" }, + { "vp90-2-14-resize-fp-tiles-8-4.webm", + "3eb4f24f10640d42218f7fd7b9fd30d4" }, + }; + + for (int i = 0; i < 
static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) { + for (int t = 2; t <= 8; ++t) { + EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str()) + << "threads = " << t; + } + } +} + INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool()); } // namespace diff --git a/source/libvpx/test/vpxdec.sh b/source/libvpx/test/vpxdec.sh new file mode 100755 index 0000000..d236f97 --- /dev/null +++ b/source/libvpx/test/vpxdec.sh @@ -0,0 +1,65 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests vpxdec. To add new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to vpxdec_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf" +VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm" + +# Environment check: Make sure input is available. +vpxdec_verify_environment() { + if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." 
+ return 1 + fi +} + +vpxdec_can_decode_vp8() { + if [ "$(vpxdec_available)" = "yes" ] && \ + [ "$(vp8_decode_available)" = "yes" ]; then + echo yes + fi +} + +vpxdec_can_decode_vp9() { + if [ "$(vpxdec_available)" = "yes" ] && \ + [ "$(vp9_decode_available)" = "yes" ]; then + echo yes + fi +} + +vpxdec_vp8_ivf() { + if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then + vpxdec "${VP8_IVF_FILE}" + fi +} + +vpxdec_vp8_ivf_pipe_input() { + if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then + vpxdec "${VP8_IVF_FILE}" - + fi +} + +vpxdec_vp9_webm() { + if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + vpxdec "${VP9_WEBM_FILE}" + fi +} + +vpxdec_tests="vpxdec_vp8_ivf + vpxdec_vp8_ivf_pipe_input + vpxdec_vp9_webm" + +run_tests vpxdec_verify_environment "${vpxdec_tests}" diff --git a/source/libvpx/test/vpxenc.sh b/source/libvpx/test/vpxenc.sh new file mode 100755 index 0000000..89e4eb3 --- /dev/null +++ b/source/libvpx/test/vpxenc.sh @@ -0,0 +1,96 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests vpxenc using hantro_collage_w352h288.yuv as input. To add +## new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to vpxenc_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv" +YUV_RAW_INPUT_WIDTH=352 +YUV_RAW_INPUT_HEIGHT=288 +TEST_FRAMES=10 + +# Environment check: Make sure input is available. +vpxenc_verify_environment() { + if [ ! 
-e "${YUV_RAW_INPUT}" ]; then + echo "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +vpxenc_can_encode_vp8() { + if [ "$(vpxenc_available)" = "yes" ] && \ + [ "$(vp8_encode_available)" = "yes" ]; then + echo yes + fi +} + +vpxenc_can_encode_vp9() { + if [ "$(vpxenc_available)" = "yes" ] && \ + [ "$(vp9_encode_available)" = "yes" ]; then + echo yes + fi +} + +vpxenc_vp8_ivf() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.ivf + fi +} + +vpxenc_vp8_ivf_pipe_input() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.ivf - + fi +} + +vpxenc_vp8_webm() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && + [ "$(webm_io_available)" = "yes" ] ; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.webm + fi +} + +vpxenc_vp9_ivf() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9.ivf + fi +} + +vpxenc_vp9_webm() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && + [ "$(webm_io_available)" = "yes" ] ; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9.webm + fi +} + +DISABLED_vpxenc_vp9_ivf_lossless() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless + fi +} + +vpxenc_tests="vpxenc_vp8_ivf + vpxenc_vp8_webm + vpxenc_vp8_ivf_pipe_input + vpxenc_vp9_ivf + vpxenc_vp9_webm + DISABLED_vpxenc_vp9_ivf_lossless" + +run_tests vpxenc_verify_environment "${vpxenc_tests}" diff --git a/source/libvpx/third_party/libwebm/AUTHORS.TXT b/source/libvpx/third_party/libwebm/AUTHORS.TXT new file mode 100644 index 
0000000..8ab6f79 --- /dev/null +++ b/source/libvpx/third_party/libwebm/AUTHORS.TXT @@ -0,0 +1,4 @@ +# Names should be added to this file like so:
+# Name or Organization <email address>
+
+Google Inc.
diff --git a/source/libvpx/third_party/libwebm/LICENSE.TXT b/source/libvpx/third_party/libwebm/LICENSE.TXT new file mode 100644 index 0000000..7a6f995 --- /dev/null +++ b/source/libvpx/third_party/libwebm/LICENSE.TXT @@ -0,0 +1,30 @@ +Copyright (c) 2010, Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/source/libvpx/third_party/libwebm/PATENTS.TXT b/source/libvpx/third_party/libwebm/PATENTS.TXT new file mode 100644 index 0000000..4414d83 --- /dev/null +++ b/source/libvpx/third_party/libwebm/PATENTS.TXT @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the WebM Project. + +Google hereby grants to you a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer, and otherwise run, modify and propagate the contents of this +implementation of VP8, where such license applies only to those patent +claims, both currently owned by Google and acquired in the future, +licensable by Google that are necessarily infringed by this +implementation of VP8. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of VP8 or any code incorporated within this +implementation of VP8 constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of VP8 +shall terminate as of the date such litigation is filed. diff --git a/source/libvpx/third_party/libwebm/README.webm b/source/libvpx/third_party/libwebm/README.webm new file mode 100644 index 0000000..2c7570d --- /dev/null +++ b/source/libvpx/third_party/libwebm/README.webm @@ -0,0 +1,7 @@ +URL: https://chromium.googlesource.com/webm/libwebm +Version: a7118d8ec564e9db841da1eb01f547f3229f240a +License: BSD +License File: LICENSE.txt + +Description: +libwebm is used to handle WebM container I/O. 
diff --git a/source/libvpx/third_party/libwebm/RELEASE.TXT b/source/libvpx/third_party/libwebm/RELEASE.TXT new file mode 100644 index 0000000..a7e9f03 --- /dev/null +++ b/source/libvpx/third_party/libwebm/RELEASE.TXT @@ -0,0 +1,34 @@ +1.0.0.5
+ * Handled case when no duration
+ * Handled empty clusters
+ * Handled empty clusters when seeking
+ * Implemented check lacing bits
+
+1.0.0.4
+ * Made Cues member variables mutable
+ * Defended against badly-formatted cue points
+ * Segment::GetCluster returns CuePoint too
+ * Separated cue-based searches
+
+1.0.0.3
+ * Added Block::GetOffset() to get a frame's offset in a block
+ * Changed cluster count type from size_t to long
+ * Parsed SeekHead to find cues
+ * Allowed seeking beyond end of cluster cache
+ * Added note to not attempt to reparse cues element
+ * Restructured Segment::LoadCluster
+ * Marked position of cues without parsing cues element
+ * Allowed cue points to be loaded incrementally
+ * Implemented lazy loading of cue points as they're searched
+ * Merged Cues::LoadCuePoint into Cues::Find
+ * Lazy init cues
+ * Loaded cue point during find
+
+1.0.0.2
+ * added support for Cues element
+ * seeking was improved
+
+1.0.0.1
+ * fixed item 141
+ * added item 142
+ * added this file, RELEASE.TXT, to repository
diff --git a/source/libvpx/third_party/libwebm/mkvmuxer.cpp b/source/libvpx/third_party/libwebm/mkvmuxer.cpp new file mode 100644 index 0000000..8ae0dda --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxer.cpp @@ -0,0 +1,3245 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvmuxer.hpp" + +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <new> + +#include "mkvmuxerutil.hpp" +#include "mkvparser.hpp" +#include "mkvwriter.hpp" +#include "webmids.hpp" + +#ifdef _MSC_VER +// Disable MSVC warnings that suggest making code non-portable. +#pragma warning(disable:4996) +#endif + +namespace mkvmuxer { + +namespace { +// Deallocate the string designated by |dst|, and then copy the |src| +// string to |dst|. The caller owns both the |src| string and the +// |dst| copy (hence the caller is responsible for eventually +// deallocating the strings, either directly, or indirectly via +// StrCpy). Returns true if the source string was successfully copied +// to the destination. 
+bool StrCpy(const char* src, char** dst_ptr) { + if (dst_ptr == NULL) + return false; + + char*& dst = *dst_ptr; + + delete [] dst; + dst = NULL; + + if (src == NULL) + return true; + + const size_t size = strlen(src) + 1; + + dst = new (std::nothrow) char[size]; // NOLINT + if (dst == NULL) + return false; + + strcpy(dst, src); // NOLINT + return true; +} +} // namespace + +/////////////////////////////////////////////////////////////// +// +// IMkvWriter Class + +IMkvWriter::IMkvWriter() { +} + +IMkvWriter::~IMkvWriter() { +} + +bool WriteEbmlHeader(IMkvWriter* writer) { + // Level 0 + uint64 size = EbmlElementSize(kMkvEBMLVersion, 1ULL); + size += EbmlElementSize(kMkvEBMLReadVersion, 1ULL); + size += EbmlElementSize(kMkvEBMLMaxIDLength, 4ULL); + size += EbmlElementSize(kMkvEBMLMaxSizeLength, 8ULL); + size += EbmlElementSize(kMkvDocType, "webm"); + size += EbmlElementSize(kMkvDocTypeVersion, 2ULL); + size += EbmlElementSize(kMkvDocTypeReadVersion, 2ULL); + + if (!WriteEbmlMasterElement(writer, kMkvEBML, size)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLVersion, 1ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLReadVersion, 1ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLMaxIDLength, 4ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLMaxSizeLength, 8ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvDocType, "webm")) + return false; + if (!WriteEbmlElement(writer, kMkvDocTypeVersion, 2ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvDocTypeReadVersion, 2ULL)) + return false; + + return true; +} + +bool ChunkedCopy(mkvparser::IMkvReader* source, + mkvmuxer::IMkvWriter* dst, + mkvmuxer::int64 start, int64 size) { + // TODO(vigneshv): Check if this is a reasonable value. + const uint32 kBufSize = 2048; + uint8* buf = new uint8[kBufSize]; + int64 offset = start; + while (size > 0) { + const int64 read_len = (size > kBufSize) ? 
kBufSize : size; + if (source->Read(offset, static_cast<long>(read_len), buf)) + return false; + dst->Write(buf, static_cast<uint32>(read_len)); + offset += read_len; + size -= read_len; + } + delete[] buf; + return true; +} + +/////////////////////////////////////////////////////////////// +// +// Frame Class + +Frame::Frame() + : add_id_(0), + additional_(NULL), + additional_length_(0), + duration_(0), + frame_(NULL), + is_key_(false), + length_(0), + track_number_(0), + timestamp_(0), + discard_padding_(0) { +} + +Frame::~Frame() { + delete [] frame_; + delete [] additional_; +} + +bool Frame::Init(const uint8* frame, uint64 length) { + uint8* const data = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!data) + return false; + + delete [] frame_; + frame_ = data; + length_ = length; + + memcpy(frame_, frame, static_cast<size_t>(length_)); + return true; +} + +bool Frame::AddAdditionalData(const uint8* additional, uint64 length, + uint64 add_id) { + uint8* const data = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!data) + return false; + + delete [] additional_; + additional_ = data; + additional_length_ = length; + add_id_ = add_id; + + memcpy(additional_, additional, static_cast<size_t>(additional_length_)); + return true; +} + +/////////////////////////////////////////////////////////////// +// +// CuePoint Class + +CuePoint::CuePoint() + : time_(0), + track_(0), + cluster_pos_(0), + block_number_(1), + output_block_number_(true) { +} + +CuePoint::~CuePoint() { +} + +bool CuePoint::Write(IMkvWriter* writer) const { + if (!writer || track_ < 1 || cluster_pos_ < 1) + return false; + + uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_); + size += EbmlElementSize(kMkvCueTrack, track_); + if (output_block_number_ && block_number_ > 1) + size += EbmlElementSize(kMkvCueBlockNumber, block_number_); + const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions, + size) + size; + const 
uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) + + track_pos_size; + + if (!WriteEbmlMasterElement(writer, kMkvCuePoint, payload_size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvCueTime, time_)) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvCueTrackPositions, size)) + return false; + if (!WriteEbmlElement(writer, kMkvCueTrack, track_)) + return false; + if (!WriteEbmlElement(writer, kMkvCueClusterPosition, cluster_pos_)) + return false; + if (output_block_number_ && block_number_ > 1) + if (!WriteEbmlElement(writer, kMkvCueBlockNumber, block_number_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0) + return false; + + if (stop_position - payload_position != static_cast<int64>(payload_size)) + return false; + + return true; +} + +uint64 CuePoint::PayloadSize() const { + uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_); + size += EbmlElementSize(kMkvCueTrack, track_); + if (output_block_number_ && block_number_ > 1) + size += EbmlElementSize(kMkvCueBlockNumber, block_number_); + const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions, + size) + size; + const uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) + + track_pos_size; + + return payload_size; +} + +uint64 CuePoint::Size() const { + const uint64 payload_size = PayloadSize(); + return EbmlMasterElementSize(kMkvCuePoint, payload_size) + payload_size; +} + +/////////////////////////////////////////////////////////////// +// +// Cues Class + +Cues::Cues() + : cue_entries_capacity_(0), + cue_entries_size_(0), + cue_entries_(NULL), + output_block_number_(true) { +} + +Cues::~Cues() { + if (cue_entries_) { + for (int32 i = 0; i < cue_entries_size_; ++i) { + CuePoint* const cue = cue_entries_[i]; + delete cue; + } + delete [] cue_entries_; + } +} + +bool Cues::AddCue(CuePoint* cue) { + if (!cue) 
+ return false; + + if ((cue_entries_size_ + 1) > cue_entries_capacity_) { + // Add more CuePoints. + const int32 new_capacity = + (!cue_entries_capacity_) ? 2 : cue_entries_capacity_ * 2; + + if (new_capacity < 1) + return false; + + CuePoint** const cues = + new (std::nothrow) CuePoint*[new_capacity]; // NOLINT + if (!cues) + return false; + + for (int32 i = 0; i < cue_entries_size_; ++i) { + cues[i] = cue_entries_[i]; + } + + delete [] cue_entries_; + + cue_entries_ = cues; + cue_entries_capacity_ = new_capacity; + } + + cue->set_output_block_number(output_block_number_); + cue_entries_[cue_entries_size_++] = cue; + return true; +} + +CuePoint* Cues::GetCueByIndex(int32 index) const { + if (cue_entries_ == NULL) + return NULL; + + if (index >= cue_entries_size_) + return NULL; + + return cue_entries_[index]; +} + +uint64 Cues::Size() { + uint64 size = 0; + for (int32 i = 0; i < cue_entries_size_; ++i) + size += GetCueByIndex(i)->Size(); + size += EbmlMasterElementSize(kMkvCues, size); + return size; +} + +bool Cues::Write(IMkvWriter* writer) const { + if (!writer) + return false; + + uint64 size = 0; + for (int32 i = 0; i < cue_entries_size_; ++i) { + const CuePoint* const cue = GetCueByIndex(i); + + if (!cue) + return false; + + size += cue->Size(); + } + + if (!WriteEbmlMasterElement(writer, kMkvCues, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + for (int32 i = 0; i < cue_entries_size_; ++i) { + const CuePoint* const cue = GetCueByIndex(i); + + if (!cue->Write(writer)) + return false; + } + + const int64 stop_position = writer->Position(); + if (stop_position < 0) + return false; + + if (stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// ContentEncAESSettings Class + +ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {} + +uint64 
ContentEncAESSettings::Size() const { + const uint64 payload = PayloadSize(); + const uint64 size = + EbmlMasterElementSize(kMkvContentEncAESSettings, payload) + payload; + return size; +} + +bool ContentEncAESSettings::Write(IMkvWriter* writer) const { + const uint64 payload = PayloadSize(); + + if (!WriteEbmlMasterElement(writer, kMkvContentEncAESSettings, payload)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvAESSettingsCipherMode, cipher_mode_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(payload)) + return false; + + return true; +} + +uint64 ContentEncAESSettings::PayloadSize() const { + uint64 size = EbmlElementSize(kMkvAESSettingsCipherMode, cipher_mode_); + return size; +} + +/////////////////////////////////////////////////////////////// +// +// ContentEncoding Class + +ContentEncoding::ContentEncoding() + : enc_algo_(5), + enc_key_id_(NULL), + encoding_order_(0), + encoding_scope_(1), + encoding_type_(1), + enc_key_id_length_(0) { +} + +ContentEncoding::~ContentEncoding() { + delete [] enc_key_id_; +} + +bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) { + if (!id || length < 1) + return false; + + delete [] enc_key_id_; + + enc_key_id_ = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!enc_key_id_) + return false; + + memcpy(enc_key_id_, id, static_cast<size_t>(length)); + enc_key_id_length_ = length; + + return true; +} + +uint64 ContentEncoding::Size() const { + const uint64 encryption_size = EncryptionSize(); + const uint64 encoding_size = EncodingSize(0, encryption_size); + const uint64 encodings_size = EbmlMasterElementSize(kMkvContentEncoding, + encoding_size) + + encoding_size; + + return encodings_size; +} + +bool ContentEncoding::Write(IMkvWriter* writer) const { + const uint64 
encryption_size = EncryptionSize(); + const uint64 encoding_size = EncodingSize(0, encryption_size); + const uint64 size = EbmlMasterElementSize(kMkvContentEncoding, + encoding_size) + + encoding_size; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvContentEncoding, encoding_size)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncodingOrder, encoding_order_)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncodingScope, encoding_scope_)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncodingType, encoding_type_)) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvContentEncryption, encryption_size)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncAlgo, enc_algo_)) + return false; + if (!WriteEbmlElement(writer, + kMkvContentEncKeyID, + enc_key_id_, + enc_key_id_length_)) + return false; + + if (!enc_aes_settings_.Write(writer)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +uint64 ContentEncoding::EncodingSize(uint64 compresion_size, + uint64 encryption_size) const { + // TODO(fgalligan): Add support for compression settings. 
+ if (compresion_size != 0) + return 0; + + uint64 encoding_size = 0; + + if (encryption_size > 0) { + encoding_size += EbmlMasterElementSize(kMkvContentEncryption, + encryption_size) + + encryption_size; + } + encoding_size += EbmlElementSize(kMkvContentEncodingType, encoding_type_); + encoding_size += EbmlElementSize(kMkvContentEncodingScope, encoding_scope_); + encoding_size += EbmlElementSize(kMkvContentEncodingOrder, encoding_order_); + + return encoding_size; +} + +uint64 ContentEncoding::EncryptionSize() const { + const uint64 aes_size = enc_aes_settings_.Size(); + + uint64 encryption_size = EbmlElementSize(kMkvContentEncKeyID, + enc_key_id_, + enc_key_id_length_); + encryption_size += EbmlElementSize(kMkvContentEncAlgo, enc_algo_); + + return encryption_size + aes_size; +} + +/////////////////////////////////////////////////////////////// +// +// Track Class + +Track::Track(unsigned int* seed) + : codec_id_(NULL), + codec_private_(NULL), + language_(NULL), + max_block_additional_id_(0), + name_(NULL), + number_(0), + type_(0), + uid_(MakeUID(seed)), + codec_delay_(0), + seek_pre_roll_(0), + codec_private_length_(0), + content_encoding_entries_(NULL), + content_encoding_entries_size_(0) { +} + +Track::~Track() { + delete [] codec_id_; + delete [] codec_private_; + delete [] language_; + delete [] name_; + + if (content_encoding_entries_) { + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + delete encoding; + } + delete [] content_encoding_entries_; + } +} + +bool Track::AddContentEncoding() { + const uint32 count = content_encoding_entries_size_ + 1; + + ContentEncoding** const content_encoding_entries = + new (std::nothrow) ContentEncoding*[count]; // NOLINT + if (!content_encoding_entries) + return false; + + ContentEncoding* const content_encoding = + new (std::nothrow) ContentEncoding(); // NOLINT + if (!content_encoding) { + delete [] content_encoding_entries; + return 
false; + } + + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + content_encoding_entries[i] = content_encoding_entries_[i]; + } + + delete [] content_encoding_entries_; + + content_encoding_entries_ = content_encoding_entries; + content_encoding_entries_[content_encoding_entries_size_] = content_encoding; + content_encoding_entries_size_ = count; + return true; +} + +ContentEncoding* Track::GetContentEncodingByIndex(uint32 index) const { + if (content_encoding_entries_ == NULL) + return NULL; + + if (index >= content_encoding_entries_size_) + return NULL; + + return content_encoding_entries_[index]; +} + +uint64 Track::PayloadSize() const { + uint64 size = EbmlElementSize(kMkvTrackNumber, number_); + size += EbmlElementSize(kMkvTrackUID, uid_); + size += EbmlElementSize(kMkvTrackType, type_); + if (codec_id_) + size += EbmlElementSize(kMkvCodecID, codec_id_); + if (codec_private_) + size += EbmlElementSize(kMkvCodecPrivate, + codec_private_, + codec_private_length_); + if (language_) + size += EbmlElementSize(kMkvLanguage, language_); + if (name_) + size += EbmlElementSize(kMkvName, name_); + if (max_block_additional_id_) + size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_); + if (codec_delay_) + size += EbmlElementSize(kMkvCodecDelay, codec_delay_); + if (seek_pre_roll_) + size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_); + + if (content_encoding_entries_size_ > 0) { + uint64 content_encodings_size = 0; + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + content_encodings_size += encoding->Size(); + } + + size += EbmlMasterElementSize(kMkvContentEncodings, + content_encodings_size) + + content_encodings_size; + } + + return size; +} + +uint64 Track::Size() const { + uint64 size = PayloadSize(); + size += EbmlMasterElementSize(kMkvTrackEntry, size); + return size; +} + +bool Track::Write(IMkvWriter* writer) const { + if (!writer) + return 
false; + + // |size| may be bigger than what is written out in this function because + // derived classes may write out more data in the Track element. + const uint64 payload_size = PayloadSize(); + + if (!WriteEbmlMasterElement(writer, kMkvTrackEntry, payload_size)) + return false; + + uint64 size = EbmlElementSize(kMkvTrackNumber, number_); + size += EbmlElementSize(kMkvTrackUID, uid_); + size += EbmlElementSize(kMkvTrackType, type_); + if (codec_id_) + size += EbmlElementSize(kMkvCodecID, codec_id_); + if (codec_private_) + size += EbmlElementSize(kMkvCodecPrivate, + codec_private_, + codec_private_length_); + if (language_) + size += EbmlElementSize(kMkvLanguage, language_); + if (name_) + size += EbmlElementSize(kMkvName, name_); + if (max_block_additional_id_) + size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_); + if (codec_delay_) + size += EbmlElementSize(kMkvCodecDelay, codec_delay_); + if (seek_pre_roll_) + size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_); + + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvTrackNumber, number_)) + return false; + if (!WriteEbmlElement(writer, kMkvTrackUID, uid_)) + return false; + if (!WriteEbmlElement(writer, kMkvTrackType, type_)) + return false; + if (max_block_additional_id_) { + if (!WriteEbmlElement(writer, + kMkvMaxBlockAdditionID, + max_block_additional_id_)) { + return false; + } + } + if (codec_delay_) { + if (!WriteEbmlElement(writer, kMkvCodecDelay, codec_delay_)) + return false; + } + if (seek_pre_roll_) { + if (!WriteEbmlElement(writer, kMkvSeekPreRoll, seek_pre_roll_)) + return false; + } + if (codec_id_) { + if (!WriteEbmlElement(writer, kMkvCodecID, codec_id_)) + return false; + } + if (codec_private_) { + if (!WriteEbmlElement(writer, + kMkvCodecPrivate, + codec_private_, + codec_private_length_)) + return false; + } + if (language_) { + if (!WriteEbmlElement(writer, kMkvLanguage, 
language_)) + return false; + } + if (name_) { + if (!WriteEbmlElement(writer, kMkvName, name_)) + return false; + } + + int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + if (content_encoding_entries_size_ > 0) { + uint64 content_encodings_size = 0; + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + content_encodings_size += encoding->Size(); + } + + if (!WriteEbmlMasterElement(writer, + kMkvContentEncodings, + content_encodings_size)) + return false; + + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + if (!encoding->Write(writer)) + return false; + } + } + + stop_position = writer->Position(); + if (stop_position < 0) + return false; + return true; +} + +bool Track::SetCodecPrivate(const uint8* codec_private, uint64 length) { + if (!codec_private || length < 1) + return false; + + delete [] codec_private_; + + codec_private_ = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!codec_private_) + return false; + + memcpy(codec_private_, codec_private, static_cast<size_t>(length)); + codec_private_length_ = length; + + return true; +} + +void Track::set_codec_id(const char* codec_id) { + if (codec_id) { + delete [] codec_id_; + + const size_t length = strlen(codec_id) + 1; + codec_id_ = new (std::nothrow) char[length]; // NOLINT + if (codec_id_) { +#ifdef _MSC_VER + strcpy_s(codec_id_, length, codec_id); +#else + strcpy(codec_id_, codec_id); +#endif + } + } +} + +// TODO(fgalligan): Vet the language parameter. 
+void Track::set_language(const char* language) { + if (language) { + delete [] language_; + + const size_t length = strlen(language) + 1; + language_ = new (std::nothrow) char[length]; // NOLINT + if (language_) { +#ifdef _MSC_VER + strcpy_s(language_, length, language); +#else + strcpy(language_, language); +#endif + } + } +} + +void Track::set_name(const char* name) { + if (name) { + delete [] name_; + + const size_t length = strlen(name) + 1; + name_ = new (std::nothrow) char[length]; // NOLINT + if (name_) { +#ifdef _MSC_VER + strcpy_s(name_, length, name); +#else + strcpy(name_, name); +#endif + } + } +} + +/////////////////////////////////////////////////////////////// +// +// VideoTrack Class + +VideoTrack::VideoTrack(unsigned int* seed) + : Track(seed), + display_height_(0), + display_width_(0), + frame_rate_(0.0), + height_(0), + stereo_mode_(0), + alpha_mode_(0), + width_(0) { +} + +VideoTrack::~VideoTrack() { +} + +bool VideoTrack::SetStereoMode(uint64 stereo_mode) { + if (stereo_mode != kMono && + stereo_mode != kSideBySideLeftIsFirst && + stereo_mode != kTopBottomRightIsFirst && + stereo_mode != kTopBottomLeftIsFirst && + stereo_mode != kSideBySideRightIsFirst) + return false; + + stereo_mode_ = stereo_mode; + return true; +} + +bool VideoTrack::SetAlphaMode(uint64 alpha_mode) { + if (alpha_mode != kNoAlpha && + alpha_mode != kAlpha) + return false; + + alpha_mode_ = alpha_mode; + return true; +} + +uint64 VideoTrack::PayloadSize() const { + const uint64 parent_size = Track::PayloadSize(); + + uint64 size = VideoPayloadSize(); + size += EbmlMasterElementSize(kMkvVideo, size); + + return parent_size + size; +} + +bool VideoTrack::Write(IMkvWriter* writer) const { + if (!Track::Write(writer)) + return false; + + const uint64 size = VideoPayloadSize(); + + if (!WriteEbmlMasterElement(writer, kMkvVideo, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if 
(!WriteEbmlElement(writer, kMkvPixelWidth, width_)) + return false; + if (!WriteEbmlElement(writer, kMkvPixelHeight, height_)) + return false; + if (display_width_ > 0) + if (!WriteEbmlElement(writer, kMkvDisplayWidth, display_width_)) + return false; + if (display_height_ > 0) + if (!WriteEbmlElement(writer, kMkvDisplayHeight, display_height_)) + return false; + if (stereo_mode_ > kMono) + if (!WriteEbmlElement(writer, kMkvStereoMode, stereo_mode_)) + return false; + if (alpha_mode_ > kNoAlpha) + if (!WriteEbmlElement(writer, kMkvAlphaMode, alpha_mode_)) + return false; + if (frame_rate_ > 0.0) + if (!WriteEbmlElement(writer, + kMkvFrameRate, + static_cast<float>(frame_rate_))) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +uint64 VideoTrack::VideoPayloadSize() const { + uint64 size = EbmlElementSize(kMkvPixelWidth, width_); + size += EbmlElementSize(kMkvPixelHeight, height_); + if (display_width_ > 0) + size += EbmlElementSize(kMkvDisplayWidth, display_width_); + if (display_height_ > 0) + size += EbmlElementSize(kMkvDisplayHeight, display_height_); + if (stereo_mode_ > kMono) + size += EbmlElementSize(kMkvStereoMode, stereo_mode_); + if (alpha_mode_ > kNoAlpha) + size += EbmlElementSize(kMkvAlphaMode, alpha_mode_); + if (frame_rate_ > 0.0) + size += EbmlElementSize(kMkvFrameRate, static_cast<float>(frame_rate_)); + + return size; +} + +/////////////////////////////////////////////////////////////// +// +// AudioTrack Class + +AudioTrack::AudioTrack(unsigned int* seed) + : Track(seed), + bit_depth_(0), + channels_(1), + sample_rate_(0.0) { +} + +AudioTrack::~AudioTrack() { +} + +uint64 AudioTrack::PayloadSize() const { + const uint64 parent_size = Track::PayloadSize(); + + uint64 size = EbmlElementSize(kMkvSamplingFrequency, + static_cast<float>(sample_rate_)); + size += EbmlElementSize(kMkvChannels, channels_); + 
if (bit_depth_ > 0) + size += EbmlElementSize(kMkvBitDepth, bit_depth_); + size += EbmlMasterElementSize(kMkvAudio, size); + + return parent_size + size; +} + +bool AudioTrack::Write(IMkvWriter* writer) const { + if (!Track::Write(writer)) + return false; + + // Calculate AudioSettings size. + uint64 size = EbmlElementSize(kMkvSamplingFrequency, + static_cast<float>(sample_rate_)); + size += EbmlElementSize(kMkvChannels, channels_); + if (bit_depth_ > 0) + size += EbmlElementSize(kMkvBitDepth, bit_depth_); + + if (!WriteEbmlMasterElement(writer, kMkvAudio, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, + kMkvSamplingFrequency, + static_cast<float>(sample_rate_))) + return false; + if (!WriteEbmlElement(writer, kMkvChannels, channels_)) + return false; + if (bit_depth_ > 0) + if (!WriteEbmlElement(writer, kMkvBitDepth, bit_depth_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// Tracks Class + +const char Tracks::kOpusCodecId[] = "A_OPUS"; +const char Tracks::kVorbisCodecId[] = "A_VORBIS"; +const char Tracks::kVp8CodecId[] = "V_VP8"; +const char Tracks::kVp9CodecId[] = "V_VP9"; + + +Tracks::Tracks() + : track_entries_(NULL), + track_entries_size_(0) { +} + +Tracks::~Tracks() { + if (track_entries_) { + for (uint32 i = 0; i < track_entries_size_; ++i) { + Track* const track = track_entries_[i]; + delete track; + } + delete [] track_entries_; + } +} + +bool Tracks::AddTrack(Track* track, int32 number) { + if (number < 0) + return false; + + // This muxer only supports track numbers in the range [1, 126], in + // order to be able (to use Matroska integer representation) to + // serialize the block header (of which the track number is a part) + 
// for a frame using exactly 4 bytes. + + if (number > 0x7E) + return false; + + uint32 track_num = number; + + if (track_num > 0) { + // Check to make sure a track does not already have |track_num|. + for (uint32 i = 0; i < track_entries_size_; ++i) { + if (track_entries_[i]->number() == track_num) + return false; + } + } + + const uint32 count = track_entries_size_ + 1; + + Track** const track_entries = new (std::nothrow) Track*[count]; // NOLINT + if (!track_entries) + return false; + + for (uint32 i = 0; i < track_entries_size_; ++i) { + track_entries[i] = track_entries_[i]; + } + + delete [] track_entries_; + + // Find the lowest availible track number > 0. + if (track_num == 0) { + track_num = count; + + // Check to make sure a track does not already have |track_num|. + bool exit = false; + do { + exit = true; + for (uint32 i = 0; i < track_entries_size_; ++i) { + if (track_entries[i]->number() == track_num) { + track_num++; + exit = false; + break; + } + } + } while (!exit); + } + track->set_number(track_num); + + track_entries_ = track_entries; + track_entries_[track_entries_size_] = track; + track_entries_size_ = count; + return true; +} + +const Track* Tracks::GetTrackByIndex(uint32 index) const { + if (track_entries_ == NULL) + return NULL; + + if (index >= track_entries_size_) + return NULL; + + return track_entries_[index]; +} + +Track* Tracks::GetTrackByNumber(uint64 track_number) const { + const int32 count = track_entries_size(); + for (int32 i = 0; i < count; ++i) { + if (track_entries_[i]->number() == track_number) + return track_entries_[i]; + } + + return NULL; +} + +bool Tracks::TrackIsAudio(uint64 track_number) const { + const Track* const track = GetTrackByNumber(track_number); + + if (track->type() == kAudio) + return true; + + return false; +} + +bool Tracks::TrackIsVideo(uint64 track_number) const { + const Track* const track = GetTrackByNumber(track_number); + + if (track->type() == kVideo) + return true; + + return false; +} + +bool 
Tracks::Write(IMkvWriter* writer) const { + uint64 size = 0; + const int32 count = track_entries_size(); + for (int32 i = 0; i < count; ++i) { + const Track* const track = GetTrackByIndex(i); + + if (!track) + return false; + + size += track->Size(); + } + + if (!WriteEbmlMasterElement(writer, kMkvTracks, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + for (int32 i = 0; i < count; ++i) { + const Track* const track = GetTrackByIndex(i); + if (!track->Write(writer)) + return false; + } + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// Chapter Class + +bool Chapter::set_id(const char* id) { + return StrCpy(id, &id_); +} + +void Chapter::set_time(const Segment& segment, + uint64 start_ns, + uint64 end_ns) { + const SegmentInfo* const info = segment.GetSegmentInfo(); + const uint64 timecode_scale = info->timecode_scale(); + start_timecode_ = start_ns / timecode_scale; + end_timecode_ = end_ns / timecode_scale; +} + +bool Chapter::add_string(const char* title, + const char* language, + const char* country) { + if (!ExpandDisplaysArray()) + return false; + + Display& d = displays_[displays_count_++]; + d.Init(); + + if (!d.set_title(title)) + return false; + + if (!d.set_language(language)) + return false; + + if (!d.set_country(country)) + return false; + + return true; +} + +Chapter::Chapter() { + // This ctor only constructs the object. Proper initialization is + // done in Init() (called in Chapters::AddChapter()). The only + // reason we bother implementing this ctor is because we had to + // declare it as private (along with the dtor), in order to prevent + // clients from creating Chapter instances (a privelege we grant + // only to the Chapters class). 
Doing no initialization here also + // means that creating arrays of chapter objects is more efficient, + // because we only initialize each new chapter object as it becomes + // active on the array. +} + +Chapter::~Chapter() { +} + +void Chapter::Init(unsigned int* seed) { + id_ = NULL; + displays_ = NULL; + displays_size_ = 0; + displays_count_ = 0; + uid_ = MakeUID(seed); +} + +void Chapter::ShallowCopy(Chapter* dst) const { + dst->id_ = id_; + dst->start_timecode_ = start_timecode_; + dst->end_timecode_ = end_timecode_; + dst->uid_ = uid_; + dst->displays_ = displays_; + dst->displays_size_ = displays_size_; + dst->displays_count_ = displays_count_; +} + +void Chapter::Clear() { + StrCpy(NULL, &id_); + + while (displays_count_ > 0) { + Display& d = displays_[--displays_count_]; + d.Clear(); + } + + delete [] displays_; + displays_ = NULL; + + displays_size_ = 0; +} + +bool Chapter::ExpandDisplaysArray() { + if (displays_size_ > displays_count_) + return true; // nothing to do yet + + const int size = (displays_size_ == 0) ? 
1 : 2 * displays_size_; + + Display* const displays = new (std::nothrow) Display[size]; // NOLINT + if (displays == NULL) + return false; + + for (int idx = 0; idx < displays_count_; ++idx) { + displays[idx] = displays_[idx]; // shallow copy + } + + delete [] displays_; + + displays_ = displays; + displays_size_ = size; + + return true; +} + +uint64 Chapter::WriteAtom(IMkvWriter* writer) const { + uint64 payload_size = + EbmlElementSize(kMkvChapterStringUID, id_) + + EbmlElementSize(kMkvChapterUID, uid_) + + EbmlElementSize(kMkvChapterTimeStart, start_timecode_) + + EbmlElementSize(kMkvChapterTimeEnd, end_timecode_); + + for (int idx = 0; idx < displays_count_; ++idx) { + const Display& d = displays_[idx]; + payload_size += d.WriteDisplay(NULL); + } + + const uint64 atom_size = + EbmlMasterElementSize(kMkvChapterAtom, payload_size) + + payload_size; + + if (writer == NULL) + return atom_size; + + const int64 start = writer->Position(); + + if (!WriteEbmlMasterElement(writer, kMkvChapterAtom, payload_size)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterStringUID, id_)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterUID, uid_)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterTimeStart, start_timecode_)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterTimeEnd, end_timecode_)) + return 0; + + for (int idx = 0; idx < displays_count_; ++idx) { + const Display& d = displays_[idx]; + + if (!d.WriteDisplay(writer)) + return 0; + } + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != atom_size) + return 0; + + return atom_size; +} + +void Chapter::Display::Init() { + title_ = NULL; + language_ = NULL; + country_ = NULL; +} + +void Chapter::Display::Clear() { + StrCpy(NULL, &title_); + StrCpy(NULL, &language_); + StrCpy(NULL, &country_); +} + +bool Chapter::Display::set_title(const char* title) { + return StrCpy(title, &title_); +} + +bool Chapter::Display::set_language(const char* language) { + 
return StrCpy(language, &language_); +} + +bool Chapter::Display::set_country(const char* country) { + return StrCpy(country, &country_); +} + +uint64 Chapter::Display::WriteDisplay(IMkvWriter* writer) const { + uint64 payload_size = EbmlElementSize(kMkvChapString, title_); + + if (language_) + payload_size += EbmlElementSize(kMkvChapLanguage, language_); + + if (country_) + payload_size += EbmlElementSize(kMkvChapCountry, country_); + + const uint64 display_size = + EbmlMasterElementSize(kMkvChapterDisplay, payload_size) + + payload_size; + + if (writer == NULL) + return display_size; + + const int64 start = writer->Position(); + + if (!WriteEbmlMasterElement(writer, kMkvChapterDisplay, payload_size)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapString, title_)) + return 0; + + if (language_) { + if (!WriteEbmlElement(writer, kMkvChapLanguage, language_)) + return 0; + } + + if (country_) { + if (!WriteEbmlElement(writer, kMkvChapCountry, country_)) + return 0; + } + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != display_size) + return 0; + + return display_size; +} + +/////////////////////////////////////////////////////////////// +// +// Chapters Class + +Chapters::Chapters() + : chapters_size_(0), + chapters_count_(0), + chapters_(NULL) { +} + +Chapters::~Chapters() { + while (chapters_count_ > 0) { + Chapter& chapter = chapters_[--chapters_count_]; + chapter.Clear(); + } + + delete [] chapters_; + chapters_ = NULL; +} + +int Chapters::Count() const { + return chapters_count_; +} + +Chapter* Chapters::AddChapter(unsigned int* seed) { + if (!ExpandChaptersArray()) + return NULL; + + Chapter& chapter = chapters_[chapters_count_++]; + chapter.Init(seed); + + return &chapter; +} + +bool Chapters::Write(IMkvWriter* writer) const { + if (writer == NULL) + return false; + + const uint64 payload_size = WriteEdition(NULL); // return size only + + if (!WriteEbmlMasterElement(writer, kMkvChapters, payload_size)) + return 
false; + + const int64 start = writer->Position(); + + if (WriteEdition(writer) == 0) // error + return false; + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != payload_size) + return false; + + return true; +} + +bool Chapters::ExpandChaptersArray() { + if (chapters_size_ > chapters_count_) + return true; // nothing to do yet + + const int size = (chapters_size_ == 0) ? 1 : 2 * chapters_size_; + + Chapter* const chapters = new (std::nothrow) Chapter[size]; // NOLINT + if (chapters == NULL) + return false; + + for (int idx = 0; idx < chapters_count_; ++idx) { + const Chapter& src = chapters_[idx]; + Chapter* const dst = chapters + idx; + src.ShallowCopy(dst); + } + + delete [] chapters_; + + chapters_ = chapters; + chapters_size_ = size; + + return true; +} + +uint64 Chapters::WriteEdition(IMkvWriter* writer) const { + uint64 payload_size = 0; + + for (int idx = 0; idx < chapters_count_; ++idx) { + const Chapter& chapter = chapters_[idx]; + payload_size += chapter.WriteAtom(NULL); + } + + const uint64 edition_size = + EbmlMasterElementSize(kMkvEditionEntry, payload_size) + + payload_size; + + if (writer == NULL) // return size only + return edition_size; + + const int64 start = writer->Position(); + + if (!WriteEbmlMasterElement(writer, kMkvEditionEntry, payload_size)) + return 0; // error + + for (int idx = 0; idx < chapters_count_; ++idx) { + const Chapter& chapter = chapters_[idx]; + + const uint64 chapter_size = chapter.WriteAtom(writer); + if (chapter_size == 0) // error + return 0; + } + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != edition_size) + return 0; + + return edition_size; +} + +/////////////////////////////////////////////////////////////// +// +// Cluster class + +Cluster::Cluster(uint64 timecode, int64 cues_pos) + : blocks_added_(0), + finalized_(false), + header_written_(false), + payload_size_(0), + position_for_cues_(cues_pos), + size_position_(-1), + 
timecode_(timecode), + writer_(NULL) { +} + +Cluster::~Cluster() { +} + +bool Cluster::Init(IMkvWriter* ptr_writer) { + if (!ptr_writer) { + return false; + } + writer_ = ptr_writer; + return true; +} + +bool Cluster::AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 abs_timecode, + bool is_key) { + return DoWriteBlock(frame, + length, + track_number, + abs_timecode, + is_key ? 1 : 0, + &WriteSimpleBlock); +} + +bool Cluster::AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 abs_timecode, + bool is_key) { + return DoWriteBlockWithAdditional(frame, + length, + additional, + additional_length, + add_id, + track_number, + abs_timecode, + is_key ? 1 : 0, + &WriteBlockWithAdditional); +} + +bool Cluster::AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 abs_timecode, + bool is_key) { + return DoWriteBlockWithDiscardPadding(frame, + length, + discard_padding, + track_number, + abs_timecode, + is_key ? 
1 : 0, + &WriteBlockWithDiscardPadding); +} + +bool Cluster::AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 abs_timecode, + uint64 duration_timecode) { + return DoWriteBlock(frame, + length, + track_number, + abs_timecode, + duration_timecode, + &WriteMetadataBlock); +} + +void Cluster::AddPayloadSize(uint64 size) { + payload_size_ += size; +} + +bool Cluster::Finalize() { + if (!writer_ || finalized_ || size_position_ == -1) + return false; + + if (writer_->Seekable()) { + const int64 pos = writer_->Position(); + + if (writer_->Position(size_position_)) + return false; + + if (WriteUIntSize(writer_, payload_size(), 8)) + return false; + + if (writer_->Position(pos)) + return false; + } + + finalized_ = true; + + return true; +} + +uint64 Cluster::Size() const { + const uint64 element_size = + EbmlMasterElementSize(kMkvCluster, + 0xFFFFFFFFFFFFFFFFULL) + payload_size_; + return element_size; +} + +template <typename Type> +bool Cluster::PreWriteBlock(Type* write_function) { + if (write_function == NULL) + return false; + + if (finalized_) + return false; + + if (!header_written_) { + if (!WriteClusterHeader()) + return false; + } + + return true; +} + +void Cluster::PostWriteBlock(uint64 element_size) { + AddPayloadSize(element_size); + ++blocks_added_; +} + +bool Cluster::IsValidTrackNumber(uint64 track_number) const { + return (track_number > 0 && track_number <= 0x7E); +} + +int64 Cluster::GetRelativeTimecode(int64 abs_timecode) const { + const int64 cluster_timecode = this->Cluster::timecode(); + const int64 rel_timecode = + static_cast<int64>(abs_timecode) - cluster_timecode; + + if (rel_timecode < 0 || rel_timecode > kMaxBlockTimecode) + return -1; + + return rel_timecode; +} + +bool Cluster::DoWriteBlock( + const uint8* frame, + uint64 length, + uint64 track_number, + uint64 abs_timecode, + uint64 generic_arg, + WriteBlock write_block) { + if (frame == NULL || length == 0) + return false; + + if 
(!IsValidTrackNumber(track_number)) + return false; + + const int64 rel_timecode = GetRelativeTimecode(abs_timecode); + if (rel_timecode < 0) + return false; + + if (!PreWriteBlock(write_block)) + return false; + + const uint64 element_size = (*write_block)(writer_, + frame, + length, + track_number, + rel_timecode, + generic_arg); + if (element_size == 0) + return false; + + PostWriteBlock(element_size); + return true; +} + +bool Cluster::DoWriteBlockWithAdditional( + const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 abs_timecode, + uint64 generic_arg, + WriteBlockAdditional write_block) { + if (frame == NULL || length == 0 || + additional == NULL || additional_length == 0) + return false; + + if (!IsValidTrackNumber(track_number)) + return false; + + const int64 rel_timecode = GetRelativeTimecode(abs_timecode); + if (rel_timecode < 0) + return false; + + if (!PreWriteBlock(write_block)) + return false; + + const uint64 element_size = (*write_block)(writer_, + frame, + length, + additional, + additional_length, + add_id, + track_number, + rel_timecode, + generic_arg); + if (element_size == 0) + return false; + + PostWriteBlock(element_size); + return true; +} + +bool Cluster::DoWriteBlockWithDiscardPadding( + const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 abs_timecode, + uint64 generic_arg, + WriteBlockDiscardPadding write_block) { + if (frame == NULL || length == 0 || discard_padding <= 0) + return false; + + if (!IsValidTrackNumber(track_number)) + return false; + + const int64 rel_timecode = GetRelativeTimecode(abs_timecode); + if (rel_timecode < 0) + return false; + + if (!PreWriteBlock(write_block)) + return false; + + const uint64 element_size = (*write_block)(writer_, + frame, + length, + discard_padding, + track_number, + rel_timecode, + generic_arg); + if (element_size == 0) + return false; + + 
PostWriteBlock(element_size); + return true; +} + +bool Cluster::WriteClusterHeader() { + if (finalized_) + return false; + + if (WriteID(writer_, kMkvCluster)) + return false; + + // Save for later. + size_position_ = writer_->Position(); + + // Write "unknown" (EBML coded -1) as cluster size value. We need to write 8 + // bytes because we do not know how big our cluster will be. + if (SerializeInt(writer_, kEbmlUnknownValue, 8)) + return false; + + if (!WriteEbmlElement(writer_, kMkvTimecode, timecode())) + return false; + AddPayloadSize(EbmlElementSize(kMkvTimecode, timecode())); + header_written_ = true; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// SeekHead Class + +SeekHead::SeekHead() : start_pos_(0ULL) { + for (int32 i = 0; i < kSeekEntryCount; ++i) { + seek_entry_id_[i] = 0; + seek_entry_pos_[i] = 0; + } +} + +SeekHead::~SeekHead() { +} + +bool SeekHead::Finalize(IMkvWriter* writer) const { + if (writer->Seekable()) { + if (start_pos_ == -1) + return false; + + uint64 payload_size = 0; + uint64 entry_size[kSeekEntryCount]; + + for (int32 i = 0; i < kSeekEntryCount; ++i) { + if (seek_entry_id_[i] != 0) { + entry_size[i] = EbmlElementSize( + kMkvSeekID, + static_cast<uint64>(seek_entry_id_[i])); + entry_size[i] += EbmlElementSize(kMkvSeekPosition, seek_entry_pos_[i]); + + payload_size += EbmlMasterElementSize(kMkvSeek, entry_size[i]) + + entry_size[i]; + } + } + + // No SeekHead elements + if (payload_size == 0) + return true; + + const int64 pos = writer->Position(); + if (writer->Position(start_pos_)) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvSeekHead, payload_size)) + return false; + + for (int32 i = 0; i < kSeekEntryCount; ++i) { + if (seek_entry_id_[i] != 0) { + if (!WriteEbmlMasterElement(writer, kMkvSeek, entry_size[i])) + return false; + + if (!WriteEbmlElement(writer, + kMkvSeekID, + static_cast<uint64>(seek_entry_id_[i]))) + return false; + + if (!WriteEbmlElement(writer, 
kMkvSeekPosition, seek_entry_pos_[i])) + return false; + } + } + + const uint64 total_entry_size = kSeekEntryCount * MaxEntrySize(); + const uint64 total_size = + EbmlMasterElementSize(kMkvSeekHead, + total_entry_size) + total_entry_size; + const int64 size_left = total_size - (writer->Position() - start_pos_); + + const uint64 bytes_written = WriteVoidElement(writer, size_left); + if (!bytes_written) + return false; + + if (writer->Position(pos)) + return false; + } + + return true; +} + +bool SeekHead::Write(IMkvWriter* writer) { + const uint64 entry_size = kSeekEntryCount * MaxEntrySize(); + const uint64 size = EbmlMasterElementSize(kMkvSeekHead, entry_size); + + start_pos_ = writer->Position(); + + const uint64 bytes_written = WriteVoidElement(writer, size + entry_size); + if (!bytes_written) + return false; + + return true; +} + +bool SeekHead::AddSeekEntry(uint32 id, uint64 pos) { + for (int32 i = 0; i < kSeekEntryCount; ++i) { + if (seek_entry_id_[i] == 0) { + seek_entry_id_[i] = id; + seek_entry_pos_[i] = pos; + return true; + } + } + return false; +} + +uint32 SeekHead::GetId(int index) const { + if (index < 0 || index >= kSeekEntryCount) + return UINT_MAX; + return seek_entry_id_[index]; +} + +uint64 SeekHead::GetPosition(int index) const { + if (index < 0 || index >= kSeekEntryCount) + return ULLONG_MAX; + return seek_entry_pos_[index]; +} + +bool SeekHead::SetSeekEntry(int index, uint32 id, uint64 position) { + if (index < 0 || index >= kSeekEntryCount) + return false; + seek_entry_id_[index] = id; + seek_entry_pos_[index] = position; + return true; +} + +uint64 SeekHead::MaxEntrySize() const { + const uint64 max_entry_payload_size = + EbmlElementSize(kMkvSeekID, 0xffffffffULL) + + EbmlElementSize(kMkvSeekPosition, 0xffffffffffffffffULL); + const uint64 max_entry_size = + EbmlMasterElementSize(kMkvSeek, max_entry_payload_size) + + max_entry_payload_size; + + return max_entry_size; +} + +/////////////////////////////////////////////////////////////// +// 
+// SegmentInfo Class + +SegmentInfo::SegmentInfo() + : duration_(-1.0), + muxing_app_(NULL), + timecode_scale_(1000000ULL), + writing_app_(NULL), + duration_pos_(-1) { +} + +SegmentInfo::~SegmentInfo() { + delete [] muxing_app_; + delete [] writing_app_; +} + +bool SegmentInfo::Init() { + int32 major; + int32 minor; + int32 build; + int32 revision; + GetVersion(&major, &minor, &build, &revision); + char temp[256]; +#ifdef _MSC_VER + sprintf_s(temp, + sizeof(temp)/sizeof(temp[0]), + "libwebm-%d.%d.%d.%d", + major, + minor, + build, + revision); +#else + snprintf(temp, + sizeof(temp)/sizeof(temp[0]), + "libwebm-%d.%d.%d.%d", + major, + minor, + build, + revision); +#endif + + const size_t app_len = strlen(temp) + 1; + + delete [] muxing_app_; + + muxing_app_ = new (std::nothrow) char[app_len]; // NOLINT + if (!muxing_app_) + return false; + +#ifdef _MSC_VER + strcpy_s(muxing_app_, app_len, temp); +#else + strcpy(muxing_app_, temp); +#endif + + set_writing_app(temp); + if (!writing_app_) + return false; + return true; +} + +bool SegmentInfo::Finalize(IMkvWriter* writer) const { + if (!writer) + return false; + + if (duration_ > 0.0) { + if (writer->Seekable()) { + if (duration_pos_ == -1) + return false; + + const int64 pos = writer->Position(); + + if (writer->Position(duration_pos_)) + return false; + + if (!WriteEbmlElement(writer, + kMkvDuration, + static_cast<float>(duration_))) + return false; + + if (writer->Position(pos)) + return false; + } + } + + return true; +} + +bool SegmentInfo::Write(IMkvWriter* writer) { + if (!writer || !muxing_app_ || !writing_app_) + return false; + + uint64 size = EbmlElementSize(kMkvTimecodeScale, timecode_scale_); + if (duration_ > 0.0) + size += EbmlElementSize(kMkvDuration, static_cast<float>(duration_)); + size += EbmlElementSize(kMkvMuxingApp, muxing_app_); + size += EbmlElementSize(kMkvWritingApp, writing_app_); + + if (!WriteEbmlMasterElement(writer, kMkvInfo, size)) + return false; + + const int64 payload_position = 
writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvTimecodeScale, timecode_scale_)) + return false; + + if (duration_ > 0.0) { + // Save for later + duration_pos_ = writer->Position(); + + if (!WriteEbmlElement(writer, kMkvDuration, static_cast<float>(duration_))) + return false; + } + + if (!WriteEbmlElement(writer, kMkvMuxingApp, muxing_app_)) + return false; + if (!WriteEbmlElement(writer, kMkvWritingApp, writing_app_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +void SegmentInfo::set_muxing_app(const char* app) { + if (app) { + const size_t length = strlen(app) + 1; + char* temp_str = new (std::nothrow) char[length]; // NOLINT + if (!temp_str) + return; + +#ifdef _MSC_VER + strcpy_s(temp_str, length, app); +#else + strcpy(temp_str, app); +#endif + + delete [] muxing_app_; + muxing_app_ = temp_str; + } +} + +void SegmentInfo::set_writing_app(const char* app) { + if (app) { + const size_t length = strlen(app) + 1; + char* temp_str = new (std::nothrow) char[length]; // NOLINT + if (!temp_str) + return; + +#ifdef _MSC_VER + strcpy_s(temp_str, length, app); +#else + strcpy(temp_str, app); +#endif + + delete [] writing_app_; + writing_app_ = temp_str; + } +} + +/////////////////////////////////////////////////////////////// +// +// Segment Class + +Segment::Segment() + : chunk_count_(0), + chunk_name_(NULL), + chunk_writer_cluster_(NULL), + chunk_writer_cues_(NULL), + chunk_writer_header_(NULL), + chunking_(false), + chunking_base_name_(NULL), + cluster_list_(NULL), + cluster_list_capacity_(0), + cluster_list_size_(0), + cues_position_(kAfterClusters), + cues_track_(0), + force_new_cluster_(false), + frames_(NULL), + frames_capacity_(0), + frames_size_(0), + has_video_(false), + header_written_(false), + last_block_duration_(0), + last_timestamp_(0), + 
max_cluster_duration_(kDefaultMaxClusterDuration), + max_cluster_size_(0), + mode_(kFile), + new_cuepoint_(false), + output_cues_(true), + payload_pos_(0), + size_position_(0), + writer_cluster_(NULL), + writer_cues_(NULL), + writer_header_(NULL) { + const time_t curr_time = time(NULL); + seed_ = static_cast<unsigned int>(curr_time); +#ifdef _WIN32 + srand(seed_); +#endif +} + +Segment::~Segment() { + if (cluster_list_) { + for (int32 i = 0; i < cluster_list_size_; ++i) { + Cluster* const cluster = cluster_list_[i]; + delete cluster; + } + delete [] cluster_list_; + } + + if (frames_) { + for (int32 i = 0; i < frames_size_; ++i) { + Frame* const frame = frames_[i]; + delete frame; + } + delete [] frames_; + } + + delete [] chunk_name_; + delete [] chunking_base_name_; + + if (chunk_writer_cluster_) { + chunk_writer_cluster_->Close(); + delete chunk_writer_cluster_; + } + if (chunk_writer_cues_) { + chunk_writer_cues_->Close(); + delete chunk_writer_cues_; + } + if (chunk_writer_header_) { + chunk_writer_header_->Close(); + delete chunk_writer_header_; + } +} + +void Segment::MoveCuesBeforeClustersHelper(uint64 diff, + int32 index, + uint64* cues_size) { + const uint64 old_cues_size = *cues_size; + CuePoint* const cue_point = cues_.GetCueByIndex(index); + if (cue_point == NULL) + return; + const uint64 old_cue_point_size = cue_point->Size(); + const uint64 cluster_pos = cue_point->cluster_pos() + diff; + cue_point->set_cluster_pos(cluster_pos); // update the new cluster position + // New size of the cue is computed as follows + // Let a = current size of Cues Element + // Let b = Difference in Cue Point's size after this pass + // Let c = Difference in length of Cues Element's size + // (This is computed as CodedSize(a + b) - CodedSize(a) + // Let d = a + b + c. Now d is the new size of the Cues element which is + // passed on to the next recursive call. 
+ const uint64 cue_point_size_diff = cue_point->Size() - old_cue_point_size; + const uint64 cue_size_diff = GetCodedUIntSize(*cues_size + + cue_point_size_diff) - + GetCodedUIntSize(*cues_size); + *cues_size += cue_point_size_diff + cue_size_diff; + diff = *cues_size - old_cues_size; + if (diff > 0) { + for (int32 i = 0; i < cues_.cue_entries_size(); ++i) { + MoveCuesBeforeClustersHelper(diff, i, cues_size); + } + } +} + +void Segment::MoveCuesBeforeClusters() { + const uint64 current_cue_size = cues_.Size(); + uint64 cue_size = current_cue_size; + for (int32 i = 0; i < cues_.cue_entries_size(); i++) + MoveCuesBeforeClustersHelper(current_cue_size, i, &cue_size); + + // Adjust the Seek Entry to reflect the change in position + // of Cluster and Cues + int32 cluster_index = 0; + int32 cues_index = 0; + for (int32 i = 0; i < SeekHead::kSeekEntryCount; ++i) { + if (seek_head_.GetId(i) == kMkvCluster) + cluster_index = i; + if (seek_head_.GetId(i) == kMkvCues) + cues_index = i; + } + seek_head_.SetSeekEntry(cues_index, kMkvCues, + seek_head_.GetPosition(cluster_index)); + seek_head_.SetSeekEntry(cluster_index, kMkvCluster, + cues_.Size() + seek_head_.GetPosition(cues_index)); +} + +bool Segment::Init(IMkvWriter* ptr_writer) { + if (!ptr_writer) { + return false; + } + writer_cluster_ = ptr_writer; + writer_cues_ = ptr_writer; + writer_header_ = ptr_writer; + return segment_info_.Init(); +} + +bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, + IMkvWriter* writer) { + if (!writer->Seekable() || chunking_) + return false; + const int64 cluster_offset = cluster_list_[0]->size_position() - + GetUIntSize(kMkvCluster); + + // Copy the headers. + if (!ChunkedCopy(reader, writer, 0, cluster_offset)) + return false; + + // Recompute cue positions and seek entries. + MoveCuesBeforeClusters(); + + // Write cues and seek entries. + // TODO(vigneshv): As of now, it's safe to call seek_head_.Finalize() for the + // second time with a different writer object. 
But the name Finalize() doesn't + // indicate something we want to call more than once. So consider renaming it + // to write() or some such. + if (!cues_.Write(writer) || !seek_head_.Finalize(writer)) + return false; + + // Copy the Clusters. + if (!ChunkedCopy(reader, writer, cluster_offset, + cluster_end_offset_ - cluster_offset)) + return false; + + // Update the Segment size in case the Cues size has changed. + const int64 pos = writer->Position(); + const int64 segment_size = writer->Position() - payload_pos_; + if (writer->Position(size_position_) || + WriteUIntSize(writer, segment_size, 8) || + writer->Position(pos)) + return false; + return true; +} + +bool Segment::Finalize() { + if (WriteFramesAll() < 0) + return false; + + if (mode_ == kFile) { + if (cluster_list_size_ > 0) { + // Update last cluster's size + Cluster* const old_cluster = cluster_list_[cluster_list_size_-1]; + + if (!old_cluster || !old_cluster->Finalize()) + return false; + } + + if (chunking_ && chunk_writer_cluster_) { + chunk_writer_cluster_->Close(); + chunk_count_++; + } + + const double duration = + (static_cast<double>(last_timestamp_) + last_block_duration_) / + segment_info_.timecode_scale(); + segment_info_.set_duration(duration); + if (!segment_info_.Finalize(writer_header_)) + return false; + + if (output_cues_) + if (!seek_head_.AddSeekEntry(kMkvCues, MaxOffset())) + return false; + + if (chunking_) { + if (!chunk_writer_cues_) + return false; + + char* name = NULL; + if (!UpdateChunkName("cues", &name)) + return false; + + const bool cues_open = chunk_writer_cues_->Open(name); + delete [] name; + if (!cues_open) + return false; + } + + cluster_end_offset_ = writer_cluster_->Position(); + + // Write the seek headers and cues + if (output_cues_) + if (!cues_.Write(writer_cues_)) + return false; + + if (!seek_head_.Finalize(writer_header_)) + return false; + + if (writer_header_->Seekable()) { + if (size_position_ == -1) + return false; + + const int64 pos = 
writer_header_->Position(); + const int64 segment_size = MaxOffset(); + + if (segment_size < 1) + return false; + + if (writer_header_->Position(size_position_)) + return false; + + if (WriteUIntSize(writer_header_, segment_size, 8)) + return false; + + if (writer_header_->Position(pos)) + return false; + } + + if (chunking_) { + // Do not close any writers until the segment size has been written, + // otherwise the size may be off. + if (!chunk_writer_cues_ || !chunk_writer_header_) + return false; + + chunk_writer_cues_->Close(); + chunk_writer_header_->Close(); + } + } + + return true; +} + +Track* Segment::AddTrack(int32 number) { + Track* const track = new (std::nothrow) Track(&seed_); // NOLINT + + if (!track) + return NULL; + + if (!tracks_.AddTrack(track, number)) { + delete track; + return NULL; + } + + return track; +} + +Chapter* Segment::AddChapter() { + return chapters_.AddChapter(&seed_); +} + +uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) { + VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_); // NOLINT + if (!track) + return 0; + + track->set_type(Tracks::kVideo); + track->set_codec_id(Tracks::kVp8CodecId); + track->set_width(width); + track->set_height(height); + + tracks_.AddTrack(track, number); + has_video_ = true; + + return track->number(); +} + +bool Segment::AddCuePoint(uint64 timestamp, uint64 track) { + if (cluster_list_size_ < 1) + return false; + + const Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + if (!cluster) + return false; + + CuePoint* const cue = new (std::nothrow) CuePoint(); // NOLINT + if (!cue) + return false; + + cue->set_time(timestamp / segment_info_.timecode_scale()); + cue->set_block_number(cluster->blocks_added()); + cue->set_cluster_pos(cluster->position_for_cues()); + cue->set_track(track); + if (!cues_.AddCue(cue)) + return false; + + new_cuepoint_ = false; + return true; +} + +uint64 Segment::AddAudioTrack(int32 sample_rate, + int32 channels, + int32 number) { 
+ AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_); // NOLINT + if (!track) + return 0; + + track->set_type(Tracks::kAudio); + track->set_codec_id(Tracks::kVorbisCodecId); + track->set_sample_rate(sample_rate); + track->set_channels(channels); + + tracks_.AddTrack(track, number); + + return track->number(); +} + +bool Segment::AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp, + bool is_key) { + if (!frame) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. + if (timestamp < last_timestamp_) + return false; + + // If the segment has a video track hold onto audio frames to make sure the + // audio that is associated with the start time of a video key-frame is + // muxed into the same cluster. + if (has_video_ && tracks_.TrackIsAudio(track_number) && !force_new_cluster_) { + Frame* const new_frame = new (std::nothrow) Frame(); + if (new_frame == NULL || !new_frame->Init(frame, length)) + return false; + new_frame->set_track_number(track_number); + new_frame->set_timestamp(timestamp); + new_frame->set_is_key(is_key); + + if (!QueueFrame(new_frame)) + return false; + + return true; + } + + if (!DoNewClusterProcessing(track_number, timestamp, is_key)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp / timecode_scale; + + if (!cluster->AddFrame(frame, + length, + track_number, + abs_timecode, + is_key)) + return false; + + if (new_cuepoint_ && cues_track_ == track_number) { + if (!AddCuePoint(timestamp, cues_track_)) + return false; + } + + if (timestamp > last_timestamp_) + last_timestamp_ = timestamp; + + return true; +} + +bool Segment::AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 
additional_length, + uint64 add_id, + uint64 track_number, + uint64 timestamp, + bool is_key) { + if (frame == NULL || additional == NULL) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. + if (timestamp < last_timestamp_) + return false; + + // If the segment has a video track hold onto audio frames to make sure the + // audio that is associated with the start time of a video key-frame is + // muxed into the same cluster. + if (has_video_ && tracks_.TrackIsAudio(track_number) && !force_new_cluster_) { + Frame* const new_frame = new (std::nothrow) Frame(); + if (new_frame == NULL || !new_frame->Init(frame, length)) + return false; + new_frame->set_track_number(track_number); + new_frame->set_timestamp(timestamp); + new_frame->set_is_key(is_key); + + if (!QueueFrame(new_frame)) + return false; + + return true; + } + + if (!DoNewClusterProcessing(track_number, timestamp, is_key)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; + if (cluster == NULL) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp / timecode_scale; + + if (!cluster->AddFrameWithAdditional(frame, + length, + additional, + additional_length, + add_id, + track_number, + abs_timecode, + is_key)) + return false; + + if (new_cuepoint_ && cues_track_ == track_number) { + if (!AddCuePoint(timestamp, cues_track_)) + return false; + } + + if (timestamp > last_timestamp_) + last_timestamp_ = timestamp; + + return true; +} + +bool Segment::AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 timestamp, + bool is_key) { + if (frame == NULL || discard_padding <= 0) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. 
+ if (timestamp < last_timestamp_) + return false; + + // If the segment has a video track hold onto audio frames to make sure the + // audio that is associated with the start time of a video key-frame is + // muxed into the same cluster. + if (has_video_ && tracks_.TrackIsAudio(track_number) && !force_new_cluster_) { + Frame* const new_frame = new (std::nothrow) Frame(); + if (new_frame == NULL || !new_frame->Init(frame, length)) + return false; + new_frame->set_track_number(track_number); + new_frame->set_timestamp(timestamp); + new_frame->set_is_key(is_key); + new_frame->set_discard_padding(discard_padding); + + if (!QueueFrame(new_frame)) + return false; + + return true; + } + + if (!DoNewClusterProcessing(track_number, timestamp, is_key)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp / timecode_scale; + + if (!cluster->AddFrameWithDiscardPadding(frame, length, + discard_padding, + track_number, + abs_timecode, + is_key)) { + return false; + } + + if (new_cuepoint_ && cues_track_ == track_number) { + if (!AddCuePoint(timestamp, cues_track_)) + return false; + } + + if (timestamp > last_timestamp_) + last_timestamp_ = timestamp; + + return true; +} + +bool Segment::AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp_ns, + uint64 duration_ns) { + if (!frame) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. 
+ if (timestamp_ns < last_timestamp_) + return false; + + if (!DoNewClusterProcessing(track_number, timestamp_ns, true)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp_ns / timecode_scale; + const uint64 duration_timecode = duration_ns / timecode_scale; + + if (!cluster->AddMetadata(frame, + length, + track_number, + abs_timecode, + duration_timecode)) + return false; + + if (timestamp_ns > last_timestamp_) + last_timestamp_ = timestamp_ns; + + return true; +} + +bool Segment::AddGenericFrame(const Frame* frame) { + last_block_duration_ = frame->duration(); + if (!tracks_.TrackIsAudio(frame->track_number()) && + !tracks_.TrackIsVideo(frame->track_number()) && + frame->duration() > 0) { + return AddMetadata(frame->frame(), + frame->length(), + frame->track_number(), + frame->timestamp(), + frame->duration()); + } else if (frame->additional() && frame->additional_length() > 0) { + return AddFrameWithAdditional(frame->frame(), + frame->length(), + frame->additional(), + frame->additional_length(), + frame->add_id(), + frame->track_number(), + frame->timestamp(), + frame->is_key()); + } else if (frame->discard_padding() > 0) { + return AddFrameWithDiscardPadding(frame->frame(), frame->length(), + frame->discard_padding(), + frame->track_number(), + frame->timestamp(), + frame->is_key()); + } else { + return AddFrame(frame->frame(), + frame->length(), + frame->track_number(), + frame->timestamp(), + frame->is_key()); + } +} + +void Segment::OutputCues(bool output_cues) { + output_cues_ = output_cues; +} + +bool Segment::SetChunking(bool chunking, const char* filename) { + if (chunk_count_ > 0) + return false; + + if (chunking) { + if (!filename) + return false; + + // Check if we are being set to what is already set. 
+ if (chunking_ && !strcmp(filename, chunking_base_name_)) + return true; + + const size_t name_length = strlen(filename) + 1; + char* const temp = new (std::nothrow) char[name_length]; // NOLINT + if (!temp) + return false; + +#ifdef _MSC_VER + strcpy_s(temp, name_length, filename); +#else + strcpy(temp, filename); +#endif + + delete [] chunking_base_name_; + chunking_base_name_ = temp; + + if (!UpdateChunkName("chk", &chunk_name_)) + return false; + + if (!chunk_writer_cluster_) { + chunk_writer_cluster_ = new (std::nothrow) MkvWriter(); // NOLINT + if (!chunk_writer_cluster_) + return false; + } + + if (!chunk_writer_cues_) { + chunk_writer_cues_ = new (std::nothrow) MkvWriter(); // NOLINT + if (!chunk_writer_cues_) + return false; + } + + if (!chunk_writer_header_) { + chunk_writer_header_ = new (std::nothrow) MkvWriter(); // NOLINT + if (!chunk_writer_header_) + return false; + } + + if (!chunk_writer_cluster_->Open(chunk_name_)) + return false; + + const size_t header_length = strlen(filename) + strlen(".hdr") + 1; + char* const header = new (std::nothrow) char[header_length]; // NOLINT + if (!header) + return false; + +#ifdef _MSC_VER + strcpy_s(header, header_length - strlen(".hdr"), chunking_base_name_); + strcat_s(header, header_length, ".hdr"); +#else + strcpy(header, chunking_base_name_); + strcat(header, ".hdr"); +#endif + if (!chunk_writer_header_->Open(header)) { + delete [] header; + return false; + } + + writer_cluster_ = chunk_writer_cluster_; + writer_cues_ = chunk_writer_cues_; + writer_header_ = chunk_writer_header_; + + delete [] header; + } + + chunking_ = chunking; + + return true; +} + +bool Segment::CuesTrack(uint64 track_number) { + const Track* const track = GetTrackByNumber(track_number); + if (!track) + return false; + + cues_track_ = track_number; + return true; +} + +void Segment::ForceNewClusterOnNextFrame() { + force_new_cluster_ = true; +} + +Track* Segment::GetTrackByNumber(uint64 track_number) const { + return 
tracks_.GetTrackByNumber(track_number); +} + +bool Segment::WriteSegmentHeader() { + // TODO(fgalligan): Support more than one segment. + if (!WriteEbmlHeader(writer_header_)) + return false; + + // Write "unknown" (-1) as segment size value. If mode is kFile, Segment + // will write over duration when the file is finalized. + if (WriteID(writer_header_, kMkvSegment)) + return false; + + // Save for later. + size_position_ = writer_header_->Position(); + + // Write "unknown" (EBML coded -1) as segment size value. We need to write 8 + // bytes because if we are going to overwrite the segment size later we do + // not know how big our segment will be. + if (SerializeInt(writer_header_, kEbmlUnknownValue, 8)) + return false; + + payload_pos_ = writer_header_->Position(); + + if (mode_ == kFile && writer_header_->Seekable()) { + // Set the duration > 0.0 so SegmentInfo will write out the duration. When + // the muxer is done writing we will set the correct duration and have + // SegmentInfo upadte it. + segment_info_.set_duration(1.0); + + if (!seek_head_.Write(writer_header_)) + return false; + } + + if (!seek_head_.AddSeekEntry(kMkvInfo, MaxOffset())) + return false; + if (!segment_info_.Write(writer_header_)) + return false; + + if (!seek_head_.AddSeekEntry(kMkvTracks, MaxOffset())) + return false; + if (!tracks_.Write(writer_header_)) + return false; + + if (chapters_.Count() > 0) { + if (!seek_head_.AddSeekEntry(kMkvChapters, MaxOffset())) + return false; + if (!chapters_.Write(writer_header_)) + return false; + } + + if (chunking_ && (mode_ == kLive || !writer_header_->Seekable())) { + if (!chunk_writer_header_) + return false; + + chunk_writer_header_->Close(); + } + + header_written_ = true; + + return true; +} + +// Here we are testing whether to create a new cluster, given a frame +// having time frame_timestamp_ns. 
+// +int Segment::TestFrame(uint64 track_number, + uint64 frame_timestamp_ns, + bool is_key) const { + if (force_new_cluster_) + return 1; + + // If no clusters have been created yet, then create a new cluster + // and write this frame immediately, in the new cluster. This path + // should only be followed once, the first time we attempt to write + // a frame. + + if (cluster_list_size_ <= 0) + return 1; + + // There exists at least one cluster. We must compare the frame to + // the last cluster, in order to determine whether the frame is + // written to the existing cluster, or that a new cluster should be + // created. + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 frame_timecode = frame_timestamp_ns / timecode_scale; + + const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1]; + const uint64 last_cluster_timecode = last_cluster->timecode(); + + // For completeness we test for the case when the frame's timecode + // is less than the cluster's timecode. Although in principle that + // is allowed, this muxer doesn't actually write clusters like that, + // so this indicates a bug somewhere in our algorithm. + + if (frame_timecode < last_cluster_timecode) // should never happen + return -1; // error + + // If the frame has a timestamp significantly larger than the last + // cluster (in Matroska, cluster-relative timestamps are serialized + // using a 16-bit signed integer), then we cannot write this frame + // to that cluster, and so we must create a new cluster. + + const int64 delta_timecode = frame_timecode - last_cluster_timecode; + + if (delta_timecode > kMaxBlockTimecode) + return 2; + + // We decide to create a new cluster when we have a video keyframe. + // This will flush queued (audio) frames, and write the keyframe + // immediately, in the newly-created cluster. 
+ + if (is_key && tracks_.TrackIsVideo(track_number)) + return 1; + + // Create a new cluster if we have accumulated too many frames + // already, where "too many" is defined as "the total time of frames + // in the cluster exceeds a threshold". + + const uint64 delta_ns = delta_timecode * timecode_scale; + + if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_) + return 1; + + // This is similar to the case above, with the difference that a new + // cluster is created when the size of the current cluster exceeds a + // threshold. + + const uint64 cluster_size = last_cluster->payload_size(); + + if (max_cluster_size_ > 0 && cluster_size >= max_cluster_size_) + return 1; + + // There's no need to create a new cluster, so emit this frame now. + + return 0; +} + +bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { + const int32 new_size = cluster_list_size_ + 1; + + if (new_size > cluster_list_capacity_) { + // Add more clusters. + const int32 new_capacity = + (cluster_list_capacity_ <= 0) ? 
1 : cluster_list_capacity_ * 2; + Cluster** const clusters = + new (std::nothrow) Cluster*[new_capacity]; // NOLINT + if (!clusters) + return false; + + for (int32 i = 0; i < cluster_list_size_; ++i) { + clusters[i] = cluster_list_[i]; + } + + delete [] cluster_list_; + + cluster_list_ = clusters; + cluster_list_capacity_ = new_capacity; + } + + if (!WriteFramesLessThan(frame_timestamp_ns)) + return false; + + if (mode_ == kFile) { + if (cluster_list_size_ > 0) { + // Update old cluster's size + Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; + + if (!old_cluster || !old_cluster->Finalize()) + return false; + } + + if (output_cues_) + new_cuepoint_ = true; + } + + if (chunking_ && cluster_list_size_ > 0) { + chunk_writer_cluster_->Close(); + chunk_count_++; + + if (!UpdateChunkName("chk", &chunk_name_)) + return false; + if (!chunk_writer_cluster_->Open(chunk_name_)) + return false; + } + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 frame_timecode = frame_timestamp_ns / timecode_scale; + + uint64 cluster_timecode = frame_timecode; + + if (frames_size_ > 0) { + const Frame* const f = frames_[0]; // earliest queued frame + const uint64 ns = f->timestamp(); + const uint64 tc = ns / timecode_scale; + + if (tc < cluster_timecode) + cluster_timecode = tc; + } + + Cluster*& cluster = cluster_list_[cluster_list_size_]; + const int64 offset = MaxOffset(); + cluster = new (std::nothrow) Cluster(cluster_timecode, offset); // NOLINT + if (!cluster) + return false; + + if (!cluster->Init(writer_cluster_)) + return false; + + cluster_list_size_ = new_size; + return true; +} + +bool Segment::DoNewClusterProcessing(uint64 track_number, + uint64 frame_timestamp_ns, + bool is_key) { + for (;;) { + // Based on the characteristics of the current frame and current + // cluster, decide whether to create a new cluster. 
+ const int result = TestFrame(track_number, frame_timestamp_ns, is_key); + if (result < 0) // error + return false; + + // Always set force_new_cluster_ to false after TestFrame. + force_new_cluster_ = false; + + // A non-zero result means create a new cluster. + if (result > 0 && !MakeNewCluster(frame_timestamp_ns)) + return false; + + // Write queued (audio) frames. + const int frame_count = WriteFramesAll(); + if (frame_count < 0) // error + return false; + + // Write the current frame to the current cluster (if TestFrame + // returns 0) or to a newly created cluster (TestFrame returns 1). + if (result <= 1) + return true; + + // TestFrame returned 2, which means there was a large time + // difference between the cluster and the frame itself. Do the + // test again, comparing the frame to the new cluster. + } +} + +bool Segment::CheckHeaderInfo() { + if (!header_written_) { + if (!WriteSegmentHeader()) + return false; + + if (!seek_head_.AddSeekEntry(kMkvCluster, MaxOffset())) + return false; + + if (output_cues_ && cues_track_ == 0) { + // Check for a video track + for (uint32 i = 0; i < tracks_.track_entries_size(); ++i) { + const Track* const track = tracks_.GetTrackByIndex(i); + if (!track) + return false; + + if (tracks_.TrackIsVideo(track->number())) { + cues_track_ = track->number(); + break; + } + } + + // Set first track found + if (cues_track_ == 0) { + const Track* const track = tracks_.GetTrackByIndex(0); + if (!track) + return false; + + cues_track_ = track->number(); + } + } + } + return true; +} + +bool Segment::UpdateChunkName(const char* ext, char** name) const { + if (!name || !ext) + return false; + + char ext_chk[64]; +#ifdef _MSC_VER + sprintf_s(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); +#else + snprintf(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); +#endif + + const size_t length = strlen(chunking_base_name_) + strlen(ext_chk) + 1; + char* const str = new (std::nothrow) char[length]; // NOLINT + if (!str) + 
return false; + +#ifdef _MSC_VER + strcpy_s(str, length-strlen(ext_chk), chunking_base_name_); + strcat_s(str, length, ext_chk); +#else + strcpy(str, chunking_base_name_); + strcat(str, ext_chk); +#endif + + delete [] *name; + *name = str; + + return true; +} + +int64 Segment::MaxOffset() { + if (!writer_header_) + return -1; + + int64 offset = writer_header_->Position() - payload_pos_; + + if (chunking_) { + for (int32 i = 0; i < cluster_list_size_; ++i) { + Cluster* const cluster = cluster_list_[i]; + offset += cluster->Size(); + } + + if (writer_cues_) + offset += writer_cues_->Position(); + } + + return offset; +} + +bool Segment::QueueFrame(Frame* frame) { + const int32 new_size = frames_size_ + 1; + + if (new_size > frames_capacity_) { + // Add more frames. + const int32 new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2; + + if (new_capacity < 1) + return false; + + Frame** const frames = new (std::nothrow) Frame*[new_capacity]; // NOLINT + if (!frames) + return false; + + for (int32 i = 0; i < frames_size_; ++i) { + frames[i] = frames_[i]; + } + + delete [] frames_; + frames_ = frames; + frames_capacity_ = new_capacity; + } + + frames_[frames_size_++] = frame; + + return true; +} + +int Segment::WriteFramesAll() { + if (frames_ == NULL) + return 0; + + if (cluster_list_size_ < 1) + return -1; + + Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + + if (!cluster) + return -1; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + + for (int32 i = 0; i < frames_size_; ++i) { + Frame*& frame = frames_[i]; + const uint64 frame_timestamp = frame->timestamp(); // ns + const uint64 frame_timecode = frame_timestamp / timecode_scale; + + if (frame->discard_padding() > 0) { + if (!cluster->AddFrameWithDiscardPadding(frame->frame(), + frame->length(), + frame->discard_padding(), + frame->track_number(), + frame_timecode, + frame->is_key())) { + return -1; + } + } else { + if (!cluster->AddFrame(frame->frame(), + frame->length(), + 
frame->track_number(), + frame_timecode, + frame->is_key())) { + return -1; + } + } + + if (new_cuepoint_ && cues_track_ == frame->track_number()) { + if (!AddCuePoint(frame_timestamp, cues_track_)) + return -1; + } + + if (frame_timestamp > last_timestamp_) + last_timestamp_ = frame_timestamp; + + delete frame; + frame = NULL; + } + + const int result = frames_size_; + frames_size_ = 0; + + return result; +} + +bool Segment::WriteFramesLessThan(uint64 timestamp) { + // Check |cluster_list_size_| to see if this is the first cluster. If it is + // the first cluster the audio frames that are less than the first video + // timesatmp will be written in a later step. + if (frames_size_ > 0 && cluster_list_size_ > 0) { + if (!frames_) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + int32 shift_left = 0; + + // TODO(fgalligan): Change this to use the durations of frames instead of + // the next frame's start time if the duration is accurate. 
+ for (int32 i = 1; i < frames_size_; ++i) { + const Frame* const frame_curr = frames_[i]; + + if (frame_curr->timestamp() > timestamp) + break; + + const Frame* const frame_prev = frames_[i-1]; + const uint64 frame_timestamp = frame_prev->timestamp(); + const uint64 frame_timecode = frame_timestamp / timecode_scale; + const int64 discard_padding = frame_prev->discard_padding(); + + if (discard_padding > 0) { + if (!cluster->AddFrameWithDiscardPadding(frame_prev->frame(), + frame_prev->length(), + discard_padding, + frame_prev->track_number(), + frame_timecode, + frame_prev->is_key())) { + return false; + } + } else { + if (!cluster->AddFrame(frame_prev->frame(), + frame_prev->length(), + frame_prev->track_number(), + frame_timecode, + frame_prev->is_key())) { + return false; + } + } + + if (new_cuepoint_ && cues_track_ == frame_prev->track_number()) { + if (!AddCuePoint(frame_timestamp, cues_track_)) + return false; + } + + ++shift_left; + if (frame_timestamp > last_timestamp_) + last_timestamp_ = frame_timestamp; + + delete frame_prev; + } + + if (shift_left > 0) { + if (shift_left >= frames_size_) + return false; + + const int32 new_frames_size = frames_size_ - shift_left; + for (int32 i = 0; i < new_frames_size; ++i) { + frames_[i] = frames_[i+shift_left]; + } + + frames_size_ = new_frames_size; + } + } + + return true; +} + +} // namespace mkvmuxer diff --git a/source/libvpx/third_party/libwebm/mkvmuxer.hpp b/source/libvpx/third_party/libwebm/mkvmuxer.hpp new file mode 100644 index 0000000..63a315e --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxer.hpp @@ -0,0 +1,1403 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVMUXER_HPP +#define MKVMUXER_HPP + +#include "mkvmuxertypes.hpp" + +// For a description of the WebM elements see +// http://www.webmproject.org/code/specs/container/. + +namespace mkvparser { + class IMkvReader; +} // end namespace + +namespace mkvmuxer { + +class MkvWriter; +class Segment; + +/////////////////////////////////////////////////////////////// +// Interface used by the mkvmuxer to write out the Mkv data. +class IMkvWriter { + public: + // Writes out |len| bytes of |buf|. Returns 0 on success. + virtual int32 Write(const void* buf, uint32 len) = 0; + + // Returns the offset of the output position from the beginning of the + // output. + virtual int64 Position() const = 0; + + // Set the current File position. Returns 0 on success. + virtual int32 Position(int64 position) = 0; + + // Returns true if the writer is seekable. + virtual bool Seekable() const = 0; + + // Element start notification. Called whenever an element identifier is about + // to be written to the stream. |element_id| is the element identifier, and + // |position| is the location in the WebM stream where the first octet of the + // element identifier will be written. + // Note: the |MkvId| enumeration in webmids.hpp defines element values. + virtual void ElementStartNotify(uint64 element_id, int64 position) = 0; + + protected: + IMkvWriter(); + virtual ~IMkvWriter(); + + private: + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter); +}; + +// Writes out the EBML header for a WebM file. This function must be called +// before any other libwebm writing functions are called. 
+bool WriteEbmlHeader(IMkvWriter* writer); + +// Copies in Chunk from source to destination between the given byte positions +bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, + int64 start, int64 size); + +/////////////////////////////////////////////////////////////// +// Class to hold data the will be written to a block. +class Frame { + public: + Frame(); + ~Frame(); + + // Copies |frame| data into |frame_|. Returns true on success. + bool Init(const uint8* frame, uint64 length); + + // Copies |additional| data into |additional_|. Returns true on success. + bool AddAdditionalData(const uint8* additional, uint64 length, + uint64 add_id); + + uint64 add_id() const { return add_id_; } + const uint8* additional() const { return additional_; } + uint64 additional_length() const { return additional_length_; } + void set_duration(uint64 duration) { duration_ = duration; } + uint64 duration() const { return duration_; } + const uint8* frame() const { return frame_; } + void set_is_key(bool key) { is_key_ = key; } + bool is_key() const { return is_key_; } + uint64 length() const { return length_; } + void set_track_number(uint64 track_number) { track_number_ = track_number; } + uint64 track_number() const { return track_number_; } + void set_timestamp(uint64 timestamp) { timestamp_ = timestamp; } + uint64 timestamp() const { return timestamp_; } + void set_discard_padding(uint64 discard_padding) { + discard_padding_ = discard_padding; + } + uint64 discard_padding() const { return discard_padding_; } + + private: + // Id of the Additional data. + uint64 add_id_; + + // Pointer to additional data. Owned by this class. + uint8* additional_; + + // Length of the additional data. + uint64 additional_length_; + + // Duration of the frame in nanoseconds. + uint64 duration_; + + // Pointer to the data. Owned by this class. + uint8* frame_; + + // Flag telling if the data should set the key flag of a block. + bool is_key_; + + // Length of the data. 
+ uint64 length_; + + // Mkv track number the data is associated with. + uint64 track_number_; + + // Timestamp of the data in nanoseconds. + uint64 timestamp_; + + // Discard padding for the frame. + int64 discard_padding_; +}; + +/////////////////////////////////////////////////////////////// +// Class to hold one cue point in a Cues element. +class CuePoint { + public: + CuePoint(); + ~CuePoint(); + + // Returns the size in bytes for the entire CuePoint element. + uint64 Size() const; + + // Output the CuePoint element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + void set_time(uint64 time) { time_ = time; } + uint64 time() const { return time_; } + void set_track(uint64 track) { track_ = track; } + uint64 track() const { return track_; } + void set_cluster_pos(uint64 cluster_pos) { cluster_pos_ = cluster_pos; } + uint64 cluster_pos() const { return cluster_pos_; } + void set_block_number(uint64 block_number) { block_number_ = block_number; } + uint64 block_number() const { return block_number_; } + void set_output_block_number(bool output_block_number) { + output_block_number_ = output_block_number; + } + bool output_block_number() const { return output_block_number_; } + + private: + // Returns the size in bytes for the payload of the CuePoint element. + uint64 PayloadSize() const; + + // Absolute timecode according to the segment time base. + uint64 time_; + + // The Track element associated with the CuePoint. + uint64 track_; + + // The position of the Cluster containing the Block. + uint64 cluster_pos_; + + // Number of the Block within the Cluster, starting from 1. + uint64 block_number_; + + // If true the muxer will write out the block number for the cue if the + // block number is different than the default of 1. Default is set to true. + bool output_block_number_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(CuePoint); +}; + +/////////////////////////////////////////////////////////////// +// Cues element. 
+class Cues { + public: + Cues(); + ~Cues(); + + // Adds a cue point to the Cues element. Returns true on success. + bool AddCue(CuePoint* cue); + + // Returns the cue point by index. Returns NULL if there is no cue point + // match. + CuePoint* GetCueByIndex(int32 index) const; + + // Returns the total size of the Cues element + uint64 Size(); + + // Output the Cues element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + int32 cue_entries_size() const { return cue_entries_size_; } + void set_output_block_number(bool output_block_number) { + output_block_number_ = output_block_number; + } + bool output_block_number() const { return output_block_number_; } + + private: + // Number of allocated elements in |cue_entries_|. + int32 cue_entries_capacity_; + + // Number of CuePoints in |cue_entries_|. + int32 cue_entries_size_; + + // CuePoint list. + CuePoint** cue_entries_; + + // If true the muxer will write out the block number for the cue if the + // block number is different than the default of 1. Default is set to true. + bool output_block_number_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cues); +}; + +/////////////////////////////////////////////////////////////// +// ContentEncAESSettings element +class ContentEncAESSettings { + public: + enum { + kCTR = 1 + }; + + ContentEncAESSettings(); + ~ContentEncAESSettings() {} + + // Returns the size in bytes for the ContentEncAESSettings element. + uint64 Size() const; + + // Writes out the ContentEncAESSettings element to |writer|. Returns true on + // success. + bool Write(IMkvWriter* writer) const; + + uint64 cipher_mode() const { return cipher_mode_; } + + private: + // Returns the size in bytes for the payload of the ContentEncAESSettings + // element. 
+ uint64 PayloadSize() const; + + // Sub elements + uint64 cipher_mode_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings); +}; + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +// Elements used to describe if the track data has been encrypted or +// compressed with zlib or header stripping. +// Currently only whole frames can be encrypted with AES. This dictates that +// ContentEncodingOrder will be 0, ContentEncodingScope will be 1, +// ContentEncodingType will be 1, and ContentEncAlgo will be 5. +class ContentEncoding { + public: + ContentEncoding(); + ~ContentEncoding(); + + // Sets the content encryption id. Copies |length| bytes from |id| to + // |enc_key_id_|. Returns true on success. + bool SetEncryptionID(const uint8* id, uint64 length); + + // Returns the size in bytes for the ContentEncoding element. + uint64 Size() const; + + // Writes out the ContentEncoding element to |writer|. Returns true on + // success. + bool Write(IMkvWriter* writer) const; + + uint64 enc_algo() const { return enc_algo_; } + uint64 encoding_order() const { return encoding_order_; } + uint64 encoding_scope() const { return encoding_scope_; } + uint64 encoding_type() const { return encoding_type_; } + ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; } + + private: + // Returns the size in bytes for the encoding elements. + uint64 EncodingSize(uint64 compresion_size, uint64 encryption_size) const; + + // Returns the size in bytes for the encryption elements. + uint64 EncryptionSize() const; + + // Track element names + uint64 enc_algo_; + uint8* enc_key_id_; + uint64 encoding_order_; + uint64 encoding_scope_; + uint64 encoding_type_; + + // ContentEncAESSettings element. + ContentEncAESSettings enc_aes_settings_; + + // Size of the ContentEncKeyID data in bytes. 
+ uint64 enc_key_id_length_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); +}; + +/////////////////////////////////////////////////////////////// +// Track element. +class Track { + public: + // The |seed| parameter is used to synthesize a UID for the track. + explicit Track(unsigned int* seed); + virtual ~Track(); + + // Adds a ContentEncoding element to the Track. Returns true on success. + virtual bool AddContentEncoding(); + + // Returns the ContentEncoding by index. Returns NULL if there is no + // ContentEncoding match. + ContentEncoding* GetContentEncodingByIndex(uint32 index) const; + + // Returns the size in bytes for the payload of the Track element. + virtual uint64 PayloadSize() const; + + // Returns the size in bytes of the Track element. + virtual uint64 Size() const; + + // Output the Track element to the writer. Returns true on success. + virtual bool Write(IMkvWriter* writer) const; + + // Sets the CodecPrivate element of the Track element. Copies |length| + // bytes from |codec_private| to |codec_private_|. Returns true on success. 
+ bool SetCodecPrivate(const uint8* codec_private, uint64 length); + + void set_codec_id(const char* codec_id); + const char* codec_id() const { return codec_id_; } + const uint8* codec_private() const { return codec_private_; } + void set_language(const char* language); + const char* language() const { return language_; } + void set_max_block_additional_id(uint64 max_block_additional_id) { + max_block_additional_id_ = max_block_additional_id; + } + uint64 max_block_additional_id() const { return max_block_additional_id_; } + void set_name(const char* name); + const char* name() const { return name_; } + void set_number(uint64 number) { number_ = number; } + uint64 number() const { return number_; } + void set_type(uint64 type) { type_ = type; } + uint64 type() const { return type_; } + void set_uid(uint64 uid) { uid_ = uid; } + uint64 uid() const { return uid_; } + void set_codec_delay(uint64 codec_delay) { codec_delay_ = codec_delay; } + uint64 codec_delay() const { return codec_delay_; } + void set_seek_pre_roll(uint64 seek_pre_roll) { + seek_pre_roll_ = seek_pre_roll; + } + uint64 seek_pre_roll() const { return seek_pre_roll_; } + + uint64 codec_private_length() const { return codec_private_length_; } + uint32 content_encoding_entries_size() const { + return content_encoding_entries_size_; + } + + private: + // Track element names + char* codec_id_; + uint8* codec_private_; + char* language_; + uint64 max_block_additional_id_; + char* name_; + uint64 number_; + uint64 type_; + uint64 uid_; + uint64 codec_delay_; + uint64 seek_pre_roll_; + + // Size of the CodecPrivate data in bytes. + uint64 codec_private_length_; + + // ContentEncoding element list. + ContentEncoding** content_encoding_entries_; + + // Number of ContentEncoding elements added. + uint32 content_encoding_entries_size_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track); +}; + +/////////////////////////////////////////////////////////////// +// Track that has video specific elements. 
+class VideoTrack : public Track { + public: + // Supported modes for stereo 3D. + enum StereoMode { + kMono = 0, + kSideBySideLeftIsFirst = 1, + kTopBottomRightIsFirst = 2, + kTopBottomLeftIsFirst = 3, + kSideBySideRightIsFirst = 11 + }; + + enum AlphaMode { + kNoAlpha = 0, + kAlpha = 1 + }; + + // The |seed| parameter is used to synthesize a UID for the track. + explicit VideoTrack(unsigned int* seed); + virtual ~VideoTrack(); + + // Returns the size in bytes for the payload of the Track element plus the + // video specific elements. + virtual uint64 PayloadSize() const; + + // Output the VideoTrack element to the writer. Returns true on success. + virtual bool Write(IMkvWriter* writer) const; + + // Sets the video's stereo mode. Returns true on success. + bool SetStereoMode(uint64 stereo_mode); + + // Sets the video's alpha mode. Returns true on success. + bool SetAlphaMode(uint64 alpha_mode); + + void set_display_height(uint64 height) { display_height_ = height; } + uint64 display_height() const { return display_height_; } + void set_display_width(uint64 width) { display_width_ = width; } + uint64 display_width() const { return display_width_; } + void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; } + double frame_rate() const { return frame_rate_; } + void set_height(uint64 height) { height_ = height; } + uint64 height() const { return height_; } + uint64 stereo_mode() { return stereo_mode_; } + uint64 alpha_mode() { return alpha_mode_; } + void set_width(uint64 width) { width_ = width; } + uint64 width() const { return width_; } + + private: + // Returns the size in bytes of the Video element. + uint64 VideoPayloadSize() const; + + // Video track element names. 
+ uint64 display_height_; + uint64 display_width_; + double frame_rate_; + uint64 height_; + uint64 stereo_mode_; + uint64 alpha_mode_; + uint64 width_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack); +}; + +/////////////////////////////////////////////////////////////// +// Track that has audio specific elements. +class AudioTrack : public Track { + public: + // The |seed| parameter is used to synthesize a UID for the track. + explicit AudioTrack(unsigned int* seed); + virtual ~AudioTrack(); + + // Returns the size in bytes for the payload of the Track element plus the + // audio specific elements. + virtual uint64 PayloadSize() const; + + // Output the AudioTrack element to the writer. Returns true on success. + virtual bool Write(IMkvWriter* writer) const; + + void set_bit_depth(uint64 bit_depth) { bit_depth_ = bit_depth; } + uint64 bit_depth() const { return bit_depth_; } + void set_channels(uint64 channels) { channels_ = channels; } + uint64 channels() const { return channels_; } + void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; } + double sample_rate() const { return sample_rate_; } + + private: + // Audio track element names. + uint64 bit_depth_; + uint64 channels_; + double sample_rate_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack); +}; + +/////////////////////////////////////////////////////////////// +// Tracks element +class Tracks { + public: + // Audio and video type defined by the Matroska specs. + enum { + kVideo = 0x1, + kAudio = 0x2 + }; + // Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs. + static const char kOpusCodecId[]; + static const char kVorbisCodecId[]; + static const char kVp8CodecId[]; + static const char kVp9CodecId[]; + + Tracks(); + ~Tracks(); + + // Adds a Track element to the Tracks object. |track| will be owned and + // deleted by the Tracks object. Returns true on success. |number| is the + // number to use for the track. |number| must be >= 0. 
If |number| == 0 + // then the muxer will decide on the track number. + bool AddTrack(Track* track, int32 number); + + // Returns the track by index. Returns NULL if there is no track match. + const Track* GetTrackByIndex(uint32 idx) const; + + // Search the Tracks and return the track that matches |tn|. Returns NULL + // if there is no track match. + Track* GetTrackByNumber(uint64 track_number) const; + + // Returns true if the track number is an audio track. + bool TrackIsAudio(uint64 track_number) const; + + // Returns true if the track number is a video track. + bool TrackIsVideo(uint64 track_number) const; + + // Output the Tracks element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + uint32 track_entries_size() const { return track_entries_size_; } + + private: + // Track element list. + Track** track_entries_; + + // Number of Track elements added. + uint32 track_entries_size_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks); +}; + +/////////////////////////////////////////////////////////////// +// Chapter element +// +class Chapter { + public: + // Set the identifier for this chapter. (This corresponds to the + // Cue Identifier line in WebVTT.) + // TODO(matthewjheaney): the actual serialization of this item in + // MKV is pending. + bool set_id(const char* id); + + // Converts the nanosecond start and stop times of this chapter to + // their corresponding timecode values, and stores them that way. + void set_time(const Segment& segment, + uint64 start_time_ns, + uint64 end_time_ns); + + // Sets the uid for this chapter. Primarily used to enable + // deterministic output from the muxer. + void set_uid(const uint64 uid) { uid_ = uid; } + + // Add a title string to this chapter, per the semantics described + // here: + // http://www.matroska.org/technical/specs/index.html + // + // The title ("chapter string") is a UTF-8 string. 
+ // + // The language has ISO 639-2 representation, described here: + // http://www.loc.gov/standards/iso639-2/englangn.html + // http://www.loc.gov/standards/iso639-2/php/English_list.php + // If you specify NULL as the language value, this implies + // English ("eng"). + // + // The country value corresponds to the codes listed here: + // http://www.iana.org/domains/root/db/ + // + // The function returns false if the string could not be allocated. + bool add_string(const char* title, + const char* language, + const char* country); + + private: + friend class Chapters; + + // For storage of chapter titles that differ by language. + class Display { + public: + // Establish representation invariant for new Display object. + void Init(); + + // Reclaim resources, in anticipation of destruction. + void Clear(); + + // Copies the title to the |title_| member. Returns false on + // error. + bool set_title(const char* title); + + // Copies the language to the |language_| member. Returns false + // on error. + bool set_language(const char* language); + + // Copies the country to the |country_| member. Returns false on + // error. + bool set_country(const char* country); + + // If |writer| is non-NULL, serialize the Display sub-element of + // the Atom into the stream. Returns the Display element size on + // success, 0 if error. + uint64 WriteDisplay(IMkvWriter* writer) const; + + private: + char* title_; + char* language_; + char* country_; + }; + + Chapter(); + ~Chapter(); + + // Establish the representation invariant for a newly-created + // Chapter object. The |seed| parameter is used to create the UID + // for this chapter atom. + void Init(unsigned int* seed); + + // Copies this Chapter object to a different one. This is used when + // expanding a plain array of Chapter objects (see Chapters). + void ShallowCopy(Chapter* dst) const; + + // Reclaim resources used by this Chapter object, pending its + // destruction. 
+ void Clear(); + + // If there is no storage remaining on the |displays_| array for a + // new display object, creates a new, longer array and copies the + // existing Display objects to the new array. Returns false if the + // array cannot be expanded. + bool ExpandDisplaysArray(); + + // If |writer| is non-NULL, serialize the Atom sub-element into the + // stream. Returns the total size of the element on success, 0 if + // error. + uint64 WriteAtom(IMkvWriter* writer) const; + + // The string identifier for this chapter (corresponds to WebVTT cue + // identifier). + char* id_; + + // Start timecode of the chapter. + uint64 start_timecode_; + + // Stop timecode of the chapter. + uint64 end_timecode_; + + // The binary identifier for this chapter. + uint64 uid_; + + // The Atom element can contain multiple Display sub-elements, as + // the same logical title can be rendered in different languages. + Display* displays_; + + // The physical length (total size) of the |displays_| array. + int displays_size_; + + // The logical length (number of active elements) on the |displays_| + // array. + int displays_count_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapter); +}; + +/////////////////////////////////////////////////////////////// +// Chapters element +// +class Chapters { + public: + Chapters(); + ~Chapters(); + + Chapter* AddChapter(unsigned int* seed); + + // Returns the number of chapters that have been added. + int Count() const; + + // Output the Chapters element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + private: + // Expands the chapters_ array if there is not enough space to contain + // another chapter object. Returns true on success. + bool ExpandChaptersArray(); + + // If |writer| is non-NULL, serialize the Edition sub-element of the + // Chapters element into the stream. Returns the Edition element + // size on success, 0 if error. 
+ uint64 WriteEdition(IMkvWriter* writer) const; + + // Total length of the chapters_ array. + int chapters_size_; + + // Number of active chapters on the chapters_ array. + int chapters_count_; + + // Array for storage of chapter objects. + Chapter* chapters_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapters); +}; + +/////////////////////////////////////////////////////////////// +// Cluster element +// +// Notes: +// |Init| must be called before any other method in this class. +class Cluster { + public: + Cluster(uint64 timecode, int64 cues_pos); + ~Cluster(); + + // |timecode| is the absolute timecode of the cluster. |cues_pos| is the + // position for the cluster within the segment that should be written in + // the cues element. + bool Init(IMkvWriter* ptr_writer); + + // Adds a frame to be output in the file. The frame is written out through + // |writer_| if successful. Returns true on success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. + // timecode: Absolute (not relative to cluster) timestamp of the + // frame, expressed in timecode units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timecode, // timecode units (absolute) + bool is_key); + + // Adds a frame to be output in the file. The frame is written out through + // |writer_| if successful. Returns true on success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // additional: Pointer to the additional data + // additional_length: Length of the additional data + // add_id: Value of BlockAddID element + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. 
+ // abs_timecode: Absolute (not relative to cluster) timestamp of the + // frame, expressed in timecode units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 abs_timecode, + bool is_key); + + // Adds a frame to be output in the file. The frame is written out through + // |writer_| if successful. Returns true on success. + // Inputs: + // frame: Pointer to the data. + // length: Length of the data. + // discard_padding: DiscardPadding element value. + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. + // abs_timecode: Absolute (not relative to cluster) timestamp of the + // frame, expressed in timecode units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 abs_timecode, + bool is_key); + + // Writes a frame of metadata to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. + // timecode: Absolute (not relative to cluster) timestamp of the + // metadata frame, expressed in timecode units. + // duration: Duration of metadata frame, in timecode units. + // + // The metadata frame is written as a block group, with a duration + // sub-element but no reference time sub-elements (indicating that + // it is considered a keyframe, per Matroska semantics). 
+ bool AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timecode, // timecode units (absolute) + uint64 duration); // timecode units + + // Increments the size of the cluster's data in bytes. + void AddPayloadSize(uint64 size); + + // Closes the cluster so no more data can be written to it. Will update the + // cluster's size if |writer_| is seekable. Returns true on success. + bool Finalize(); + + // Returns the size in bytes for the entire Cluster element. + uint64 Size() const; + + int64 size_position() const { return size_position_; } + int32 blocks_added() const { return blocks_added_; } + uint64 payload_size() const { return payload_size_; } + int64 position_for_cues() const { return position_for_cues_; } + uint64 timecode() const { return timecode_; } + + private: + // Signature that matches either of WriteSimpleBlock or WriteMetadataBlock + // in the muxer utilities package. + typedef uint64 (*WriteBlock)(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 generic_arg); + + // Signature that matches WriteBlockWithAdditional + // in the muxer utilities package. + typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer, + const uint8* data, + uint64 length, + const uint8* additional, + uint64 add_id, + uint64 additional_length, + uint64 track_number, + int64 timecode, + uint64 is_key); + + // Signature that matches WriteBlockWithDiscardPadding + // in the muxer utilities package. + typedef uint64 (*WriteBlockDiscardPadding)(IMkvWriter* writer, + const uint8* data, + uint64 length, + int64 discard_padding, + uint64 track_number, + int64 timecode, + uint64 is_key); + + // Utility method that confirms that blocks can still be added, and that the + // cluster header has been written. Used by |DoWriteBlock*|. Returns true + // when successful. 
+ template <typename Type> + bool PreWriteBlock(Type* write_function); + + // Utility method used by the |DoWriteBlock*| methods that handles the book + // keeping required after each block is written. + void PostWriteBlock(uint64 element_size); + + // To simplify things, we require that there be fewer than 127 + // tracks -- this allows us to serialize the track number value for + // a stream using a single byte, per the Matroska encoding. + bool IsValidTrackNumber(uint64 track_number) const; + + // Given |abs_timecode|, calculates timecode relative to most recent timecode. + // Returns -1 on failure, or a relative timecode. + int64 GetRelativeTimecode(int64 abs_timecode) const; + + // Used to implement AddFrame and AddMetadata. + bool DoWriteBlock(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 absolute_timecode, + uint64 generic_arg, + WriteBlock write_block); + + // Used to implement AddFrameWithAdditional + bool DoWriteBlockWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 absolute_timecode, + uint64 generic_arg, + WriteBlockAdditional write_block); + + // Used to implement AddFrameWithDiscardPadding + bool DoWriteBlockWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 absolute_timecode, + uint64 generic_arg, + WriteBlockDiscardPadding write_block); + + // Outputs the Cluster header to |writer_|. Returns true on success. + bool WriteClusterHeader(); + + // Number of blocks added to the cluster. + int32 blocks_added_; + + // Flag telling if the cluster has been closed. + bool finalized_; + + // Flag telling if the cluster's header has been written. + bool header_written_; + + // The size of the cluster elements in bytes. + uint64 payload_size_; + + // The file position used for cue points. 
+ const int64 position_for_cues_; + + // The file position of the cluster's size element. + int64 size_position_; + + // The absolute timecode of the cluster. + const uint64 timecode_; + + // Pointer to the writer object. Not owned by this class. + IMkvWriter* writer_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cluster); +}; + +/////////////////////////////////////////////////////////////// +// SeekHead element +class SeekHead { + public: + SeekHead(); + ~SeekHead(); + + // TODO(fgalligan): Change this to reserve a certain size. Then check how + // big the seek entry to be added is as not every seek entry will be the + // maximum size it could be. + // Adds a seek entry to be written out when the element is finalized. |id| + // must be the coded mkv element id. |pos| is the file position of the + // element. Returns true on success. + bool AddSeekEntry(uint32 id, uint64 pos); + + // Writes out SeekHead and SeekEntry elements. Returns true on success. + bool Finalize(IMkvWriter* writer) const; + + // Returns the id of the Seek Entry at the given index. Returns -1 if index is + // out of range. + uint32 GetId(int index) const; + + // Returns the position of the Seek Entry at the given index. Returns -1 if + // index is out of range. + uint64 GetPosition(int index) const; + + // Sets the Seek Entry id and position at given index. + // Returns true on success. + bool SetSeekEntry(int index, uint32 id, uint64 position); + + // Reserves space by writing out a Void element which will be updated with + // a SeekHead element later. Returns true on success. + bool Write(IMkvWriter* writer); + + // We are going to put a cap on the number of Seek Entries. + const static int32 kSeekEntryCount = 5; + + private: + // Returns the maximum size in bytes of one seek entry. + uint64 MaxEntrySize() const; + + // Seek entry id element list. + uint32 seek_entry_id_[kSeekEntryCount]; + + // Seek entry pos element list. 
+ uint64 seek_entry_pos_[kSeekEntryCount]; + + // The file position of SeekHead element. + int64 start_pos_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead); +}; + +/////////////////////////////////////////////////////////////// +// Segment Information element +class SegmentInfo { + public: + SegmentInfo(); + ~SegmentInfo(); + + // Will update the duration if |duration_| is > 0.0. Returns true on success. + bool Finalize(IMkvWriter* writer) const; + + // Sets |muxing_app_| and |writing_app_|. + bool Init(); + + // Output the Segment Information element to the writer. Returns true on + // success. + bool Write(IMkvWriter* writer); + + void set_duration(double duration) { duration_ = duration; } + double duration() const { return duration_; } + void set_muxing_app(const char* app); + const char* muxing_app() const { return muxing_app_; } + void set_timecode_scale(uint64 scale) { timecode_scale_ = scale; } + uint64 timecode_scale() const { return timecode_scale_; } + void set_writing_app(const char* app); + const char* writing_app() const { return writing_app_; } + + private: + // Segment Information element names. + // Initially set to -1 to signify that a duration has not been set and should + // not be written out. + double duration_; + // Set to libwebm-%d.%d.%d.%d, major, minor, build, revision. + char* muxing_app_; + uint64 timecode_scale_; + // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision. + char* writing_app_; + + // The file position of the duration element. + int64 duration_pos_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo); +}; + +/////////////////////////////////////////////////////////////// +// This class represents the main segment in a WebM file. Currently only +// supports one Segment element. +// +// Notes: +// |Init| must be called before any other method in this class. 
+class Segment { + public: + enum Mode { + kLive = 0x1, + kFile = 0x2 + }; + + enum CuesPosition { + kAfterClusters = 0x0, // Position Cues after Clusters - Default + kBeforeClusters = 0x1 // Position Cues before Clusters + }; + + const static uint64 kDefaultMaxClusterDuration = 30000000000ULL; + + Segment(); + ~Segment(); + + // Initializes |SegmentInfo| and returns result. Always returns false when + // |ptr_writer| is NULL. + bool Init(IMkvWriter* ptr_writer); + + // Adds a generic track to the segment. Returns the newly-allocated + // track object (which is owned by the segment) on success, NULL on + // error. |number| is the number to use for the track. |number| + // must be >= 0. If |number| == 0 then the muxer will decide on the + // track number. + Track* AddTrack(int32 number); + + // Adds a Vorbis audio track to the segment. Returns the number of the track + // on success, 0 on error. |number| is the number to use for the audio track. + // |number| must be >= 0. If |number| == 0 then the muxer will decide on + // the track number. + uint64 AddAudioTrack(int32 sample_rate, int32 channels, int32 number); + + // Adds an empty chapter to the chapters of this segment. Returns + // non-NULL on success. After adding the chapter, the caller should + // populate its fields via the Chapter member functions. + Chapter* AddChapter(); + + // Adds a cue point to the Cues element. |timestamp| is the time in + // nanoseconds of the cue's time. |track| is the Track of the Cue. This + // function must be called after AddFrame to calculate the correct + // BlockNumber for the CuePoint. Returns true on success. + bool AddCuePoint(uint64 timestamp, uint64 track); + + // Adds a frame to be output in the file. Returns true on success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. + // timestamp: Timestamp of the frame in nanoseconds from 0. 
+ // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp_ns, + bool is_key); + + // Writes a frame of metadata to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. + // timecode: Absolute timestamp of the metadata frame, expressed + // in nanosecond units. + // duration: Duration of metadata frame, in nanosecond units. + // + // The metadata frame is written as a block group, with a duration + // sub-element but no reference time sub-elements (indicating that + // it is considered a keyframe, per Matroska semantics). + bool AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp_ns, + uint64 duration_ns); + + // Writes a frame with additional data to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data. + // length: Length of the data. + // additional: Pointer to additional data. + // additional_length: Length of additional data. + // add_id: Additional ID which identifies the type of additional data. + // track_number: Track to add the data to. Value returned by Add track + // functions. + // timestamp: Absolute timestamp of the frame, expressed in nanosecond + // units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 timestamp, + bool is_key); + + // Writes a frame with DiscardPadding to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data. + // length: Length of the data. + // discard_padding: DiscardPadding element value. + // track_number: Track to add the data to. 
Value returned by Add track + // functions. + // timestamp: Absolute timestamp of the frame, expressed in nanosecond + // units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 timestamp, + bool is_key); + + // Writes a Frame to the output medium. Chooses the correct way of writing + // the frame (Block vs SimpleBlock) based on the parameters passed. + // Inputs: + // frame: frame object + bool AddGenericFrame(const Frame* frame); + + // Adds a VP8 video track to the segment. Returns the number of the track on + // success, 0 on error. |number| is the number to use for the video track. + // |number| must be >= 0. If |number| == 0 then the muxer will decide on + // the track number. + uint64 AddVideoTrack(int32 width, int32 height, int32 number); + + // This function must be called after Finalize() if you need a copy of the + // output with Cues written before the Clusters. It will return false if the + // writer is not seekable of if chunking is set to true. + // Input parameters: + // reader - an IMkvReader object created with the same underlying file of the + // current writer object. Make sure to close the existing writer + // object before creating this so that all the data is properly + // flushed and available for reading. + // writer - an IMkvWriter object pointing to a *different* file than the one + // pointed by the current writer object. This file will contain the + // Cues element before the Clusters. + bool CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, + IMkvWriter* writer); + + // Sets which track to use for the Cues element. Must have added the track + // before calling this function. Returns true on success. |track_number| is + // returned by the Add track functions. 
+ bool CuesTrack(uint64 track_number); + + // This will force the muxer to create a new Cluster when the next frame is + // added. + void ForceNewClusterOnNextFrame(); + + // Writes out any frames that have not been written out. Finalizes the last + // cluster. May update the size and duration of the segment. May output the + // Cues element. May finalize the SeekHead element. Returns true on success. + bool Finalize(); + + // Returns the Cues object. + Cues* GetCues() { return &cues_; } + + // Returns the Segment Information object. + const SegmentInfo* GetSegmentInfo() const { return &segment_info_; } + SegmentInfo* GetSegmentInfo() { return &segment_info_; } + + // Search the Tracks and return the track that matches |track_number|. + // Returns NULL if there is no track match. + Track* GetTrackByNumber(uint64 track_number) const; + + // Toggles whether to output a cues element. + void OutputCues(bool output_cues); + + // Sets if the muxer will output files in chunks or not. |chunking| is a + // flag telling whether or not to turn on chunking. |filename| is the base + // filename for the chunk files. The header chunk file will be named + // |filename|.hdr and the data chunks will be named + // |filename|_XXXXXX.chk. Chunking implies that the muxer will be writing + // to files so the muxer will use the default MkvWriter class to control + // what data is written to what files. Returns true on success. + // TODO: Should we change the IMkvWriter Interface to add Open and Close? + // That will force the interface to be dependent on files. 
+ bool SetChunking(bool chunking, const char* filename); + + bool chunking() const { return chunking_; } + uint64 cues_track() const { return cues_track_; } + void set_max_cluster_duration(uint64 max_cluster_duration) { + max_cluster_duration_ = max_cluster_duration; + } + uint64 max_cluster_duration() const { return max_cluster_duration_; } + void set_max_cluster_size(uint64 max_cluster_size) { + max_cluster_size_ = max_cluster_size; + } + uint64 max_cluster_size() const { return max_cluster_size_; } + void set_mode(Mode mode) { mode_ = mode; } + Mode mode() const { return mode_; } + CuesPosition cues_position() const { return cues_position_; } + bool output_cues() const { return output_cues_; } + const SegmentInfo* segment_info() const { return &segment_info_; } + + private: + // Checks if header information has been output and initialized. If not it + // will output the Segment element and initialize the SeekHead elment and + // Cues elements. + bool CheckHeaderInfo(); + + // Sets |name| according to how many chunks have been written. |ext| is the + // file extension. |name| must be deleted by the calling app. Returns true + // on success. + bool UpdateChunkName(const char* ext, char** name) const; + + // Returns the maximum offset within the segment's payload. When chunking + // this function is needed to determine offsets of elements within the + // chunked files. Returns -1 on error. + int64 MaxOffset(); + + // Adds the frame to our frame array. + bool QueueFrame(Frame* frame); + + // Output all frames that are queued. Returns -1 on error, otherwise + // it returns the number of frames written. + int WriteFramesAll(); + + // Output all frames that are queued that have an end time that is less + // then |timestamp|. Returns true on success and if there are no frames + // queued. + bool WriteFramesLessThan(uint64 timestamp); + + // Outputs the segment header, Segment Information element, SeekHead element, + // and Tracks element to |writer_|. 
+ bool WriteSegmentHeader(); + + // Given a frame with the specified timestamp (nanosecond units) and + // keyframe status, determine whether a new cluster should be + // created, before writing enqueued frames and the frame itself. The + // function returns one of the following values: + // -1 = error: an out-of-order frame was detected + // 0 = do not create a new cluster, and write frame to the existing cluster + // 1 = create a new cluster, and write frame to that new cluster + // 2 = create a new cluster, and re-run test + int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const; + + // Create a new cluster, using the earlier of the first enqueued + // frame, or the indicated time. Returns true on success. + bool MakeNewCluster(uint64 timestamp_ns); + + // Checks whether a new cluster needs to be created, and if so + // creates a new cluster. Returns false if creation of a new cluster + // was necessary but creation was not successful. + bool DoNewClusterProcessing(uint64 track_num, uint64 timestamp_ns, bool key); + + + // Adjusts Cue Point values (to place Cues before Clusters) so that they + // reflect the correct offsets. + void MoveCuesBeforeClusters(); + + // This function recursively computes the correct cluster offsets (this is + // done to move the Cues before Clusters). It recursively updates the change + // in size (which indicates a change in cluster offset) until no sizes change. + // Parameters: + // diff - indicates the difference in size of the Cues element that needs to + // accounted for. + // index - index in the list of Cues which is currently being adjusted. + // cue_size - size of the Cues element. + void MoveCuesBeforeClustersHelper(uint64 diff, int index, uint64* cue_size); + + // Seeds the random number generator used to make UIDs. + unsigned int seed_; + + // WebM elements + Cues cues_; + SeekHead seek_head_; + SegmentInfo segment_info_; + Tracks tracks_; + Chapters chapters_; + + // Number of chunks written. 
+ int chunk_count_; + + // Current chunk filename. + char* chunk_name_; + + // Default MkvWriter object created by this class used for writing clusters + // out in separate files. + MkvWriter* chunk_writer_cluster_; + + // Default MkvWriter object created by this class used for writing Cues + // element out to a file. + MkvWriter* chunk_writer_cues_; + + // Default MkvWriter object created by this class used for writing the + // Matroska header out to a file. + MkvWriter* chunk_writer_header_; + + // Flag telling whether or not the muxer is chunking output to multiple + // files. + bool chunking_; + + // Base filename for the chunked files. + char* chunking_base_name_; + + // File position offset where the Clusters end. + int64 cluster_end_offset_; + + // List of clusters. + Cluster** cluster_list_; + + // Number of cluster pointers allocated in the cluster list. + int32 cluster_list_capacity_; + + // Number of clusters in the cluster list. + int32 cluster_list_size_; + + // Indicates whether Cues should be written before or after Clusters + CuesPosition cues_position_; + + // Track number that is associated with the cues element for this segment. + uint64 cues_track_; + + // Tells the muxer to force a new cluster on the next Block. + bool force_new_cluster_; + + // List of stored audio frames. These variables are used to store frames so + // the muxer can follow the guideline "Audio blocks that contain the video + // key frame's timecode should be in the same cluster as the video key frame + // block." + Frame** frames_; + + // Number of frame pointers allocated in the frame list. + int32 frames_capacity_; + + // Number of frames in the frame list. + int32 frames_size_; + + // Flag telling if a video track has been added to the segment. + bool has_video_; + + // Flag telling if the segment's header has been written. + bool header_written_; + + // Duration of the last block in nanoseconds. 
+ uint64 last_block_duration_; + + // Last timestamp in nanoseconds added to a cluster. + uint64 last_timestamp_; + + // Maximum time in nanoseconds for a cluster duration. This variable is a + // guideline and some clusters may have a longer duration. Default is 30 + // seconds. + uint64 max_cluster_duration_; + + // Maximum size in bytes for a cluster. This variable is a guideline and + // some clusters may have a larger size. Default is 0 which signifies that + // the muxer will decide the size. + uint64 max_cluster_size_; + + // The mode that segment is in. If set to |kLive| the writer must not + // seek backwards. + Mode mode_; + + // Flag telling the muxer that a new cue point should be added. + bool new_cuepoint_; + + // TODO(fgalligan): Should we add support for more than one Cues element? + // Flag whether or not the muxer should output a Cues element. + bool output_cues_; + + // The file position of the segment's payload. + int64 payload_pos_; + + // The file position of the element's size. + int64 size_position_; + + // Pointer to the writer objects. Not owned by this class. + IMkvWriter* writer_cluster_; + IMkvWriter* writer_cues_; + IMkvWriter* writer_header_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment); +}; + +} //end namespace mkvmuxer + +#endif //MKVMUXER_HPP diff --git a/source/libvpx/third_party/libwebm/mkvmuxertypes.hpp b/source/libvpx/third_party/libwebm/mkvmuxertypes.hpp new file mode 100644 index 0000000..2c66fd2 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxertypes.hpp @@ -0,0 +1,30 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MKVMUXERTYPES_HPP
+#define MKVMUXERTYPES_HPP
+
+// Copied from Chromium basictypes.h
+// A macro to disallow the copy constructor and operator= functions.
+// This should be used in the "private:" declarations for a class.
+#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+
+namespace mkvmuxer {
+
+typedef unsigned char uint8;
+typedef short int16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+}  // end namespace mkvmuxer
+
+#endif // MKVMUXERTYPES_HPP
diff --git a/source/libvpx/third_party/libwebm/mkvmuxerutil.cpp b/source/libvpx/third_party/libwebm/mkvmuxerutil.cpp new file mode 100644 index 0000000..18060e9 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxerutil.cpp @@ -0,0 +1,713 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvmuxerutil.hpp" + +#ifdef __ANDROID__ +#include <fcntl.h> +#endif + +#include <cassert> +#include <cmath> +#include <cstdio> +#ifdef _MSC_VER +#define _CRT_RAND_S +#endif +#include <cstdlib> +#include <cstring> +#include <ctime> + +#include <new> + +#include "mkvwriter.hpp" +#include "webmids.hpp" + +namespace mkvmuxer { + +int32 GetCodedUIntSize(uint64 value) { + if (value < 0x000000000000007FULL) + return 1; + else if (value < 0x0000000000003FFFULL) + return 2; + else if (value < 0x00000000001FFFFFULL) + return 3; + else if (value < 0x000000000FFFFFFFULL) + return 4; + else if (value < 0x00000007FFFFFFFFULL) + return 5; + else if (value < 0x000003FFFFFFFFFFULL) + return 6; + else if (value < 0x0001FFFFFFFFFFFFULL) + return 7; + return 8; +} + +int32 GetUIntSize(uint64 value) { + if (value < 0x0000000000000100ULL) + return 1; + else if (value < 0x0000000000010000ULL) + return 2; + else if (value < 0x0000000001000000ULL) + return 3; + else if (value < 0x0000000100000000ULL) + return 4; + else if (value < 0x0000010000000000ULL) + return 5; + else if (value < 0x0001000000000000ULL) + return 6; + else if (value < 0x0100000000000000ULL) + return 7; + return 8; +} + +uint64 EbmlMasterElementSize(uint64 type, uint64 value) { + // Size of EBML ID + int32 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += 
GetCodedUIntSize(value); + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, int64 value) { + return EbmlElementSize(type, static_cast<uint64>(value)); +} + +uint64 EbmlElementSize(uint64 type, uint64 value) { + // Size of EBML ID + int32 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += GetUIntSize(value); + + // Size of Datasize + ebml_size++; + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, float /* value */ ) { + // Size of EBML ID + uint64 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += sizeof(float); + + // Size of Datasize + ebml_size++; + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, const char* value) { + if (!value) + return 0; + + // Size of EBML ID + uint64 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += strlen(value); + + // Size of Datasize + ebml_size++; + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) { + if (!value) + return 0; + + // Size of EBML ID + uint64 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += size; + + // Size of Datasize + ebml_size += GetCodedUIntSize(size); + + return ebml_size; +} + +int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) { + if (!writer || size < 1 || size > 8) + return -1; + + for (int32 i = 1; i <= size; ++i) { + const int32 byte_count = size - i; + const int32 bit_count = byte_count * 8; + + const int64 bb = value >> bit_count; + const uint8 b = static_cast<uint8>(bb); + + const int32 status = writer->Write(&b, 1); + + if (status < 0) + return status; + } + + return 0; +} + +int32 SerializeFloat(IMkvWriter* writer, float f) { + if (!writer) + return -1; + + assert(sizeof(uint32) == sizeof(float)); + // This union is merely used to avoid a reinterpret_cast from float& to + // uint32& which will result in violation of strict aliasing. 
+ union U32 { + uint32 u32; + float f; + } value; + value.f = f; + + for (int32 i = 1; i <= 4; ++i) { + const int32 byte_count = 4 - i; + const int32 bit_count = byte_count * 8; + + const uint8 byte = static_cast<uint8>(value.u32 >> bit_count); + + const int32 status = writer->Write(&byte, 1); + + if (status < 0) + return status; + } + + return 0; +} + +int32 WriteUInt(IMkvWriter* writer, uint64 value) { + if (!writer) + return -1; + + int32 size = GetCodedUIntSize(value); + + return WriteUIntSize(writer, value, size); +} + +int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { + if (!writer || size < 0 || size > 8) + return -1; + + if (size > 0) { + const uint64 bit = 1LL << (size * 7); + + if (value > (bit - 2)) + return -1; + + value |= bit; + } else { + size = 1; + int64 bit; + + for (;;) { + bit = 1LL << (size * 7); + const uint64 max = bit - 2; + + if (value <= max) + break; + + ++size; + } + + if (size > 8) + return false; + + value |= bit; + } + + return SerializeInt(writer, value, size); +} + +int32 WriteID(IMkvWriter* writer, uint64 type) { + if (!writer) + return -1; + + writer->ElementStartNotify(type, writer->Position()); + + const int32 size = GetUIntSize(type); + + return SerializeInt(writer, type, size); +} + +bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) { + if (!writer) + return false; + + if (WriteID(writer, type)) + return false; + + if (WriteUInt(writer, size)) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) { + if (!writer) + return false; + + if (WriteID(writer, type)) + return false; + + const uint64 size = GetUIntSize(value); + if (WriteUInt(writer, size)) + return false; + + if (SerializeInt(writer, value, static_cast<int32>(size))) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) { + if (!writer) + return false; + + if (WriteID(writer, type)) + return false; + + if 
(WriteUInt(writer, 4)) + return false; + + if (SerializeFloat(writer, value)) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { + if (!writer || !value) + return false; + + if (WriteID(writer, type)) + return false; + + const uint64 length = strlen(value); + if (WriteUInt(writer, length)) + return false; + + if (writer->Write(value, static_cast<const uint32>(length))) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, + uint64 type, + const uint8* value, + uint64 size) { + if (!writer || !value || size < 1) + return false; + + if (WriteID(writer, type)) + return false; + + if (WriteUInt(writer, size)) + return false; + + if (writer->Write(value, static_cast<uint32>(size))) + return false; + + return true; +} + +uint64 WriteSimpleBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 is_key) { + if (!writer) + return false; + + if (!data || length < 1) + return false; + + // Here we only permit track number values to be no greater than + // 126, which the largest value we can store having a Matroska + // integer representation of only 1 byte. + + if (track_number < 1 || track_number > 126) + return false; + + // Technically the timestamp for a block can be less than the + // timestamp for the cluster itself (remember that block timestamp + // is a signed, 16-bit integer). However, as a simplification we + // only permit non-negative cluster-relative timestamps for blocks. 
+ + if (timecode < 0 || timecode > kMaxBlockTimecode) + return false; + + if (WriteID(writer, kMkvSimpleBlock)) + return 0; + + const int32 size = static_cast<int32>(length) + 4; + if (WriteUInt(writer, size)) + return 0; + + if (WriteUInt(writer, static_cast<uint64>(track_number))) + return 0; + + if (SerializeInt(writer, timecode, 2)) + return 0; + + uint64 flags = 0; + if (is_key) + flags |= 0x80; + + if (SerializeInt(writer, flags, 1)) + return 0; + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + const uint64 element_size = + GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length; + + return element_size; +} + +// We must write the metadata (key)frame as a BlockGroup element, +// because we need to specify a duration for the frame. The +// BlockGroup element comprises the frame itself and its duration, +// and is laid out as follows: +// +// BlockGroup tag +// BlockGroup size +// Block tag +// Block size +// (the frame is the block payload) +// Duration tag +// Duration size +// (duration payload) +// +uint64 WriteMetadataBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 duration) { + // We don't backtrack when writing to the stream, so we must + // pre-compute the BlockGroup size, by summing the sizes of each + // sub-element (the block and the duration). + + // We use a single byte for the track number of the block, which + // means the block header is exactly 4 bytes. 
+ + // TODO(matthewjheaney): use EbmlMasterElementSize and WriteEbmlMasterElement + + const uint64 block_payload_size = 4 + length; + const int32 block_size = GetCodedUIntSize(block_payload_size); + const uint64 block_elem_size = 1 + block_size + block_payload_size; + + const int32 duration_payload_size = GetUIntSize(duration); + const int32 duration_size = GetCodedUIntSize(duration_payload_size); + const uint64 duration_elem_size = 1 + duration_size + duration_payload_size; + + const uint64 blockg_payload_size = block_elem_size + duration_elem_size; + const int32 blockg_size = GetCodedUIntSize(blockg_payload_size); + const uint64 blockg_elem_size = 1 + blockg_size + blockg_payload_size; + + if (WriteID(writer, kMkvBlockGroup)) // 1-byte ID size + return 0; + + if (WriteUInt(writer, blockg_payload_size)) + return 0; + + // Write Block element + + if (WriteID(writer, kMkvBlock)) // 1-byte ID size + return 0; + + if (WriteUInt(writer, block_payload_size)) + return 0; + + // Byte 1 of 4 + + if (WriteUInt(writer, track_number)) + return 0; + + // Bytes 2 & 3 of 4 + + if (SerializeInt(writer, timecode, 2)) + return 0; + + // Byte 4 of 4 + + const uint64 flags = 0; + + if (SerializeInt(writer, flags, 1)) + return 0; + + // Now write the actual frame (of metadata) + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + // Write Duration element + + if (WriteID(writer, kMkvBlockDuration)) // 1-byte ID size + return 0; + + if (WriteUInt(writer, duration_payload_size)) + return 0; + + if (SerializeInt(writer, duration, duration_payload_size)) + return 0; + + // Note that we don't write a reference time as part of the block + // group; no reference time(s) indicates that this block is a + // keyframe. (Unlike the case for a SimpleBlock element, the header + // bits of the Block sub-element of a BlockGroup element do not + // indicate keyframe status. The keyframe status is inferred from + // the absence of reference time sub-elements.) 
+ + return blockg_elem_size; +} + +// Writes a WebM BlockGroup with BlockAdditional data. The structure is as +// follows: +// Indentation shows sub-levels +// BlockGroup +// Block +// Data +// BlockAdditions +// BlockMore +// BlockAddID +// 1 (Denotes Alpha) +// BlockAdditional +// Data +uint64 WriteBlockWithAdditional(IMkvWriter* writer, + const uint8* data, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + int64 timecode, + uint64 is_key) { + if (!data || !additional || length < 1 || additional_length < 1) + return 0; + + const uint64 block_payload_size = 4 + length; + const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock, + block_payload_size) + + block_payload_size; + const uint64 block_additional_elem_size = EbmlElementSize(kMkvBlockAdditional, + additional, + additional_length); + const uint64 block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, add_id); + + const uint64 block_more_payload_size = block_addid_elem_size + + block_additional_elem_size; + const uint64 block_more_elem_size = EbmlMasterElementSize( + kMkvBlockMore, + block_more_payload_size) + + block_more_payload_size; + const uint64 block_additions_payload_size = block_more_elem_size; + const uint64 block_additions_elem_size = EbmlMasterElementSize( + kMkvBlockAdditions, + block_additions_payload_size) + + block_additions_payload_size; + const uint64 block_group_payload_size = block_elem_size + + block_additions_elem_size; + const uint64 block_group_elem_size = EbmlMasterElementSize( + kMkvBlockGroup, + block_group_payload_size) + + block_group_payload_size; + + if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, + block_group_payload_size)) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size)) + return 0; + + if (WriteUInt(writer, track_number)) + return 0; + + if (SerializeInt(writer, timecode, 2)) + return 0; + + uint64 flags = 0; + if (is_key) + flags |= 0x80; + if (SerializeInt(writer, 
flags, 1)) + return 0; + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlockAdditions, + block_additions_payload_size)) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlockMore, block_more_payload_size)) + return 0; + + if (!WriteEbmlElement(writer, kMkvBlockAddID, add_id)) + return 0; + + if (!WriteEbmlElement(writer, kMkvBlockAdditional, + additional, additional_length)) + return 0; + + return block_group_elem_size; +} + +// Writes a WebM BlockGroup with DiscardPadding. The structure is as follows: +// Indentation shows sub-levels +// BlockGroup +// Block +// Data +// DiscardPadding +uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, + const uint8* data, + uint64 length, + int64 discard_padding, + uint64 track_number, + int64 timecode, + uint64 is_key) { + if (!data || length < 1 || discard_padding <= 0) + return 0; + + const uint64 block_payload_size = 4 + length; + const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock, + block_payload_size) + + block_payload_size; + const uint64 discard_padding_elem_size = EbmlElementSize(kMkvDiscardPadding, + discard_padding); + const uint64 block_group_payload_size = block_elem_size + + discard_padding_elem_size; + const uint64 block_group_elem_size = EbmlMasterElementSize( + kMkvBlockGroup, + block_group_payload_size) + + block_group_payload_size; + + if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, + block_group_payload_size)) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size)) + return 0; + + if (WriteUInt(writer, track_number)) + return 0; + + if (SerializeInt(writer, timecode, 2)) + return 0; + + uint64 flags = 0; + if (is_key) + flags |= 0x80; + if (SerializeInt(writer, flags, 1)) + return 0; + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + if (WriteID(writer, kMkvDiscardPadding)) + return 0; + + const uint64 size = GetUIntSize(discard_padding); + if (WriteUInt(writer, 
size)) + return false; + + if (SerializeInt(writer, discard_padding, static_cast<int32>(size))) + return false; + + return block_group_elem_size; +} + +uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) { + if (!writer) + return false; + + // Subtract one for the void ID and the coded size. + uint64 void_entry_size = size - 1 - GetCodedUIntSize(size-1); + uint64 void_size = EbmlMasterElementSize(kMkvVoid, void_entry_size) + + void_entry_size; + + if (void_size != size) + return 0; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return 0; + + if (WriteID(writer, kMkvVoid)) + return 0; + + if (WriteUInt(writer, void_entry_size)) + return 0; + + const uint8 value = 0; + for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) { + if (writer->Write(&value, 1)) + return 0; + } + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(void_size)) + return 0; + + return void_size; +} + +void GetVersion(int32* major, int32* minor, int32* build, int32* revision) { + *major = 0; + *minor = 2; + *build = 1; + *revision = 0; +} + +} // namespace mkvmuxer + +mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) { + uint64 uid = 0; + +#ifdef __MINGW32__ + srand(*seed); +#endif + + for (int i = 0; i < 7; ++i) { // avoid problems with 8-byte values + uid <<= 8; + + // TODO(fgalligan): Move random number generation to platform specific code. 
+#ifdef _MSC_VER + (void)seed; + unsigned int random_value; + const errno_t e = rand_s(&random_value); + (void)e; + const int32 nn = random_value; +#elif __ANDROID__ + int32 temp_num = 1; + int fd = open("/dev/urandom", O_RDONLY); + if (fd != -1) { + read(fd, &temp_num, sizeof(int32)); + close(fd); + } + const int32 nn = temp_num; +#elif defined __MINGW32__ + const int32 nn = rand(); +#else + const int32 nn = rand_r(seed); +#endif + const int32 n = 0xFF & (nn >> 4); // throw away low-order bits + + uid |= n; + } + + return uid; +} diff --git a/source/libvpx/third_party/libwebm/mkvmuxerutil.hpp b/source/libvpx/third_party/libwebm/mkvmuxerutil.hpp new file mode 100644 index 0000000..d196ad3 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxerutil.hpp @@ -0,0 +1,151 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVMUXERUTIL_HPP +#define MKVMUXERUTIL_HPP + +#include "mkvmuxertypes.hpp" + +namespace mkvmuxer { + +class IMkvWriter; + +const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL; +const int64 kMaxBlockTimecode = 0x07FFFLL; + +// Writes out |value| in Big Endian order. Returns 0 on success. +int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size); + +// Returns the size in bytes of the element. 
+int32 GetUIntSize(uint64 value); +int32 GetCodedUIntSize(uint64 value); +uint64 EbmlMasterElementSize(uint64 type, uint64 value); +uint64 EbmlElementSize(uint64 type, int64 value); +uint64 EbmlElementSize(uint64 type, uint64 value); +uint64 EbmlElementSize(uint64 type, float value); +uint64 EbmlElementSize(uint64 type, const char* value); +uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size); + +// Creates an EBML coded number from |value| and writes it out. The size of +// the coded number is determined by the value of |value|. |value| must not +// be in a coded form. Returns 0 on success. +int32 WriteUInt(IMkvWriter* writer, uint64 value); + +// Creates an EBML coded number from |value| and writes it out. The size of +// the coded number is determined by the value of |size|. |value| must not +// be in a coded form. Returns 0 on success. +int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size); + +// Output an Mkv master element. Returns true if the element was written. +bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size); + +// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the +// ID to |SerializeInt|. Returns 0 on success. +int32 WriteID(IMkvWriter* writer, uint64 type); + +// Output an Mkv non-master element. Returns true if the element was written. +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value); +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value); +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value); +bool WriteEbmlElement(IMkvWriter* writer, + uint64 type, + const uint8* value, + uint64 size); + +// Output an Mkv Simple Block. +// Inputs: +// data: Pointer to the data. +// length: Length of the data. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. +// timecode: Relative timecode of the Block. 
Only values in the +// range [0, 2^15) are permitted. +// is_key: Non-zero value specifies that frame is a key frame. +uint64 WriteSimpleBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 is_key); + +// Output a metadata keyframe, using a Block Group element. +// Inputs: +// data: Pointer to the (meta)data. +// length: Length of the (meta)data. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. +// timecode Timecode of frame, relative to cluster timecode. Only +// values in the range [0, 2^15) are permitted. +// duration_timecode Duration of frame, using timecode units. +uint64 WriteMetadataBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 duration_timecode); + +// Output an Mkv Block with BlockAdditional data. +// Inputs: +// data: Pointer to the data. +// length: Length of the data. +// additional: Pointer to the additional data +// additional_length: Length of the additional data. +// add_id: Value of BlockAddID element. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. +// timecode: Relative timecode of the Block. Only values in the +// range [0, 2^15) are permitted. +// is_key: Non-zero value specifies that frame is a key frame. +uint64 WriteBlockWithAdditional(IMkvWriter* writer, + const uint8* data, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + int64 timecode, + uint64 is_key); + +// Output an Mkv Block with a DiscardPadding element. +// Inputs: +// data: Pointer to the data. +// length: Length of the data. +// discard_padding: DiscardPadding value. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. 
+// timecode: Relative timecode of the Block. Only values in the +// range [0, 2^15) are permitted. +// is_key: Non-zero value specifies that frame is a key frame. +uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, + const uint8* data, + uint64 length, + int64 discard_padding, + uint64 track_number, + int64 timecode, + uint64 is_key); + +// Output a void element. |size| must be the entire size in bytes that will be +// void. The function will calculate the size of the void header and subtract +// it from |size|. +uint64 WriteVoidElement(IMkvWriter* writer, uint64 size); + +// Returns the version number of the muxer in |major|, |minor|, |build|, +// and |revision|. +void GetVersion(int32* major, int32* minor, int32* build, int32* revision); + +// Returns a random number to be used for UID, using |seed| to seed +// the random-number generator (see POSIX rand_r() for semantics). +uint64 MakeUID(unsigned int* seed); + +} //end namespace mkvmuxer + +#endif // MKVMUXERUTIL_HPP diff --git a/source/libvpx/third_party/libwebm/mkvparser.cpp b/source/libvpx/third_party/libwebm/mkvparser.cpp new file mode 100644 index 0000000..b41456a --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvparser.cpp @@ -0,0 +1,9617 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvparser.hpp" +#include <cassert> +#include <cstring> +#include <new> +#include <climits> + +#ifdef _MSC_VER +// Disable MSVC warnings that suggest making code non-portable. 
+#pragma warning(disable:4996) +#endif + +mkvparser::IMkvReader::~IMkvReader() +{ +} + +void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision) +{ + major = 1; + minor = 0; + build = 0; + revision = 27; +} + +long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) +{ + assert(pReader); + assert(pos >= 0); + + int status; + +//#ifdef _DEBUG +// long long total, available; +// status = pReader->Length(&total, &available); +// assert(status >= 0); +// assert((total < 0) || (available <= total)); +// assert(pos < available); +// assert((available - pos) >= 1); //assume here max u-int len is 8 +//#endif + + len = 1; + + unsigned char b; + + status = pReader->Read(pos, 1, &b); + + if (status < 0) //error or underflow + return status; + + if (status > 0) //interpreted as "underflow" + return E_BUFFER_NOT_FULL; + + if (b == 0) //we can't handle u-int values larger than 8 bytes + return E_FILE_FORMAT_INVALID; + + unsigned char m = 0x80; + + while (!(b & m)) + { + m >>= 1; + ++len; + } + +//#ifdef _DEBUG +// assert((available - pos) >= len); +//#endif + + long long result = b & (~m); + ++pos; + + for (int i = 1; i < len; ++i) + { + status = pReader->Read(pos, 1, &b); + + if (status < 0) + { + len = 1; + return status; + } + + if (status > 0) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result <<= 8; + result |= b; + + ++pos; + } + + return result; +} + +long long mkvparser::GetUIntLength( + IMkvReader* pReader, + long long pos, + long& len) +{ + assert(pReader); + assert(pos >= 0); + + long long total, available; + + int status = pReader->Length(&total, &available); + assert(status >= 0); + assert((total < 0) || (available <= total)); + + len = 1; + + if (pos >= available) + return pos; //too few bytes available + + unsigned char b; + + status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + assert(status == 0); + + if (b == 0) //we can't handle u-int values larger than 8 bytes + return 
E_FILE_FORMAT_INVALID; + + unsigned char m = 0x80; + + while (!(b & m)) + { + m >>= 1; + ++len; + } + + return 0; //success +} + + +long long mkvparser::UnserializeUInt( + IMkvReader* pReader, + long long pos, + long long size) +{ + assert(pReader); + assert(pos >= 0); + + if ((size <= 0) || (size > 8)) + return E_FILE_FORMAT_INVALID; + + long long result = 0; + + for (long long i = 0; i < size; ++i) + { + unsigned char b; + + const long status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + result <<= 8; + result |= b; + + ++pos; + } + + return result; +} + + +long mkvparser::UnserializeFloat( + IMkvReader* pReader, + long long pos, + long long size_, + double& result) +{ + assert(pReader); + assert(pos >= 0); + + if ((size_ != 4) && (size_ != 8)) + return E_FILE_FORMAT_INVALID; + + const long size = static_cast<long>(size_); + + unsigned char buf[8]; + + const int status = pReader->Read(pos, size, buf); + + if (status < 0) //error + return status; + + if (size == 4) + { + union + { + float f; + unsigned long ff; + }; + + ff = 0; + + for (int i = 0;;) + { + ff |= buf[i]; + + if (++i >= 4) + break; + + ff <<= 8; + } + + result = f; + } + else + { + assert(size == 8); + + union + { + double d; + unsigned long long dd; + }; + + dd = 0; + + for (int i = 0;;) + { + dd |= buf[i]; + + if (++i >= 8) + break; + + dd <<= 8; + } + + result = d; + } + + return 0; +} + + +long mkvparser::UnserializeInt( + IMkvReader* pReader, + long long pos, + long size, + long long& result) +{ + assert(pReader); + assert(pos >= 0); + assert(size > 0); + assert(size <= 8); + + { + signed char b; + + const long status = pReader->Read(pos, 1, (unsigned char*)&b); + + if (status < 0) + return status; + + result = b; + + ++pos; + } + + for (long i = 1; i < size; ++i) + { + unsigned char b; + + const long status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + result <<= 8; + result |= b; + + ++pos; + } + + return 0; //success +} + + +long 
mkvparser::UnserializeString( + IMkvReader* pReader, + long long pos, + long long size_, + char*& str) +{ + delete[] str; + str = NULL; + + if (size_ >= LONG_MAX) //we need (size+1) chars + return E_FILE_FORMAT_INVALID; + + const long size = static_cast<long>(size_); + + str = new (std::nothrow) char[size+1]; + + if (str == NULL) + return -1; + + unsigned char* const buf = reinterpret_cast<unsigned char*>(str); + + const long status = pReader->Read(pos, size, buf); + + if (status) + { + delete[] str; + str = NULL; + + return status; + } + + str[size] = '\0'; + + return 0; //success +} + + +long mkvparser::ParseElementHeader( + IMkvReader* pReader, + long long& pos, + long long stop, + long long& id, + long long& size) +{ + if ((stop >= 0) && (pos >= stop)) + return E_FILE_FORMAT_INVALID; + + long len; + + id = ReadUInt(pReader, pos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume id + + if ((stop >= 0) && (pos >= stop)) + return E_FILE_FORMAT_INVALID; + + size = ReadUInt(pReader, pos, len); + + if (size < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume length of size + + //pos now designates payload + + if ((stop >= 0) && ((pos + size) > stop)) + return E_FILE_FORMAT_INVALID; + + return 0; //success +} + + +bool mkvparser::Match( + IMkvReader* pReader, + long long& pos, + unsigned long id_, + long long& val) +{ + assert(pReader); + assert(pos >= 0); + + long long total, available; + + const long status = pReader->Length(&total, &available); + assert(status >= 0); + assert((total < 0) || (available <= total)); + if (status < 0) + return false; + + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); + assert(len > 0); + assert(len <= 8); + assert((pos + len) <= available); + + if ((unsigned long)id != id_) + return false; + + pos += len; //consume id + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert(size <= 8); + assert(len > 0); + assert(len <= 8); + 
assert((pos + len) <= available); + + pos += len; //consume length of size of payload + + val = UnserializeUInt(pReader, pos, size); + assert(val >= 0); + + pos += size; //consume size of payload + + return true; +} + +bool mkvparser::Match( + IMkvReader* pReader, + long long& pos, + unsigned long id_, + unsigned char*& buf, + size_t& buflen) +{ + assert(pReader); + assert(pos >= 0); + + long long total, available; + + long status = pReader->Length(&total, &available); + assert(status >= 0); + assert((total < 0) || (available <= total)); + if (status < 0) + return false; + + long len; + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); + assert(len > 0); + assert(len <= 8); + assert((pos + len) <= available); + + if ((unsigned long)id != id_) + return false; + + pos += len; //consume id + + const long long size_ = ReadUInt(pReader, pos, len); + assert(size_ >= 0); + assert(len > 0); + assert(len <= 8); + assert((pos + len) <= available); + + pos += len; //consume length of size of payload + assert((pos + size_) <= available); + + const long buflen_ = static_cast<long>(size_); + + buf = new (std::nothrow) unsigned char[buflen_]; + assert(buf); //TODO + + status = pReader->Read(pos, buflen_, buf); + assert(status == 0); //TODO + + buflen = buflen_; + + pos += size_; //consume size of payload + return true; +} + + +namespace mkvparser +{ + +EBMLHeader::EBMLHeader() : + m_docType(NULL) +{ + Init(); +} + +EBMLHeader::~EBMLHeader() +{ + delete[] m_docType; +} + +void EBMLHeader::Init() +{ + m_version = 1; + m_readVersion = 1; + m_maxIdLength = 4; + m_maxSizeLength = 8; + + if (m_docType) + { + delete[] m_docType; + m_docType = NULL; + } + + m_docTypeVersion = 1; + m_docTypeReadVersion = 1; +} + +long long EBMLHeader::Parse( + IMkvReader* pReader, + long long& pos) +{ + assert(pReader); + + long long total, available; + + long status = pReader->Length(&total, &available); + + if (status < 0) //error + return status; + + pos = 0; + long long end = 
(available >= 1024) ? 1024 : available; + + for (;;) + { + unsigned char b = 0; + + while (pos < end) + { + status = pReader->Read(pos, 1, &b); + + if (status < 0) //error + return status; + + if (b == 0x1A) + break; + + ++pos; + } + + if (b != 0x1A) + { + if (pos >= 1024) + return E_FILE_FORMAT_INVALID; //don't bother looking anymore + + if ((total >= 0) && ((total - available) < 5)) + return E_FILE_FORMAT_INVALID; + + return available + 5; //5 = 4-byte ID + 1st byte of size + } + + if ((total >= 0) && ((total - pos) < 5)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < 5) + return pos + 5; //try again later + + long len; + + const long long result = ReadUInt(pReader, pos, len); + + if (result < 0) //error + return result; + + if (result == 0x0A45DFA3) //EBML Header ID + { + pos += len; //consume ID + break; + } + + ++pos; //throw away just the 0x1A byte, and try again + } + + //pos designates start of size field + + //get length of size field + + long len; + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return result; + + if (result > 0) //need more data + return result; + + assert(len > 0); + assert(len <= 8); + + if ((total >= 0) && ((total - pos) < len)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < len) + return pos + len; //try again later + + //get the EBML header size + + result = ReadUInt(pReader, pos, len); + + if (result < 0) //error + return result; + + pos += len; //consume size field + + //pos now designates start of payload + + if ((total >= 0) && ((total - pos) < result)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < result) + return pos + result; + + end = pos + result; + + Init(); + + while (pos < end) + { + long long id, size; + + status = ParseElementHeader( + pReader, + pos, + end, + id, + size); + + if (status < 0) //error + return status; + + if (size == 0) //weird + return E_FILE_FORMAT_INVALID; + + if (id == 0x0286) //version + { + m_version = 
UnserializeUInt(pReader, pos, size); + + if (m_version <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x02F7) //read version + { + m_readVersion = UnserializeUInt(pReader, pos, size); + + if (m_readVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x02F2) //max id length + { + m_maxIdLength = UnserializeUInt(pReader, pos, size); + + if (m_maxIdLength <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x02F3) //max size length + { + m_maxSizeLength = UnserializeUInt(pReader, pos, size); + + if (m_maxSizeLength <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0282) //doctype + { + if (m_docType) + return E_FILE_FORMAT_INVALID; + + status = UnserializeString(pReader, pos, size, m_docType); + + if (status) //error + return status; + } + else if (id == 0x0287) //doctype version + { + m_docTypeVersion = UnserializeUInt(pReader, pos, size); + + if (m_docTypeVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0285) //doctype read version + { + m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); + + if (m_docTypeReadVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; + } + + assert(pos == end); + return 0; +} + + +Segment::Segment( + IMkvReader* pReader, + long long elem_start, + //long long elem_size, + long long start, + long long size) : + m_pReader(pReader), + m_element_start(elem_start), + //m_element_size(elem_size), + m_start(start), + m_size(size), + m_pos(start), + m_pUnknownSize(0), + m_pSeekHead(NULL), + m_pInfo(NULL), + m_pTracks(NULL), + m_pCues(NULL), + m_pChapters(NULL), + m_clusters(NULL), + m_clusterCount(0), + m_clusterPreloadCount(0), + m_clusterSize(0) +{ +} + + +Segment::~Segment() +{ + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** i = m_clusters; + Cluster** j = m_clusters + count; + + while (i != j) + { + Cluster* const p = *i++; + assert(p); + + delete p; + } + + delete[] m_clusters; + + delete m_pTracks; + delete m_pInfo; + delete 
m_pCues; + delete m_pChapters; + delete m_pSeekHead; +} + + +long long Segment::CreateInstance( + IMkvReader* pReader, + long long pos, + Segment*& pSegment) +{ + assert(pReader); + assert(pos >= 0); + + pSegment = NULL; + + long long total, available; + + const long status = pReader->Length(&total, &available); + + if (status < 0) //error + return status; + + if (available < 0) + return -1; + + if ((total >= 0) && (available > total)) + return -1; + + //I would assume that in practice this loop would execute + //exactly once, but we allow for other elements (e.g. Void) + //to immediately follow the EBML header. This is fine for + //the source filter case (since the entire file is available), + //but in the splitter case over a network we should probably + //just give up early. We could for example decide only to + //execute this loop a maximum of, say, 10 times. + //TODO: + //There is an implied "give up early" by only parsing up + //to the available limit. We do do that, but only if the + //total file size is unknown. We could decide to always + //use what's available as our limit (irrespective of whether + //we happen to know the total file length). This would have + //as its sense "parse this much of the file before giving up", + //which a slightly different sense from "try to parse up to + //10 EMBL elements before giving up". 
+ + for (;;) + { + if ((total >= 0) && (pos >= total)) + return E_FILE_FORMAT_INVALID; + + //Read ID + long len; + long long result = GetUIntLength(pReader, pos, len); + + if (result) //error, or too few available bytes + return result; + + if ((total >= 0) && ((pos + len) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long idpos = pos; + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return id; + + pos += len; //consume ID + + //Read Size + + result = GetUIntLength(pReader, pos, len); + + if (result) //error, or too few available bytes + return result; + + if ((total >= 0) && ((pos + len) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return size; + + pos += len; //consume length of size of element + + //Pos now points to start of payload + + //Handle "unknown size" for live streaming of webm files. + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (id == 0x08538067) //Segment ID + { + if (size == unknown_size) + size = -1; + + else if (total < 0) + size = -1; + + else if ((pos + size) > total) + size = -1; + + pSegment = new (std::nothrow) Segment( + pReader, + idpos, + //elem_size + pos, + size); + + if (pSegment == 0) + return -1; //generic error + + return 0; //success + } + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + size) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + size) > available) + return pos + size; + + pos += size; //consume payload + } +} + + +long long Segment::ParseHeaders() +{ + //Outermost (level 0) segment object has been constructed, + //and pos designates start of payload. We need to find the + //inner (level 1) elements. 
+ long long total, available; + + const int status = m_pReader->Length(&total, &available); + + if (status < 0) //error + return status; + + assert((total < 0) || (available <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + assert((segment_stop < 0) || (total < 0) || (segment_stop <= total)); + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + for (;;) + { + if ((total >= 0) && (m_pos >= total)) + break; + + if ((segment_stop >= 0) && (m_pos >= segment_stop)) + break; + + long long pos = m_pos; + const long long element_start = pos; + + if ((pos + 1) > available) + return (pos + 1); + + long len; + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return result; + + if (result > 0) //underflow (weird) + return (pos + 1); + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error + return id; + + if (id == 0x0F43B675) //Cluster ID + break; + + pos += len; //consume ID + + if ((pos + 1) > available) + return (pos + 1); + + //Read Size + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return result; + + if (result > 0) //underflow (weird) + return (pos + 1); + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return size; + + pos += len; //consume length of size of element + + const long long element_size = size + pos - element_start; + + //Pos now points to start of payload + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + //We read EBML elements either in total or nothing at all. 
+ + if ((pos + size) > available) + return pos + size; + + if (id == 0x0549A966) //Segment Info ID + { + if (m_pInfo) + return E_FILE_FORMAT_INVALID; + + m_pInfo = new (std::nothrow) SegmentInfo( + this, + pos, + size, + element_start, + element_size); + + if (m_pInfo == NULL) + return -1; + + const long status = m_pInfo->Parse(); + + if (status) + return status; + } + else if (id == 0x0654AE6B) //Tracks ID + { + if (m_pTracks) + return E_FILE_FORMAT_INVALID; + + m_pTracks = new (std::nothrow) Tracks(this, + pos, + size, + element_start, + element_size); + + if (m_pTracks == NULL) + return -1; + + const long status = m_pTracks->Parse(); + + if (status) + return status; + } + else if (id == 0x0C53BB6B) //Cues ID + { + if (m_pCues == NULL) + { + m_pCues = new (std::nothrow) Cues( + this, + pos, + size, + element_start, + element_size); + + if (m_pCues == NULL) + return -1; + } + } + else if (id == 0x014D9B74) //SeekHead ID + { + if (m_pSeekHead == NULL) + { + m_pSeekHead = new (std::nothrow) SeekHead( + this, + pos, + size, + element_start, + element_size); + + if (m_pSeekHead == NULL) + return -1; + + const long status = m_pSeekHead->Parse(); + + if (status) + return status; + } + } + else if (id == 0x0043A770) //Chapters ID + { + if (m_pChapters == NULL) + { + m_pChapters = new (std::nothrow) Chapters( + this, + pos, + size, + element_start, + element_size); + + if (m_pChapters == NULL) + return -1; + + const long status = m_pChapters->Parse(); + + if (status) + return status; + } + } + + m_pos = pos + size; //consume payload + } + + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + if (m_pInfo == NULL) //TODO: liberalize this behavior + return E_FILE_FORMAT_INVALID; + + if (m_pTracks == NULL) + return E_FILE_FORMAT_INVALID; + + return 0; //success +} + + +long Segment::LoadCluster( + long long& pos, + long& len) +{ + for (;;) + { + const long result = DoLoadCluster(pos, len); + + if (result <= 1) + return result; + } +} + + +long Segment::DoLoadCluster( + 
long long& pos, + long& len) +{ + if (m_pos < 0) + return DoLoadClusterUnknownSize(pos, len); + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + long long cluster_off = -1; //offset relative to start of segment + long long cluster_size = -1; //size of cluster payload + + for (;;) + { + if ((total >= 0) && (m_pos >= total)) + return 1; //no more clusters + + if ((segment_stop >= 0) && (m_pos >= segment_stop)) + return 1; //no more clusters + + pos = m_pos; + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + pos += len; //consume ID + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume length of size of element + + //pos now points to start of payload + + if (size == 0) //weird + { + m_pos = pos; + continue; + } + + const long long unknown_size = (1LL << (7 * len)) 
- 1; + +#if 0 //we must handle this to support live webm + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //TODO: allow this +#endif + + if ((segment_stop >= 0) && + (size != unknown_size) && + ((pos + size) > segment_stop)) + { + return E_FILE_FORMAT_INVALID; + } + +#if 0 //commented-out, to support incremental cluster parsing + len = static_cast<long>(size); + + if ((pos + size) > avail) + return E_BUFFER_NOT_FULL; +#endif + + if (id == 0x0C53BB6B) //Cues ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //TODO: liberalize + + if (m_pCues == NULL) + { + const long long element_size = (pos - idpos) + size; + + m_pCues = new Cues(this, + pos, + size, + idpos, + element_size); + assert(m_pCues); //TODO + } + + m_pos = pos + size; //consume payload + continue; + } + + if (id != 0x0F43B675) //Cluster ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //TODO: liberalize + + m_pos = pos + size; //consume payload + continue; + } + + //We have a cluster. + + cluster_off = idpos - m_start; //relative pos + + if (size != unknown_size) + cluster_size = size; + + break; + } + + assert(cluster_off >= 0); //have cluster + + long long pos_; + long len_; + + status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); + + if (status < 0) //error, or underflow + { + pos = pos_; + len = len_; + + return status; + } + + //status == 0 means "no block entries found" + //status > 0 means "found at least one block entry" + + //TODO: + //The issue here is that the segment increments its own + //pos ptr past the most recent cluster parsed, and then + //starts from there to parse the next cluster. If we + //don't know the size of the current cluster, then we + //must either parse its payload (as we do below), looking + //for the cluster (or cues) ID to terminate the parse. + //This isn't really what we want: rather, we really need + //a way to create the curr cluster object immediately. 
+ //The pity is that cluster::parse can determine its own + //boundary, and we largely duplicate that same logic here. + // + //Maybe we need to get rid of our look-ahead preloading + //in source::parse??? + // + //As we're parsing the blocks in the curr cluster + //(in cluster::parse), we should have some way to signal + //to the segment that we have determined the boundary, + //so it can adjust its own segment::m_pos member. + // + //The problem is that we're asserting in asyncreadinit, + //because we adjust the pos down to the curr seek pos, + //and the resulting adjusted len is > 2GB. I'm suspicious + //that this is even correct, but even if it is, we can't + //be loading that much data in the cache anyway. + + const long idx = m_clusterCount; + + if (m_clusterPreloadCount > 0) + { + assert(idx < m_clusterSize); + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + assert(pCluster->m_index < 0); + + const long long off = pCluster->GetPosition(); + assert(off >= 0); + + if (off == cluster_off) //preloaded already + { + if (status == 0) //no entries found + return E_FILE_FORMAT_INVALID; + + if (cluster_size >= 0) + pos += cluster_size; + else + { + const long long element_size = pCluster->GetElementSize(); + + if (element_size <= 0) + return E_FILE_FORMAT_INVALID; //TODO: handle this case + + pos = pCluster->m_element_start + element_size; + } + + pCluster->m_index = idx; //move from preloaded to loaded + ++m_clusterCount; + --m_clusterPreloadCount; + + m_pos = pos; //consume payload + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + return 0; //success + } + } + + if (status == 0) //no entries found + { + if (cluster_size < 0) + return E_FILE_FORMAT_INVALID; //TODO: handle this + + pos += cluster_size; + + if ((total >= 0) && (pos >= total)) + { + m_pos = total; + return 1; //no more clusters + } + + if ((segment_stop >= 0) && (pos >= segment_stop)) + { + m_pos = segment_stop; + return 1; //no more clusters + } + + m_pos = pos; + return 
2; //try again + } + + //status > 0 means we have an entry + + Cluster* const pCluster = Cluster::Create(this, + idx, + cluster_off); + //element_size); + assert(pCluster); + + AppendCluster(pCluster); + assert(m_clusters); + assert(idx < m_clusterSize); + assert(m_clusters[idx] == pCluster); + + if (cluster_size >= 0) + { + pos += cluster_size; + + m_pos = pos; + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + return 0; + } + + m_pUnknownSize = pCluster; + m_pos = -pos; + + return 0; //partial success, since we have a new cluster + + //status == 0 means "no block entries found" + + //pos designates start of payload + //m_pos has NOT been adjusted yet (in case we need to come back here) + +#if 0 + + if (cluster_size < 0) //unknown size + { + const long long payload_pos = pos; //absolute pos of cluster payload + + for (;;) //determine cluster size + { + if ((total >= 0) && (pos >= total)) + break; + + if ((segment_stop >= 0) && (pos >= segment_stop)) + break; //no more clusters + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. 
+ + if (id == 0x0F43B675) //Cluster ID + break; + + if (id == 0x0C53BB6B) //Cues ID + break; + + switch (id) + { + case 0x20: //BlockGroup + case 0x23: //Simple Block + case 0x67: //TimeCode + case 0x2B: //PrevSize + break; + + default: + assert(false); + break; + } + + pos += len; //consume ID (of sub-element) + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field of element + + //pos now points to start of sub-element's payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload of sub-element + assert((segment_stop < 0) || (pos <= segment_stop)); + } //determine cluster size + + cluster_size = pos - payload_pos; + assert(cluster_size >= 0); + + pos = payload_pos; //reset and re-parse original cluster + } + + if (m_clusterPreloadCount > 0) + { + assert(idx < m_clusterSize); + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + assert(pCluster->m_index < 0); + + const long long off = pCluster->GetPosition(); + assert(off >= 0); + + if (off == cluster_off) //preloaded already + return E_FILE_FORMAT_INVALID; //subtle + } + + m_pos = pos + cluster_size; //consume payload + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + return 2; //try to find another cluster + +#endif + 
+} + + +long Segment::DoLoadClusterUnknownSize( + long long& pos, + long& len) +{ + assert(m_pos < 0); + assert(m_pUnknownSize); + +#if 0 + assert(m_pUnknownSize->GetElementSize() < 0); //TODO: verify this + + const long long element_start = m_pUnknownSize->m_element_start; + + pos = -m_pos; + assert(pos > element_start); + + //We have already consumed the (cluster) ID and size fields. + //We just need to consume the blocks and other sub-elements + //of this cluster, until we discover the boundary. + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + long long element_size = -1; + + for (;;) //determine cluster size + { + if ((total >= 0) && (pos >= total)) + { + element_size = total - element_start; + assert(element_size > 0); + + break; + } + + if ((segment_stop >= 0) && (pos >= segment_stop)) + { + element_size = segment_stop - element_start; + assert(element_size > 0); + + break; + } + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. 
+ + if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster ID or Cues ID + { + element_size = pos - element_start; + assert(element_size > 0); + + break; + } + +#ifdef _DEBUG + switch (id) + { + case 0x20: //BlockGroup + case 0x23: //Simple Block + case 0x67: //TimeCode + case 0x2B: //PrevSize + break; + + default: + assert(false); + break; + } +#endif + + pos += len; //consume ID (of sub-element) + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field of element + + //pos now points to start of sub-element's payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload of sub-element + assert((segment_stop < 0) || (pos <= segment_stop)); + } //determine cluster size + + assert(element_size >= 0); + + m_pos = element_start + element_size; + m_pUnknownSize = 0; + + return 2; //continue parsing +#else + const long status = m_pUnknownSize->Parse(pos, len); + + if (status < 0) //error or underflow + return status; + + if (status == 0) //parsed a block + return 2; //continue parsing + + assert(status > 0); //nothing left to parse of this cluster + + const long long start = m_pUnknownSize->m_element_start; + + const long long size = m_pUnknownSize->GetElementSize(); + assert(size >= 0); + + pos = start 
+ size; + m_pos = pos; + + m_pUnknownSize = 0; + + return 2; //continue parsing +#endif +} + + +void Segment::AppendCluster(Cluster* pCluster) +{ + assert(pCluster); + assert(pCluster->m_index >= 0); + + const long count = m_clusterCount + m_clusterPreloadCount; + + long& size = m_clusterSize; + assert(size >= count); + + const long idx = pCluster->m_index; + assert(idx == m_clusterCount); + + if (count >= size) + { + const long n = (size <= 0) ? 2048 : 2*size; + + Cluster** const qq = new Cluster*[n]; + Cluster** q = qq; + + Cluster** p = m_clusters; + Cluster** const pp = p + count; + + while (p != pp) + *q++ = *p++; + + delete[] m_clusters; + + m_clusters = qq; + size = n; + } + + if (m_clusterPreloadCount > 0) + { + assert(m_clusters); + + Cluster** const p = m_clusters + m_clusterCount; + assert(*p); + assert((*p)->m_index < 0); + + Cluster** q = p + m_clusterPreloadCount; + assert(q < (m_clusters + size)); + + for (;;) + { + Cluster** const qq = q - 1; + assert((*qq)->m_index < 0); + + *q = *qq; + q = qq; + + if (q == p) + break; + } + } + + m_clusters[idx] = pCluster; + ++m_clusterCount; +} + + +void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) +{ + assert(pCluster); + assert(pCluster->m_index < 0); + assert(idx >= m_clusterCount); + + const long count = m_clusterCount + m_clusterPreloadCount; + + long& size = m_clusterSize; + assert(size >= count); + + if (count >= size) + { + const long n = (size <= 0) ? 
2048 : 2*size; + + Cluster** const qq = new Cluster*[n]; + Cluster** q = qq; + + Cluster** p = m_clusters; + Cluster** const pp = p + count; + + while (p != pp) + *q++ = *p++; + + delete[] m_clusters; + + m_clusters = qq; + size = n; + } + + assert(m_clusters); + + Cluster** const p = m_clusters + idx; + + Cluster** q = m_clusters + count; + assert(q >= p); + assert(q < (m_clusters + size)); + + while (q > p) + { + Cluster** const qq = q - 1; + assert((*qq)->m_index < 0); + + *q = *qq; + q = qq; + } + + m_clusters[idx] = pCluster; + ++m_clusterPreloadCount; +} + + +long Segment::Load() +{ + assert(m_clusters == NULL); + assert(m_clusterSize == 0); + assert(m_clusterCount == 0); + //assert(m_size >= 0); + + //Outermost (level 0) segment object has been constructed, + //and pos designates start of payload. We need to find the + //inner (level 1) elements. + + const long long header_status = ParseHeaders(); + + if (header_status < 0) //error + return static_cast<long>(header_status); + + if (header_status > 0) //underflow + return E_BUFFER_NOT_FULL; + + assert(m_pInfo); + assert(m_pTracks); + + for (;;) + { + const int status = LoadCluster(); + + if (status < 0) //error + return status; + + if (status >= 1) //no more clusters + return 0; + } +} + + +SeekHead::SeekHead( + Segment* pSegment, + long long start, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_entries(0), + m_entry_count(0), + m_void_elements(0), + m_void_element_count(0) +{ +} + + +SeekHead::~SeekHead() +{ + delete[] m_entries; + delete[] m_void_elements; +} + + +long SeekHead::Parse() +{ + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; + const long long stop = m_start + m_size; + + //first count the seek head entries + + int entry_count = 0; + int void_element_count = 0; + + while (pos < stop) + { + long long id, size; 
+ + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x0DBB) //SeekEntry ID + ++entry_count; + else if (id == 0x6C) //Void ID + ++void_element_count; + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + m_entries = new (std::nothrow) Entry[entry_count]; + + if (m_entries == NULL) + return -1; + + m_void_elements = new (std::nothrow) VoidElement[void_element_count]; + + if (m_void_elements == NULL) + return -1; + + //now parse the entries and void elements + + Entry* pEntry = m_entries; + VoidElement* pVoidElement = m_void_elements; + + pos = m_start; + + while (pos < stop) + { + const long long idpos = pos; + + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x0DBB) //SeekEntry ID + { + if (ParseEntry(pReader, pos, size, pEntry)) + { + Entry& e = *pEntry++; + + e.element_start = idpos; + e.element_size = (pos + size) - idpos; + } + } + else if (id == 0x6C) //Void ID + { + VoidElement& e = *pVoidElement++; + + e.element_start = idpos; + e.element_size = (pos + size) - idpos; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); + assert(count_ >= 0); + assert(count_ <= entry_count); + + m_entry_count = static_cast<int>(count_); + + count_ = ptrdiff_t(pVoidElement - m_void_elements); + assert(count_ >= 0); + assert(count_ <= void_element_count); + + m_void_element_count = static_cast<int>(count_); + + return 0; +} + + +int SeekHead::GetCount() const +{ + return m_entry_count; +} + +const SeekHead::Entry* SeekHead::GetEntry(int idx) const +{ + if (idx < 0) + return 0; + + if (idx >= m_entry_count) + return 0; + + return m_entries + idx; +} + +int SeekHead::GetVoidElementCount() const +{ + return m_void_element_count; +} + +const 
SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const +{ + if (idx < 0) + return 0; + + if (idx >= m_void_element_count) + return 0; + + return m_void_elements + idx; +} + + +#if 0 +void Segment::ParseCues(long long off) +{ + if (m_pCues) + return; + + //odbgstream os; + //os << "Segment::ParseCues (begin)" << endl; + + long long pos = m_start + off; + const long long element_start = pos; + const long long stop = m_start + m_size; + + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); + + const long long idpos = pos; + + const long long id = ReadUInt(m_pReader, idpos, len); + assert(id == 0x0C53BB6B); //Cues ID + + pos += len; //consume ID + assert(pos < stop); + + //Read Size + + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size >= 0); + + pos += len; //consume length of size of element + assert((pos + size) <= stop); + + const long long element_size = size + pos - element_start; + + //Pos now points to start of payload + + m_pCues = new Cues(this, pos, size, element_start, element_size); + assert(m_pCues); //TODO + + //os << "Segment::ParseCues (end)" << endl; +} +#else +long Segment::ParseCues( + long long off, + long long& pos, + long& len) +{ + if (m_pCues) + return 0; //success + + if (off < 0) + return -1; + + long long total, avail; + + const int status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + pos = m_start + off; + + if ((total < 0) || (pos >= total)) + return 1; //don't bother parsing cues + + const long long element_start = pos; + const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //underflow (weird) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id != 0x0C53BB6B) //Cues ID + return E_FILE_FORMAT_INVALID; + + pos += len; //consume ID + assert((segment_stop < 0) || (pos <= segment_stop)); + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //underflow (weird) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + if (size == 0) //weird, although technically not illegal + return 1; //done + + pos += len; //consume length of size of element + assert((segment_stop < 0) || (pos <= segment_stop)); + + //Pos now points to start of payload + + const long long element_stop = pos + size; + + if ((segment_stop >= 0) && (element_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (element_stop > total)) + return 1; //don't bother parsing anymore + + len = static_cast<long>(size); + + if (element_stop > avail) + return E_BUFFER_NOT_FULL; + + const long long element_size = element_stop - element_start; + + m_pCues = new (std::nothrow) Cues( + this, + pos, + size, + element_start, + element_size); + assert(m_pCues); //TODO + 
+ return 0; //success +} +#endif + + +#if 0 +void Segment::ParseSeekEntry( + long long start, + long long size_) +{ + long long pos = start; + + const long long stop = start + size_; + + long len; + + const long long seekIdId = ReadUInt(m_pReader, pos, len); + //seekIdId; + assert(seekIdId == 0x13AB); //SeekID ID + assert((pos + len) <= stop); + + pos += len; //consume id + + const long long seekIdSize = ReadUInt(m_pReader, pos, len); + assert(seekIdSize >= 0); + assert((pos + len) <= stop); + + pos += len; //consume size + + const long long seekId = ReadUInt(m_pReader, pos, len); //payload + assert(seekId >= 0); + assert(len == seekIdSize); + assert((pos + len) <= stop); + + pos += seekIdSize; //consume payload + + const long long seekPosId = ReadUInt(m_pReader, pos, len); + //seekPosId; + assert(seekPosId == 0x13AC); //SeekPos ID + assert((pos + len) <= stop); + + pos += len; //consume id + + const long long seekPosSize = ReadUInt(m_pReader, pos, len); + assert(seekPosSize >= 0); + assert((pos + len) <= stop); + + pos += len; //consume size + assert((pos + seekPosSize) <= stop); + + const long long seekOff = UnserializeUInt(m_pReader, pos, seekPosSize); + assert(seekOff >= 0); + assert(seekOff < m_size); + + pos += seekPosSize; //consume payload + assert(pos == stop); + + const long long seekPos = m_start + seekOff; + assert(seekPos < (m_start + m_size)); + + if (seekId == 0x0C53BB6B) //Cues ID + ParseCues(seekOff); +} +#else +bool SeekHead::ParseEntry( + IMkvReader* pReader, + long long start, + long long size_, + Entry* pEntry) +{ + if (size_ <= 0) + return false; + + long long pos = start; + const long long stop = start + size_; + + long len; + + //parse the container for the level-1 element ID + + const long long seekIdId = ReadUInt(pReader, pos, len); + //seekIdId; + + if (seekIdId != 0x13AB) //SeekID ID + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume SeekID id + + const long long seekIdSize = ReadUInt(pReader, pos, 
len); + + if (seekIdSize <= 0) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume size of field + + if ((pos + seekIdSize) > stop) + return false; + + //Note that the SeekId payload really is serialized + //as a "Matroska integer", not as a plain binary value. + //In fact, Matroska requires that ID values in the + //stream exactly match the binary representation as listed + //in the Matroska specification. + // + //This parser is more liberal, and permits IDs to have + //any width. (This could make the representation in the stream + //different from what's in the spec, but it doesn't matter here, + //since we always normalize "Matroska integer" values.) + + pEntry->id = ReadUInt(pReader, pos, len); //payload + + if (pEntry->id <= 0) + return false; + + if (len != seekIdSize) + return false; + + pos += seekIdSize; //consume SeekID payload + + const long long seekPosId = ReadUInt(pReader, pos, len); + + if (seekPosId != 0x13AC) //SeekPos ID + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume id + + const long long seekPosSize = ReadUInt(pReader, pos, len); + + if (seekPosSize <= 0) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume size + + if ((pos + seekPosSize) > stop) + return false; + + pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); + + if (pEntry->pos < 0) + return false; + + pos += seekPosSize; //consume payload + + if (pos != stop) + return false; + + return true; +} +#endif + + +Cues::Cues( + Segment* pSegment, + long long start_, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start_), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_cue_points(NULL), + m_count(0), + m_preload_count(0), + m_pos(start_) +{ +} + + +Cues::~Cues() +{ + const long n = m_count + m_preload_count; + + CuePoint** p = m_cue_points; + CuePoint** const q = p + n; + + while (p 
!= q) + { + CuePoint* const pCP = *p++; + assert(pCP); + + delete pCP; + } + + delete[] m_cue_points; +} + + +long Cues::GetCount() const +{ + if (m_cue_points == NULL) + return -1; + + return m_count; //TODO: really ignore preload count? +} + + +bool Cues::DoneParsing() const +{ + const long long stop = m_start + m_size; + return (m_pos >= stop); +} + + +void Cues::Init() const +{ + if (m_cue_points) + return; + + assert(m_count == 0); + assert(m_preload_count == 0); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + const long long stop = m_start + m_size; + long long pos = m_start; + + long cue_points_size = 0; + + while (pos < stop) + { + const long long idpos = pos; + + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; //consume Size field + assert((pos + size) <= stop); + + if (id == 0x3B) //CuePoint ID + PreloadCuePoint(cue_points_size, idpos); + + pos += size; //consume payload + assert(pos <= stop); + } +} + + +void Cues::PreloadCuePoint( + long& cue_points_size, + long long pos) const +{ + assert(m_count == 0); + + if (m_preload_count >= cue_points_size) + { + const long n = (cue_points_size <= 0) ? 
2048 : 2*cue_points_size; + + CuePoint** const qq = new CuePoint*[n]; + CuePoint** q = qq; //beginning of target + + CuePoint** p = m_cue_points; //beginning of source + CuePoint** const pp = p + m_preload_count; //end of source + + while (p != pp) + *q++ = *p++; + + delete[] m_cue_points; + + m_cue_points = qq; + cue_points_size = n; + } + + CuePoint* const pCP = new CuePoint(m_preload_count, pos); + m_cue_points[m_preload_count++] = pCP; +} + + +bool Cues::LoadCuePoint() const +{ + //odbgstream os; + //os << "Cues::LoadCuePoint" << endl; + + const long long stop = m_start + m_size; + + if (m_pos >= stop) + return false; //nothing else to do + + Init(); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + while (m_pos < stop) + { + const long long idpos = m_pos; + + long len; + + const long long id = ReadUInt(pReader, m_pos, len); + assert(id >= 0); //TODO + assert((m_pos + len) <= stop); + + m_pos += len; //consume ID + + const long long size = ReadUInt(pReader, m_pos, len); + assert(size >= 0); + assert((m_pos + len) <= stop); + + m_pos += len; //consume Size field + assert((m_pos + size) <= stop); + + if (id != 0x3B) //CuePoint ID + { + m_pos += size; //consume payload + assert(m_pos <= stop); + + continue; + } + + assert(m_preload_count > 0); + + CuePoint* const pCP = m_cue_points[m_count]; + assert(pCP); + assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos)); + if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos)) + return false; + + pCP->Load(pReader); + ++m_count; + --m_preload_count; + + m_pos += size; //consume payload + assert(m_pos <= stop); + + return true; //yes, we loaded a cue point + } + + //return (m_pos < stop); + return false; //no, we did not load a cue point +} + + +bool Cues::Find( + long long time_ns, + const Track* pTrack, + const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) const +{ + assert(time_ns >= 0); + assert(pTrack); + +#if 0 + LoadCuePoint(); //establish invariant + + assert(m_cue_points); + 
assert(m_count > 0); + + CuePoint** const ii = m_cue_points; + CuePoint** i = ii; + + CuePoint** const jj = ii + m_count + m_preload_count; + CuePoint** j = jj; + + pCP = *i; + assert(pCP); + + if (time_ns <= pCP->GetTime(m_pSegment)) + { + pTP = pCP->Find(pTrack); + return (pTP != NULL); + } + + IMkvReader* const pReader = m_pSegment->m_pReader; + + while (i < j) + { + //INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + CuePoint** const k = i + (j - i) / 2; + assert(k < jj); + + CuePoint* const pCP = *k; + assert(pCP); + + pCP->Load(pReader); + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i <= jj); + assert(i > ii); + + pCP = *--i; + assert(pCP); + assert(pCP->GetTime(m_pSegment) <= time_ns); +#else + if (m_cue_points == NULL) + return false; + + if (m_count == 0) + return false; + + CuePoint** const ii = m_cue_points; + CuePoint** i = ii; + + CuePoint** const jj = ii + m_count; + CuePoint** j = jj; + + pCP = *i; + assert(pCP); + + if (time_ns <= pCP->GetTime(m_pSegment)) + { + pTP = pCP->Find(pTrack); + return (pTP != NULL); + } + + while (i < j) + { + //INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + CuePoint** const k = i + (j - i) / 2; + assert(k < jj); + + CuePoint* const pCP = *k; + assert(pCP); + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i <= jj); + assert(i > ii); + + pCP = *--i; + assert(pCP); + assert(pCP->GetTime(m_pSegment) <= time_ns); +#endif + + //TODO: here and elsewhere, it's probably not correct to search + //for the cue point with this time, and then search for a matching + //track. In principle, the matching track could be on some earlier + //cue point, and with our current algorithm, we'd miss it. 
To make + //this bullet-proof, we'd need to create a secondary structure, + //with a list of cue points that apply to a track, and then search + //that track-based structure for a matching cue point. + + pTP = pCP->Find(pTrack); + return (pTP != NULL); +} + + +#if 0 +bool Cues::FindNext( + long long time_ns, + const Track* pTrack, + const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) const +{ + pCP = 0; + pTP = 0; + + if (m_count == 0) + return false; + + assert(m_cue_points); + + const CuePoint* const* const ii = m_cue_points; + const CuePoint* const* i = ii; + + const CuePoint* const* const jj = ii + m_count; + const CuePoint* const* j = jj; + + while (i < j) + { + //INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + const CuePoint* const* const k = i + (j - i) / 2; + assert(k < jj); + + pCP = *k; + assert(pCP); + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i <= jj); + + if (i >= jj) //time_ns is greater than max cue point + return false; + + pCP = *i; + assert(pCP); + assert(pCP->GetTime(m_pSegment) > time_ns); + + pTP = pCP->Find(pTrack); + return (pTP != NULL); +} +#endif + + +const CuePoint* Cues::GetFirst() const +{ + if (m_cue_points == NULL) + return NULL; + + if (m_count == 0) + return NULL; + +#if 0 + LoadCuePoint(); //init cues + + const size_t count = m_count + m_preload_count; + + if (count == 0) //weird + return NULL; +#endif + + CuePoint* const* const pp = m_cue_points; + assert(pp); + + CuePoint* const pCP = pp[0]; + assert(pCP); + assert(pCP->GetTimeCode() >= 0); + + return pCP; +} + + +const CuePoint* Cues::GetLast() const +{ + if (m_cue_points == NULL) + return NULL; + + if (m_count <= 0) + return NULL; + +#if 0 + LoadCuePoint(); //init cues + + const size_t count = m_count + m_preload_count; + + if (count == 0) //weird + return NULL; + + const size_t index = count - 1; + + CuePoint* const* const pp = 
m_cue_points; + assert(pp); + + CuePoint* const pCP = pp[index]; + assert(pCP); + + pCP->Load(m_pSegment->m_pReader); + assert(pCP->GetTimeCode() >= 0); +#else + const long index = m_count - 1; + + CuePoint* const* const pp = m_cue_points; + assert(pp); + + CuePoint* const pCP = pp[index]; + assert(pCP); + assert(pCP->GetTimeCode() >= 0); +#endif + + return pCP; +} + + +const CuePoint* Cues::GetNext(const CuePoint* pCurr) const +{ + if (pCurr == NULL) + return NULL; + + assert(pCurr->GetTimeCode() >= 0); + assert(m_cue_points); + assert(m_count >= 1); + +#if 0 + const size_t count = m_count + m_preload_count; + + size_t index = pCurr->m_index; + assert(index < count); + + CuePoint* const* const pp = m_cue_points; + assert(pp); + assert(pp[index] == pCurr); + + ++index; + + if (index >= count) + return NULL; + + CuePoint* const pNext = pp[index]; + assert(pNext); + + pNext->Load(m_pSegment->m_pReader); +#else + long index = pCurr->m_index; + assert(index < m_count); + + CuePoint* const* const pp = m_cue_points; + assert(pp); + assert(pp[index] == pCurr); + + ++index; + + if (index >= m_count) + return NULL; + + CuePoint* const pNext = pp[index]; + assert(pNext); + assert(pNext->GetTimeCode() >= 0); +#endif + + return pNext; +} + + +const BlockEntry* Cues::GetBlock( + const CuePoint* pCP, + const CuePoint::TrackPosition* pTP) const +{ + if (pCP == NULL) + return NULL; + + if (pTP == NULL) + return NULL; + + return m_pSegment->GetBlock(*pCP, *pTP); +} + + +const BlockEntry* Segment::GetBlock( + const CuePoint& cp, + const CuePoint::TrackPosition& tp) +{ + Cluster** const ii = m_clusters; + Cluster** i = ii; + + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** const jj = ii + count; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[ii, i) < pTP->m_pos + //[i, j) ? 
+ //[j, jj) > pTP->m_pos + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pCluster = *k; + assert(pCluster); + + //const long long pos_ = pCluster->m_pos; + //assert(pos_); + //const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); + + const long long pos = pCluster->GetPosition(); + assert(pos >= 0); + + if (pos < tp.m_pos) + i = k + 1; + else if (pos > tp.m_pos) + j = k; + else + return pCluster->GetEntry(cp, tp); + } + + assert(i == j); + //assert(Cluster::HasBlockEntries(this, tp.m_pos)); + + Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); + assert(pCluster); + + const ptrdiff_t idx = i - m_clusters; + + PreloadCluster(pCluster, idx); + assert(m_clusters); + assert(m_clusterPreloadCount > 0); + assert(m_clusters[idx] == pCluster); + + return pCluster->GetEntry(cp, tp); +} + + +const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) +{ + if (requested_pos < 0) + return 0; + + Cluster** const ii = m_clusters; + Cluster** i = ii; + + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** const jj = ii + count; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[ii, i) < pTP->m_pos + //[i, j) ? + //[j, jj) > pTP->m_pos + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pCluster = *k; + assert(pCluster); + + //const long long pos_ = pCluster->m_pos; + //assert(pos_); + //const long long pos = pos_ * ((pos_ < 0) ? 
-1 : 1); + + const long long pos = pCluster->GetPosition(); + assert(pos >= 0); + + if (pos < requested_pos) + i = k + 1; + else if (pos > requested_pos) + j = k; + else + return pCluster; + } + + assert(i == j); + //assert(Cluster::HasBlockEntries(this, tp.m_pos)); + + Cluster* const pCluster = Cluster::Create( + this, + -1, + requested_pos); + //-1); + assert(pCluster); + + const ptrdiff_t idx = i - m_clusters; + + PreloadCluster(pCluster, idx); + assert(m_clusters); + assert(m_clusterPreloadCount > 0); + assert(m_clusters[idx] == pCluster); + + return pCluster; +} + + +CuePoint::CuePoint(long idx, long long pos) : + m_element_start(0), + m_element_size(0), + m_index(idx), + m_timecode(-1 * pos), + m_track_positions(NULL), + m_track_positions_count(0) +{ + assert(pos > 0); +} + + +CuePoint::~CuePoint() +{ + delete[] m_track_positions; +} + + +void CuePoint::Load(IMkvReader* pReader) +{ + //odbgstream os; + //os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; + + if (m_timecode >= 0) //already loaded + return; + + assert(m_track_positions == NULL); + assert(m_track_positions_count == 0); + + long long pos_ = -m_timecode; + const long long element_start = pos_; + + long long stop; + + { + long len; + + const long long id = ReadUInt(pReader, pos_, len); + assert(id == 0x3B); //CuePoint ID + if (id != 0x3B) + return; + + pos_ += len; //consume ID + + const long long size = ReadUInt(pReader, pos_, len); + assert(size >= 0); + + pos_ += len; //consume Size field + //pos_ now points to start of payload + + stop = pos_ + size; + } + + const long long element_size = stop - element_start; + + long long pos = pos_; + + //First count number of track positions + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; 
//consume Size field + assert((pos + size) <= stop); + + if (id == 0x33) //CueTime ID + m_timecode = UnserializeUInt(pReader, pos, size); + + else if (id == 0x37) //CueTrackPosition(s) ID + ++m_track_positions_count; + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(m_timecode >= 0); + assert(m_track_positions_count > 0); + + //os << "CuePoint::Load(cont'd): idpos=" << idpos + // << " timecode=" << m_timecode + // << endl; + + m_track_positions = new TrackPosition[m_track_positions_count]; + + //Now parse track positions + + TrackPosition* p = m_track_positions; + pos = pos_; + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; //consume Size field + assert((pos + size) <= stop); + + if (id == 0x37) //CueTrackPosition(s) ID + { + TrackPosition& tp = *p++; + tp.Parse(pReader, pos, size); + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(size_t(p - m_track_positions) == m_track_positions_count); + + m_element_start = element_start; + m_element_size = element_size; +} + + + +void CuePoint::TrackPosition::Parse( + IMkvReader* pReader, + long long start_, + long long size_) +{ + const long long stop = start_ + size_; + long long pos = start_; + + m_track = -1; + m_pos = -1; + m_block = 1; //default + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; //consume Size field + assert((pos + size) <= stop); + + if (id == 0x77) //CueTrack ID + m_track = UnserializeUInt(pReader, pos, size); + + else if (id == 0x71) //CueClusterPos ID + 
m_pos = UnserializeUInt(pReader, pos, size); + + else if (id == 0x1378) //CueBlockNumber + m_block = UnserializeUInt(pReader, pos, size); + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(m_pos >= 0); + assert(m_track > 0); + //assert(m_block > 0); +} + + +const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const +{ + assert(pTrack); + + const long long n = pTrack->GetNumber(); + + const TrackPosition* i = m_track_positions; + const TrackPosition* const j = i + m_track_positions_count; + + while (i != j) + { + const TrackPosition& p = *i++; + + if (p.m_track == n) + return &p; + } + + return NULL; //no matching track number found +} + + +long long CuePoint::GetTimeCode() const +{ + return m_timecode; +} + +long long CuePoint::GetTime(const Segment* pSegment) const +{ + assert(pSegment); + assert(m_timecode >= 0); + + const SegmentInfo* const pInfo = pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long time = scale * m_timecode; + + return time; +} + + +#if 0 +long long Segment::Unparsed() const +{ + if (m_size < 0) + return LLONG_MAX; + + const long long stop = m_start + m_size; + + const long long result = stop - m_pos; + assert(result >= 0); + + return result; +} +#else +bool Segment::DoneParsing() const +{ + if (m_size < 0) + { + long long total, avail; + + const int status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return true; //must assume done + + if (total < 0) + return false; //assume live stream + + return (m_pos >= total); + } + + const long long stop = m_start + m_size; + + return (m_pos >= stop); +} +#endif + + +const Cluster* Segment::GetFirst() const +{ + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + Cluster* const pCluster = m_clusters[0]; + assert(pCluster); + + return pCluster; +} + + +const Cluster* Segment::GetLast() const +{ + if ((m_clusters == NULL) || (m_clusterCount <= 0)) 
+ return &m_eos; + + const long idx = m_clusterCount - 1; + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + + return pCluster; +} + + +unsigned long Segment::GetCount() const +{ + return m_clusterCount; +} + + +const Cluster* Segment::GetNext(const Cluster* pCurr) +{ + assert(pCurr); + assert(pCurr != &m_eos); + assert(m_clusters); + + long idx = pCurr->m_index; + + if (idx >= 0) + { + assert(m_clusterCount > 0); + assert(idx < m_clusterCount); + assert(pCurr == m_clusters[idx]); + + ++idx; + + if (idx >= m_clusterCount) + return &m_eos; //caller will LoadCluster as desired + + Cluster* const pNext = m_clusters[idx]; + assert(pNext); + assert(pNext->m_index >= 0); + assert(pNext->m_index == idx); + + return pNext; + } + + assert(m_clusterPreloadCount > 0); + + long long pos = pCurr->m_element_start; + + assert(m_size >= 0); //TODO + const long long stop = m_start + m_size; //end of segment + + { + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); //TODO + if (result != 0) + return NULL; + + const long long id = ReadUInt(m_pReader, pos, len); + assert(id == 0x0F43B675); //Cluster ID + if (id != 0x0F43B675) + return NULL; + + pos += len; //consume ID + + //Read Size + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); //TODO + assert((pos + len) <= stop); //TODO + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size > 0); //TODO + //assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); + + pos += len; //consume length of size of element + assert((pos + size) <= stop); //TODO + + //Pos now points to start of payload + + pos += size; //consume payload + } + + long long off_next = 0; + + while (pos < stop) + { + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); //TODO + if (result != 0) + return NULL; + + const long long idpos = pos; //pos of next (potential) cluster + + 
const long long id = ReadUInt(m_pReader, idpos, len); + assert(id > 0); //TODO + + pos += len; //consume ID + + //Read Size + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); //TODO + assert((pos + len) <= stop); //TODO + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size >= 0); //TODO + + pos += len; //consume length of size of element + assert((pos + size) <= stop); //TODO + + //Pos now points to start of payload + + if (size == 0) //weird + continue; + + if (id == 0x0F43B675) //Cluster ID + { + const long long off_next_ = idpos - m_start; + + long long pos_; + long len_; + + const long status = Cluster::HasBlockEntries( + this, + off_next_, + pos_, + len_); + + assert(status >= 0); + + if (status > 0) + { + off_next = off_next_; + break; + } + } + + pos += size; //consume payload + } + + if (off_next <= 0) + return 0; + + Cluster** const ii = m_clusters + m_clusterCount; + Cluster** i = ii; + + Cluster** const jj = ii + m_clusterPreloadCount; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[0, i) < pos_next + //[i, j) ? + //[j, jj) > pos_next + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pNext = *k; + assert(pNext); + assert(pNext->m_index < 0); + + //const long long pos_ = pNext->m_pos; + //assert(pos_); + //pos = pos_ * ((pos_ < 0) ? 
-1 : 1); + + pos = pNext->GetPosition(); + + if (pos < off_next) + i = k + 1; + else if (pos > off_next) + j = k; + else + return pNext; + } + + assert(i == j); + + Cluster* const pNext = Cluster::Create(this, + -1, + off_next); + assert(pNext); + + const ptrdiff_t idx_next = i - m_clusters; //insertion position + + PreloadCluster(pNext, idx_next); + assert(m_clusters); + assert(idx_next < m_clusterSize); + assert(m_clusters[idx_next] == pNext); + + return pNext; +} + + +long Segment::ParseNext( + const Cluster* pCurr, + const Cluster*& pResult, + long long& pos, + long& len) +{ + assert(pCurr); + assert(!pCurr->EOS()); + assert(m_clusters); + + pResult = 0; + + if (pCurr->m_index >= 0) //loaded (not merely preloaded) + { + assert(m_clusters[pCurr->m_index] == pCurr); + + const long next_idx = pCurr->m_index + 1; + + if (next_idx < m_clusterCount) + { + pResult = m_clusters[next_idx]; + return 0; //success + } + + //curr cluster is last among loaded + + const long result = LoadCluster(pos, len); + + if (result < 0) //error or underflow + return result; + + if (result > 0) //no more clusters + { + //pResult = &m_eos; + return 1; + } + + pResult = GetLast(); + return 0; //success + } + + assert(m_pos > 0); + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; + + //interrogate curr cluster + + pos = pCurr->m_element_start; + + if (pCurr->m_element_size >= 0) + pos += pCurr->m_element_size; + else + { + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(m_pReader, pos, len); + + if (id != 0x0F43B675) //weird: not Cluster ID + return -1; + + pos += len; //consume ID + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) //TODO: should never happen + return E_FILE_FORMAT_INVALID; //TODO: resolve this + + //assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + //Pos now points to start of payload + + pos += size; //consume payload (that is, the current cluster) + assert((segment_stop < 0) || (pos <= segment_stop)); + + //By consuming the payload, we are assuming that the curr + //cluster isn't interesting. 
That is, we don't bother checking + //whether the payload of the curr cluster is less than what + //happens to be available (obtained via IMkvReader::Length). + //Presumably the caller has already dispensed with the current + //cluster, and really does want the next cluster. + } + + //pos now points to just beyond the last fully-loaded cluster + + for (;;) + { + const long status = DoParseNext(pResult, pos, len); + + if (status <= 1) + return status; + } +} + + +long Segment::DoParseNext( + const Cluster*& pResult, + long long& pos, + long& len) +{ + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + //Parse next cluster. This is strictly a parsing activity. + //Creation of a new cluster object happens later, after the + //parsing is done. + + long long off_next = 0; + long long cluster_size = -1; + + for (;;) + { + if ((total >= 0) && (pos >= total)) + return 1; //EOF + + if ((segment_stop >= 0) && (pos >= segment_stop)) + return 1; //EOF + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; //absolute + const long long idoff = pos - m_start; //relative + + const long long id = ReadUInt(m_pReader, idpos, len); //absolute + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) //weird + return -1; //generic error + + pos += len; //consume ID + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if 
(result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume length of size of element + + //Pos now points to start of payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if ((segment_stop >= 0) && + (size != unknown_size) && + ((pos + size) > segment_stop)) + { + return E_FILE_FORMAT_INVALID; + } + + if (id == 0x0C53BB6B) //Cues ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + const long long element_stop = pos + size; + + if ((segment_stop >= 0) && (element_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + const long long element_start = idpos; + const long long element_size = element_stop - element_start; + + if (m_pCues == NULL) + { + m_pCues = new Cues(this, + pos, + size, + element_start, + element_size); + assert(m_pCues); //TODO + } + + pos += size; //consume payload + assert((segment_stop < 0) || (pos <= segment_stop)); + + continue; + } + + if (id != 0x0F43B675) //not a Cluster ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload + assert((segment_stop < 0) || (pos <= segment_stop)); + + continue; + } + +#if 0 //this is commented-out to support incremental cluster parsing + len = static_cast<long>(size); + + if (element_stop > avail) + return E_BUFFER_NOT_FULL; +#endif + + //We have a cluster. + + off_next = idoff; + + if (size != unknown_size) + cluster_size = size; + + break; + } + + assert(off_next > 0); //have cluster + + //We have parsed the next cluster. + //We have not created a cluster object yet. 
What we need + //to do now is determine whether it has already be preloaded + //(in which case, an object for this cluster has already been + //created), and if not, create a new cluster object. + + Cluster** const ii = m_clusters + m_clusterCount; + Cluster** i = ii; + + Cluster** const jj = ii + m_clusterPreloadCount; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[0, i) < pos_next + //[i, j) ? + //[j, jj) > pos_next + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + const Cluster* const pNext = *k; + assert(pNext); + assert(pNext->m_index < 0); + + pos = pNext->GetPosition(); + assert(pos >= 0); + + if (pos < off_next) + i = k + 1; + else if (pos > off_next) + j = k; + else + { + pResult = pNext; + return 0; //success + } + } + + assert(i == j); + + long long pos_; + long len_; + + status = Cluster::HasBlockEntries(this, off_next, pos_, len_); + + if (status < 0) //error or underflow + { + pos = pos_; + len = len_; + + return status; + } + + if (status > 0) //means "found at least one block entry" + { + Cluster* const pNext = Cluster::Create(this, + -1, //preloaded + off_next); + //element_size); + assert(pNext); + + const ptrdiff_t idx_next = i - m_clusters; //insertion position + + PreloadCluster(pNext, idx_next); + assert(m_clusters); + assert(idx_next < m_clusterSize); + assert(m_clusters[idx_next] == pNext); + + pResult = pNext; + return 0; //success + } + + //status == 0 means "no block entries found" + + if (cluster_size < 0) //unknown size + { + const long long payload_pos = pos; //absolute pos of cluster payload + + for (;;) //determine cluster size + { + if ((total >= 0) && (pos >= total)) + break; + + if ((segment_stop >= 0) && (pos >= segment_stop)) + break; //no more clusters + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return 
E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. + + if (id == 0x0F43B675) //Cluster ID + break; + + if (id == 0x0C53BB6B) //Cues ID + break; + + pos += len; //consume ID (of sub-element) + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field of element + + //pos now points to start of sub-element's payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload of sub-element + assert((segment_stop < 0) || (pos <= segment_stop)); + } //determine cluster size + + cluster_size = pos - payload_pos; + assert(cluster_size >= 0); //TODO: handle cluster_size = 0 + + pos = payload_pos; //reset and re-parse original cluster + } + + pos += cluster_size; //consume payload + assert((segment_stop < 0) || (pos <= segment_stop)); + + return 2; //try to find a cluster that follows 
next +} + + +const Cluster* Segment::FindCluster(long long time_ns) const +{ + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + { + Cluster* const pCluster = m_clusters[0]; + assert(pCluster); + assert(pCluster->m_index == 0); + + if (time_ns <= pCluster->GetTime()) + return pCluster; + } + + //Binary search of cluster array + + long i = 0; + long j = m_clusterCount; + + while (i < j) + { + //INVARIANT: + //[0, i) <= time_ns + //[i, j) ? + //[j, m_clusterCount) > time_ns + + const long k = i + (j - i) / 2; + assert(k < m_clusterCount); + + Cluster* const pCluster = m_clusters[k]; + assert(pCluster); + assert(pCluster->m_index == k); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i > 0); + assert(i <= m_clusterCount); + + const long k = i - 1; + + Cluster* const pCluster = m_clusters[k]; + assert(pCluster); + assert(pCluster->m_index == k); + assert(pCluster->GetTime() <= time_ns); + + return pCluster; +} + + +#if 0 +const BlockEntry* Segment::Seek( + long long time_ns, + const Track* pTrack) const +{ + assert(pTrack); + + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return pTrack->GetEOS(); + + Cluster** const i = m_clusters; + assert(i); + + { + Cluster* const pCluster = *i; + assert(pCluster); + assert(pCluster->m_index == 0); //m_clusterCount > 0 + assert(pCluster->m_pSegment == this); + + if (time_ns <= pCluster->GetTime()) + return pCluster->GetEntry(pTrack); + } + + Cluster** const j = i + m_clusterCount; + + if (pTrack->GetType() == 2) //audio + { + //TODO: we could decide to use cues for this, as we do for video. + //But we only use it for video because looking around for a keyframe + //can get expensive. Audio doesn't require anything special so a + //straight cluster search is good enough (we assume). + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? 
+ //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + Cluster* const pCluster = *mid; + assert(pCluster); + assert(pCluster->m_index == long(mid - m_clusters)); + assert(pCluster->m_pSegment == this); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + while (lo > i) + { + Cluster* const pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + const BlockEntry* const pBE = pCluster->GetEntry(pTrack); + + if ((pBE != 0) && !pBE->EOS()) + return pBE; + + //landed on empty cluster (no entries) + } + + return pTrack->GetEOS(); //weird + } + + assert(pTrack->GetType() == 1); //video + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? + //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + Cluster* const pCluster = *mid; + assert(pCluster); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + Cluster* pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + { + const BlockEntry* const pBE = pCluster->GetEntry(pTrack, time_ns); + + if ((pBE != 0) && !pBE->EOS()) //found a keyframe + return pBE; + } + + const VideoTrack* const pVideo = static_cast<const VideoTrack*>(pTrack); + + while (lo != i) + { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + const BlockEntry* const pBlockEntry = pCluster->GetMaxKey(pVideo); + + if ((pBlockEntry != 0) && !pBlockEntry->EOS()) + return pBlockEntry; + } + + //weird: we're on the first cluster, but no keyframe found + //should never happen but we must return something anyway + + return pTrack->GetEOS(); +} +#endif + + +#if 0 +bool 
Segment::SearchCues( + long long time_ns, + Track* pTrack, + Cluster*& pCluster, + const BlockEntry*& pBlockEntry, + const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) +{ + if (pTrack->GetType() != 1) //not video + return false; //TODO: for now, just handle video stream + + if (m_pCues == NULL) + return false; + + if (!m_pCues->Find(time_ns, pTrack, pCP, pTP)) + return false; //weird + + assert(pCP); + assert(pTP); + assert(pTP->m_track == pTrack->GetNumber()); + + //We have the cue point and track position we want, + //so we now need to search for the cluster having + //the indicated position. + + return GetCluster(pCP, pTP, pCluster, pBlockEntry); +} +#endif + + +const Tracks* Segment::GetTracks() const +{ + return m_pTracks; +} + + +const SegmentInfo* Segment::GetInfo() const +{ + return m_pInfo; +} + + +const Cues* Segment::GetCues() const +{ + return m_pCues; +} + + +const Chapters* Segment::GetChapters() const +{ + return m_pChapters; +} + + +const SeekHead* Segment::GetSeekHead() const +{ + return m_pSeekHead; +} + + +long long Segment::GetDuration() const +{ + assert(m_pInfo); + return m_pInfo->GetDuration(); +} + + +Chapters::Chapters( + Segment* pSegment, + long long payload_start, + long long payload_size, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(payload_start), + m_size(payload_size), + m_element_start(element_start), + m_element_size(element_size), + m_editions(NULL), + m_editions_size(0), + m_editions_count(0) +{ +} + + +Chapters::~Chapters() +{ + while (m_editions_count > 0) + { + Edition& e = m_editions[--m_editions_count]; + e.Clear(); + } +} + + +long Chapters::Parse() +{ + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; // payload start + const long long stop = pos + m_size; // payload stop + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return 
status; + + if (size == 0) // weird + continue; + + if (id == 0x05B9) // EditionEntry ID + { + status = ParseEdition(pos, size); + + if (status < 0) // error + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +int Chapters::GetEditionCount() const +{ + return m_editions_count; +} + + +const Chapters::Edition* Chapters::GetEdition(int idx) const +{ + if (idx < 0) + return NULL; + + if (idx >= m_editions_count) + return NULL; + + return m_editions + idx; +} + + +bool Chapters::ExpandEditionsArray() +{ + if (m_editions_size > m_editions_count) + return true; // nothing else to do + + const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size; + + Edition* const editions = new (std::nothrow) Edition[size]; + + if (editions == NULL) + return false; + + for (int idx = 0; idx < m_editions_count; ++idx) + { + m_editions[idx].ShallowCopy(editions[idx]); + } + + delete[] m_editions; + m_editions = editions; + + m_editions_size = size; + return true; +} + + +long Chapters::ParseEdition( + long long pos, + long long size) +{ + if (!ExpandEditionsArray()) + return -1; + + Edition& e = m_editions[m_editions_count++]; + e.Init(); + + return e.Parse(m_pSegment->m_pReader, pos, size); +} + + +Chapters::Edition::Edition() +{ +} + + +Chapters::Edition::~Edition() +{ +} + + +int Chapters::Edition::GetAtomCount() const +{ + return m_atoms_count; +} + + +const Chapters::Atom* Chapters::Edition::GetAtom(int index) const +{ + if (index < 0) + return NULL; + + if (index >= m_atoms_count) + return NULL; + + return m_atoms + index; +} + + +void Chapters::Edition::Init() +{ + m_atoms = NULL; + m_atoms_size = 0; + m_atoms_count = 0; +} + + +void Chapters::Edition::ShallowCopy(Edition& rhs) const +{ + rhs.m_atoms = m_atoms; + rhs.m_atoms_size = m_atoms_size; + rhs.m_atoms_count = m_atoms_count; +} + + +void Chapters::Edition::Clear() +{ + while (m_atoms_count > 0) + { + Atom& a = m_atoms[--m_atoms_count]; + a.Clear(); + } + + 
delete[] m_atoms; + m_atoms = NULL; + + m_atoms_size = 0; +} + + +long Chapters::Edition::Parse( + IMkvReader* pReader, + long long pos, + long long size) +{ + const long long stop = pos + size; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == 0x36) // Atom ID + { + status = ParseAtom(pReader, pos, size); + + if (status < 0) // error + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +long Chapters::Edition::ParseAtom( + IMkvReader* pReader, + long long pos, + long long size) +{ + if (!ExpandAtomsArray()) + return -1; + + Atom& a = m_atoms[m_atoms_count++]; + a.Init(); + + return a.Parse(pReader, pos, size); +} + + +bool Chapters::Edition::ExpandAtomsArray() +{ + if (m_atoms_size > m_atoms_count) + return true; // nothing else to do + + const int size = (m_atoms_size == 0) ? 
1 : 2 * m_atoms_size; + + Atom* const atoms = new (std::nothrow) Atom[size]; + + if (atoms == NULL) + return false; + + for (int idx = 0; idx < m_atoms_count; ++idx) + { + m_atoms[idx].ShallowCopy(atoms[idx]); + } + + delete[] m_atoms; + m_atoms = atoms; + + m_atoms_size = size; + return true; +} + + +Chapters::Atom::Atom() +{ +} + + +Chapters::Atom::~Atom() +{ +} + + +unsigned long long Chapters::Atom::GetUID() const +{ + return m_uid; +} + + +const char* Chapters::Atom::GetStringUID() const +{ + return m_string_uid; +} + + +long long Chapters::Atom::GetStartTimecode() const +{ + return m_start_timecode; +} + + +long long Chapters::Atom::GetStopTimecode() const +{ + return m_stop_timecode; +} + + +long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const +{ + return GetTime(pChapters, m_start_timecode); +} + + +long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const +{ + return GetTime(pChapters, m_stop_timecode); +} + + +int Chapters::Atom::GetDisplayCount() const +{ + return m_displays_count; +} + + +const Chapters::Display* Chapters::Atom::GetDisplay(int index) const +{ + if (index < 0) + return NULL; + + if (index >= m_displays_count) + return NULL; + + return m_displays + index; +} + + +void Chapters::Atom::Init() +{ + m_string_uid = NULL; + m_uid = 0; + m_start_timecode = -1; + m_stop_timecode = -1; + + m_displays = NULL; + m_displays_size = 0; + m_displays_count = 0; +} + + +void Chapters::Atom::ShallowCopy(Atom& rhs) const +{ + rhs.m_string_uid = m_string_uid; + rhs.m_uid = m_uid; + rhs.m_start_timecode = m_start_timecode; + rhs.m_stop_timecode = m_stop_timecode; + + rhs.m_displays = m_displays; + rhs.m_displays_size = m_displays_size; + rhs.m_displays_count = m_displays_count; +} + + +void Chapters::Atom::Clear() +{ + delete[] m_string_uid; + m_string_uid = NULL; + + while (m_displays_count > 0) + { + Display& d = m_displays[--m_displays_count]; + d.Clear(); + } + + delete[] m_displays; + m_displays = NULL; + + m_displays_size 
= 0; +} + + +long Chapters::Atom::Parse( + IMkvReader* pReader, + long long pos, + long long size) +{ + const long long stop = pos + size; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == 0x00) // Display ID + { + status = ParseDisplay(pReader, pos, size); + + if (status < 0) // error + return status; + } + else if (id == 0x1654) // StringUID ID + { + status = UnserializeString(pReader, pos, size, m_string_uid); + + if (status < 0) // error + return status; + } + else if (id == 0x33C4) // UID ID + { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast<long>(val); + + m_uid = val; + } + else if (id == 0x11) // TimeStart ID + { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast<long>(val); + + m_start_timecode = val; + } + else if (id == 0x12) // TimeEnd ID + { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast<long>(val); + + m_stop_timecode = val; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +long long Chapters::Atom::GetTime( + const Chapters* pChapters, + long long timecode) +{ + if (pChapters == NULL) + return -1; + + Segment* const pSegment = pChapters->m_pSegment; + + if (pSegment == NULL) // weird + return -1; + + const SegmentInfo* const pInfo = pSegment->GetInfo(); + + if (pInfo == NULL) + return -1; + + const long long timecode_scale = pInfo->GetTimeCodeScale(); + + if (timecode_scale < 1) // weird + return -1; + + if (timecode < 0) + return -1; + + const long long result = timecode_scale * timecode; + + return result; +} + + +long Chapters::Atom::ParseDisplay( + IMkvReader* pReader, + long long pos, + long long size) +{ + if (!ExpandDisplaysArray()) + return -1; + + 
Display& d = m_displays[m_displays_count++]; + d.Init(); + + return d.Parse(pReader, pos, size); +} + + +bool Chapters::Atom::ExpandDisplaysArray() +{ + if (m_displays_size > m_displays_count) + return true; // nothing else to do + + const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; + + Display* const displays = new (std::nothrow) Display[size]; + + if (displays == NULL) + return false; + + for (int idx = 0; idx < m_displays_count; ++idx) + { + m_displays[idx].ShallowCopy(displays[idx]); + } + + delete[] m_displays; + m_displays = displays; + + m_displays_size = size; + return true; +} + + +Chapters::Display::Display() +{ +} + + +Chapters::Display::~Display() +{ +} + + +const char* Chapters::Display::GetString() const +{ + return m_string; +} + + +const char* Chapters::Display::GetLanguage() const +{ + return m_language; +} + + +const char* Chapters::Display::GetCountry() const +{ + return m_country; +} + + +void Chapters::Display::Init() +{ + m_string = NULL; + m_language = NULL; + m_country = NULL; +} + + +void Chapters::Display::ShallowCopy(Display& rhs) const +{ + rhs.m_string = m_string; + rhs.m_language = m_language; + rhs.m_country = m_country; +} + + +void Chapters::Display::Clear() +{ + delete[] m_string; + m_string = NULL; + + delete[] m_language; + m_language = NULL; + + delete[] m_country; + m_country = NULL; +} + + +long Chapters::Display::Parse( + IMkvReader* pReader, + long long pos, + long long size) +{ + const long long stop = pos + size; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == 0x05) // ChapterString ID + { + status = UnserializeString(pReader, pos, size, m_string); + + if (status) + return status; + } + else if (id == 0x037C) // ChapterLanguage ID + { + status = UnserializeString(pReader, pos, size, m_language); + + if (status) + return status; + 
} + else if (id == 0x037E) // ChapterCountry ID + { + status = UnserializeString(pReader, pos, size, m_country); + + if (status) + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +SegmentInfo::SegmentInfo( + Segment* pSegment, + long long start, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_pMuxingAppAsUTF8(NULL), + m_pWritingAppAsUTF8(NULL), + m_pTitleAsUTF8(NULL) +{ +} + +SegmentInfo::~SegmentInfo() +{ + delete[] m_pMuxingAppAsUTF8; + m_pMuxingAppAsUTF8 = NULL; + + delete[] m_pWritingAppAsUTF8; + m_pWritingAppAsUTF8 = NULL; + + delete[] m_pTitleAsUTF8; + m_pTitleAsUTF8 = NULL; +} + + +long SegmentInfo::Parse() +{ + assert(m_pMuxingAppAsUTF8 == NULL); + assert(m_pWritingAppAsUTF8 == NULL); + assert(m_pTitleAsUTF8 == NULL); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; + const long long stop = m_start + m_size; + + m_timecodeScale = 1000000; + m_duration = -1; + + while (pos < stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x0AD7B1) //Timecode Scale + { + m_timecodeScale = UnserializeUInt(pReader, pos, size); + + if (m_timecodeScale <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0489) //Segment duration + { + const long status = UnserializeFloat( + pReader, + pos, + size, + m_duration); + + if (status < 0) + return status; + + if (m_duration < 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0D80) //MuxingApp + { + const long status = UnserializeString( + pReader, + pos, + size, + m_pMuxingAppAsUTF8); + + if (status) + return status; + } + else if (id == 0x1741) //WritingApp + { + const long status = UnserializeString( + pReader, + pos, + size, + 
m_pWritingAppAsUTF8); + + if (status) + return status; + } + else if (id == 0x3BA9) //Title + { + const long status = UnserializeString( + pReader, + pos, + size, + m_pTitleAsUTF8); + + if (status) + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + + return 0; +} + + +long long SegmentInfo::GetTimeCodeScale() const +{ + return m_timecodeScale; +} + + +long long SegmentInfo::GetDuration() const +{ + if (m_duration < 0) + return -1; + + assert(m_timecodeScale >= 1); + + const double dd = double(m_duration) * double(m_timecodeScale); + const long long d = static_cast<long long>(dd); + + return d; +} + +const char* SegmentInfo::GetMuxingAppAsUTF8() const +{ + return m_pMuxingAppAsUTF8; +} + + +const char* SegmentInfo::GetWritingAppAsUTF8() const +{ + return m_pWritingAppAsUTF8; +} + +const char* SegmentInfo::GetTitleAsUTF8() const +{ + return m_pTitleAsUTF8; +} + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +ContentEncoding::ContentCompression::ContentCompression() + : algo(0), + settings(NULL), + settings_len(0) { +} + +ContentEncoding::ContentCompression::~ContentCompression() { + delete [] settings; +} + +ContentEncoding::ContentEncryption::ContentEncryption() + : algo(0), + key_id(NULL), + key_id_len(0), + signature(NULL), + signature_len(0), + sig_key_id(NULL), + sig_key_id_len(0), + sig_algo(0), + sig_hash_algo(0) { +} + +ContentEncoding::ContentEncryption::~ContentEncryption() { + delete [] key_id; + delete [] signature; + delete [] sig_key_id; +} + +ContentEncoding::ContentEncoding() + : compression_entries_(NULL), + compression_entries_end_(NULL), + encryption_entries_(NULL), + encryption_entries_end_(NULL), + encoding_order_(0), + encoding_scope_(1), + encoding_type_(0) { +} + +ContentEncoding::~ContentEncoding() { + ContentCompression** comp_i = compression_entries_; + ContentCompression** const comp_j = compression_entries_end_; + + while (comp_i != comp_j) { + 
ContentCompression* const comp = *comp_i++; + delete comp; + } + + delete [] compression_entries_; + + ContentEncryption** enc_i = encryption_entries_; + ContentEncryption** const enc_j = encryption_entries_end_; + + while (enc_i != enc_j) { + ContentEncryption* const enc = *enc_i++; + delete enc; + } + + delete [] encryption_entries_; +} + + +const ContentEncoding::ContentCompression* +ContentEncoding::GetCompressionByIndex(unsigned long idx) const { + const ptrdiff_t count = compression_entries_end_ - compression_entries_; + assert(count >= 0); + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return compression_entries_[idx]; +} + +unsigned long ContentEncoding::GetCompressionCount() const { + const ptrdiff_t count = compression_entries_end_ - compression_entries_; + assert(count >= 0); + + return static_cast<unsigned long>(count); +} + +const ContentEncoding::ContentEncryption* +ContentEncoding::GetEncryptionByIndex(unsigned long idx) const { + const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; + assert(count >= 0); + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return encryption_entries_[idx]; +} + +unsigned long ContentEncoding::GetEncryptionCount() const { + const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; + assert(count >= 0); + + return static_cast<unsigned long>(count); +} + +long ContentEncoding::ParseContentEncAESSettingsEntry( + long long start, + long long size, + IMkvReader* pReader, + ContentEncAESSettings* aes) { + assert(pReader); + assert(aes); + + long long pos = start; + const long long stop = start + size; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x7E8) { + // AESSettingsCipherMode + aes->cipher_mode = UnserializeUInt(pReader, pos, size); + if (aes->cipher_mode != 1) + return E_FILE_FORMAT_INVALID; + } + + pos += size; 
//consume payload + assert(pos <= stop); + } + + return 0; +} + +long ContentEncoding::ParseContentEncodingEntry(long long start, + long long size, + IMkvReader* pReader) { + assert(pReader); + + long long pos = start; + const long long stop = start + size; + + // Count ContentCompression and ContentEncryption elements. + int compression_count = 0; + int encryption_count = 0; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x1034) // ContentCompression ID + ++compression_count; + + if (id == 0x1035) // ContentEncryption ID + ++encryption_count; + + pos += size; //consume payload + assert(pos <= stop); + } + + if (compression_count <= 0 && encryption_count <= 0) + return -1; + + if (compression_count > 0) { + compression_entries_ = + new (std::nothrow) ContentCompression*[compression_count]; + if (!compression_entries_) + return -1; + compression_entries_end_ = compression_entries_; + } + + if (encryption_count > 0) { + encryption_entries_ = + new (std::nothrow) ContentEncryption*[encryption_count]; + if (!encryption_entries_) { + delete [] compression_entries_; + return -1; + } + encryption_entries_end_ = encryption_entries_; + } + + pos = start; + while (pos < stop) { + long long id, size; + long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x1031) { + // ContentEncodingOrder + encoding_order_ = UnserializeUInt(pReader, pos, size); + } else if (id == 0x1032) { + // ContentEncodingScope + encoding_scope_ = UnserializeUInt(pReader, pos, size); + if (encoding_scope_ < 1) + return -1; + } else if (id == 0x1033) { + // ContentEncodingType + encoding_type_ = UnserializeUInt(pReader, pos, size); + } else if (id == 0x1034) { + // ContentCompression ID + ContentCompression* const compression = + new (std::nothrow) ContentCompression(); + if (!compression) + 
return -1; + + status = ParseCompressionEntry(pos, size, pReader, compression); + if (status) { + delete compression; + return status; + } + *compression_entries_end_++ = compression; + } else if (id == 0x1035) { + // ContentEncryption ID + ContentEncryption* const encryption = + new (std::nothrow) ContentEncryption(); + if (!encryption) + return -1; + + status = ParseEncryptionEntry(pos, size, pReader, encryption); + if (status) { + delete encryption; + return status; + } + *encryption_entries_end_++ = encryption; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + +long ContentEncoding::ParseCompressionEntry( + long long start, + long long size, + IMkvReader* pReader, + ContentCompression* compression) { + assert(pReader); + assert(compression); + + long long pos = start; + const long long stop = start + size; + + bool valid = false; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x254) { + // ContentCompAlgo + long long algo = UnserializeUInt(pReader, pos, size); + if (algo < 0) + return E_FILE_FORMAT_INVALID; + compression->algo = algo; + valid = true; + } else if (id == 0x255) { + // ContentCompSettings + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; + + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + compression->settings = buf; + compression->settings_len = buflen; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + // ContentCompAlgo is mandatory + if (!valid) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +long ContentEncoding::ParseEncryptionEntry( + long long start, + long long size, + 
IMkvReader* pReader, + ContentEncryption* encryption) { + assert(pReader); + assert(encryption); + + long long pos = start; + const long long stop = start + size; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x7E1) { + // ContentEncAlgo + encryption->algo = UnserializeUInt(pReader, pos, size); + if (encryption->algo != 5) + return E_FILE_FORMAT_INVALID; + } else if (id == 0x7E2) { + // ContentEncKeyID + delete[] encryption->key_id; + encryption->key_id = NULL; + encryption->key_id_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; + + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + encryption->key_id = buf; + encryption->key_id_len = buflen; + } else if (id == 0x7E3) { + // ContentSignature + delete[] encryption->signature; + encryption->signature = NULL; + encryption->signature_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; + + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + encryption->signature = buf; + encryption->signature_len = buflen; + } else if (id == 0x7E4) { + // ContentSigKeyID + delete[] encryption->sig_key_id; + encryption->sig_key_id = NULL; + encryption->sig_key_id_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; 
+ + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + encryption->sig_key_id = buf; + encryption->sig_key_id_len = buflen; + } else if (id == 0x7E5) { + // ContentSigAlgo + encryption->sig_algo = UnserializeUInt(pReader, pos, size); + } else if (id == 0x7E6) { + // ContentSigHashAlgo + encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); + } else if (id == 0x7E7) { + // ContentEncAESSettings + const long status = ParseContentEncAESSettingsEntry( + pos, + size, + pReader, + &encryption->aes_settings); + if (status) + return status; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + return 0; +} + +Track::Track( + Segment* pSegment, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_element_start(element_start), + m_element_size(element_size), + content_encoding_entries_(NULL), + content_encoding_entries_end_(NULL) +{ +} + +Track::~Track() +{ + Info& info = const_cast<Info&>(m_info); + info.Clear(); + + ContentEncoding** i = content_encoding_entries_; + ContentEncoding** const j = content_encoding_entries_end_; + + while (i != j) { + ContentEncoding* const encoding = *i++; + delete encoding; + } + + delete [] content_encoding_entries_; +} + +long Track::Create( + Segment* pSegment, + const Info& info, + long long element_start, + long long element_size, + Track*& pResult) +{ + if (pResult) + return -1; + + Track* const pTrack = new (std::nothrow) Track(pSegment, + element_start, + element_size); + + if (pTrack == NULL) + return -1; //generic error + + const int status = info.Copy(pTrack->m_info); + + if (status) // error + { + delete pTrack; + return status; + } + + pResult = pTrack; + return 0; //success +} + +Track::Info::Info(): + uid(0), + defaultDuration(0), + codecDelay(0), + seekPreRoll(0), + nameAsUTF8(NULL), + language(NULL), + codecId(NULL), + codecNameAsUTF8(NULL), + codecPrivate(NULL), + codecPrivateSize(0), + lacing(false) 
+{ +} + +Track::Info::~Info() +{ + Clear(); +} + +void Track::Info::Clear() +{ + delete[] nameAsUTF8; + nameAsUTF8 = NULL; + + delete[] language; + language = NULL; + + delete[] codecId; + codecId = NULL; + + delete[] codecPrivate; + codecPrivate = NULL; + codecPrivateSize = 0; + + delete[] codecNameAsUTF8; + codecNameAsUTF8 = NULL; +} + +int Track::Info::CopyStr(char* Info::*str, Info& dst_) const +{ + if (str == static_cast<char* Info::*>(NULL)) + return -1; + + char*& dst = dst_.*str; + + if (dst) //should be NULL already + return -1; + + const char* const src = this->*str; + + if (src == NULL) + return 0; + + const size_t len = strlen(src); + + dst = new (std::nothrow) char[len+1]; + + if (dst == NULL) + return -1; + + strcpy(dst, src); + + return 0; +} + + +int Track::Info::Copy(Info& dst) const +{ + if (&dst == this) + return 0; + + dst.type = type; + dst.number = number; + dst.defaultDuration = defaultDuration; + dst.codecDelay = codecDelay; + dst.seekPreRoll = seekPreRoll; + dst.uid = uid; + dst.lacing = lacing; + dst.settings = settings; + + //We now copy the string member variables from src to dst. + //This involves memory allocation so in principle the operation + //can fail (indeed, that's why we have Info::Copy), so we must + //report this to the caller. An error return from this function + //therefore implies that the copy was only partially successful. 
+ + if (int status = CopyStr(&Info::nameAsUTF8, dst)) + return status; + + if (int status = CopyStr(&Info::language, dst)) + return status; + + if (int status = CopyStr(&Info::codecId, dst)) + return status; + + if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) + return status; + + if (codecPrivateSize > 0) + { + if (codecPrivate == NULL) + return -1; + + if (dst.codecPrivate) + return -1; + + if (dst.codecPrivateSize != 0) + return -1; + + dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize]; + + if (dst.codecPrivate == NULL) + return -1; + + memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); + dst.codecPrivateSize = codecPrivateSize; + } + + return 0; +} + +const BlockEntry* Track::GetEOS() const +{ + return &m_eos; +} + +long Track::GetType() const +{ + return m_info.type; +} + +long Track::GetNumber() const +{ + return m_info.number; +} + +unsigned long long Track::GetUid() const +{ + return m_info.uid; +} + +const char* Track::GetNameAsUTF8() const +{ + return m_info.nameAsUTF8; +} + +const char* Track::GetLanguage() const +{ + return m_info.language; +} + +const char* Track::GetCodecNameAsUTF8() const +{ + return m_info.codecNameAsUTF8; +} + + +const char* Track::GetCodecId() const +{ + return m_info.codecId; +} + +const unsigned char* Track::GetCodecPrivate(size_t& size) const +{ + size = m_info.codecPrivateSize; + return m_info.codecPrivate; +} + + +bool Track::GetLacing() const +{ + return m_info.lacing; +} + +unsigned long long Track::GetDefaultDuration() const +{ + return m_info.defaultDuration; +} + +unsigned long long Track::GetCodecDelay() const +{ + return m_info.codecDelay; +} + +unsigned long long Track::GetSeekPreRoll() const +{ + return m_info.seekPreRoll; +} + +long Track::GetFirst(const BlockEntry*& pBlockEntry) const +{ + const Cluster* pCluster = m_pSegment->GetFirst(); + + for (int i = 0; ; ) + { + if (pCluster == NULL) + { + pBlockEntry = GetEOS(); + return 1; + } + + if (pCluster->EOS()) + { +#if 0 + if 
(m_pSegment->Unparsed() <= 0) //all clusters have been loaded + { + pBlockEntry = GetEOS(); + return 1; + } +#else + if (m_pSegment->DoneParsing()) + { + pBlockEntry = GetEOS(); + return 1; + } +#endif + + pBlockEntry = 0; + return E_BUFFER_NOT_FULL; + } + + long status = pCluster->GetFirst(pBlockEntry); + + if (status < 0) //error + return status; + + if (pBlockEntry == 0) //empty cluster + { + pCluster = m_pSegment->GetNext(pCluster); + continue; + } + + for (;;) + { + const Block* const pBlock = pBlockEntry->GetBlock(); + assert(pBlock); + + const long long tn = pBlock->GetTrackNumber(); + + if ((tn == m_info.number) && VetEntry(pBlockEntry)) + return 0; + + const BlockEntry* pNextEntry; + + status = pCluster->GetNext(pBlockEntry, pNextEntry); + + if (status < 0) //error + return status; + + if (pNextEntry == 0) + break; + + pBlockEntry = pNextEntry; + } + + ++i; + + if (i >= 100) + break; + + pCluster = m_pSegment->GetNext(pCluster); + } + + //NOTE: if we get here, it means that we didn't find a block with + //a matching track number. We interpret that as an error (which + //might be too conservative). + + pBlockEntry = GetEOS(); //so we can return a non-NULL value + return 1; +} + + +long Track::GetNext( + const BlockEntry* pCurrEntry, + const BlockEntry*& pNextEntry) const +{ + assert(pCurrEntry); + assert(!pCurrEntry->EOS()); //? 
+ + const Block* const pCurrBlock = pCurrEntry->GetBlock(); + assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number); + if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number) + return -1; + + const Cluster* pCluster = pCurrEntry->GetCluster(); + assert(pCluster); + assert(!pCluster->EOS()); + + long status = pCluster->GetNext(pCurrEntry, pNextEntry); + + if (status < 0) //error + return status; + + for (int i = 0; ; ) + { + while (pNextEntry) + { + const Block* const pNextBlock = pNextEntry->GetBlock(); + assert(pNextBlock); + + if (pNextBlock->GetTrackNumber() == m_info.number) + return 0; + + pCurrEntry = pNextEntry; + + status = pCluster->GetNext(pCurrEntry, pNextEntry); + + if (status < 0) //error + return status; + } + + pCluster = m_pSegment->GetNext(pCluster); + + if (pCluster == NULL) + { + pNextEntry = GetEOS(); + return 1; + } + + if (pCluster->EOS()) + { +#if 0 + if (m_pSegment->Unparsed() <= 0) //all clusters have been loaded + { + pNextEntry = GetEOS(); + return 1; + } +#else + if (m_pSegment->DoneParsing()) + { + pNextEntry = GetEOS(); + return 1; + } +#endif + + //TODO: there is a potential O(n^2) problem here: we tell the + //caller to (pre)load another cluster, which he does, but then he + //calls GetNext again, which repeats the same search. This is + //a pathological case, since the only way it can happen is if + //there exists a long sequence of clusters none of which contain a + // block from this track. One way around this problem is for the + //caller to be smarter when he loads another cluster: don't call + //us back until you have a cluster that contains a block from this + //track. (Of course, that's not cheap either, since our caller + //would have to scan the each cluster as it's loaded, so that + //would just push back the problem.) 
+ + pNextEntry = NULL; + return E_BUFFER_NOT_FULL; + } + + status = pCluster->GetFirst(pNextEntry); + + if (status < 0) //error + return status; + + if (pNextEntry == NULL) //empty cluster + continue; + + ++i; + + if (i >= 100) + break; + } + + //NOTE: if we get here, it means that we didn't find a block with + //a matching track number after lots of searching, so we give + //up trying. + + pNextEntry = GetEOS(); //so we can return a non-NULL value + return 1; +} + +bool Track::VetEntry(const BlockEntry* pBlockEntry) const +{ + assert(pBlockEntry); + const Block* const pBlock = pBlockEntry->GetBlock(); + assert(pBlock); + assert(pBlock->GetTrackNumber() == m_info.number); + if (!pBlock || pBlock->GetTrackNumber() != m_info.number) + return false; + + // This function is used during a seek to determine whether the + // frame is a valid seek target. This default function simply + // returns true, which means all frames are valid seek targets. + // It gets overridden by the VideoTrack class, because only video + // keyframes can be used as seek target. + + return true; +} + +long Track::Seek( + long long time_ns, + const BlockEntry*& pResult) const +{ + const long status = GetFirst(pResult); + + if (status < 0) //buffer underflow, etc + return status; + + assert(pResult); + + if (pResult->EOS()) + return 0; + + const Cluster* pCluster = pResult->GetCluster(); + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + + if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) + return 0; + + Cluster** const clusters = m_pSegment->m_clusters; + assert(clusters); + + const long count = m_pSegment->GetCount(); //loaded only, not preloaded + assert(count > 0); + + Cluster** const i = clusters + pCluster->GetIndex(); + assert(i); + assert(*i == pCluster); + assert(pCluster->GetTime() <= time_ns); + + Cluster** const j = clusters + count; + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? 
+ //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + pCluster = *mid; + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + while (lo > i) + { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this); + + if ((pResult != 0) && !pResult->EOS()) + return 0; + + //landed on empty cluster (no entries) + } + + pResult = GetEOS(); //weird + return 0; +} + +const ContentEncoding* +Track::GetContentEncodingByIndex(unsigned long idx) const { + const ptrdiff_t count = + content_encoding_entries_end_ - content_encoding_entries_; + assert(count >= 0); + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return content_encoding_entries_[idx]; +} + +unsigned long Track::GetContentEncodingCount() const { + const ptrdiff_t count = + content_encoding_entries_end_ - content_encoding_entries_; + assert(count >= 0); + + return static_cast<unsigned long>(count); +} + +long Track::ParseContentEncodingsEntry(long long start, long long size) { + IMkvReader* const pReader = m_pSegment->m_pReader; + assert(pReader); + + long long pos = start; + const long long stop = start + size; + + // Count ContentEncoding elements. 
+ int count = 0; + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + + //pos now designates start of element + if (id == 0x2240) // ContentEncoding ID + ++count; + + pos += size; //consume payload + assert(pos <= stop); + } + + if (count <= 0) + return -1; + + content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count]; + if (!content_encoding_entries_) + return -1; + + content_encoding_entries_end_ = content_encoding_entries_; + + pos = start; + while (pos < stop) { + long long id, size; + long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + //pos now designates start of element + if (id == 0x2240) { // ContentEncoding ID + ContentEncoding* const content_encoding = + new (std::nothrow) ContentEncoding(); + if (!content_encoding) + return -1; + + status = content_encoding->ParseContentEncodingEntry(pos, + size, + pReader); + if (status) { + delete content_encoding; + return status; + } + + *content_encoding_entries_end_++ = content_encoding; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + return 0; +} + +Track::EOSBlock::EOSBlock() : + BlockEntry(NULL, LONG_MIN) +{ +} + +BlockEntry::Kind Track::EOSBlock::GetKind() const +{ + return kBlockEOS; +} + + +const Block* Track::EOSBlock::GetBlock() const +{ + return NULL; +} + + +VideoTrack::VideoTrack( + Segment* pSegment, + long long element_start, + long long element_size) : + Track(pSegment, element_start, element_size) +{ +} + + +long VideoTrack::Parse( + Segment* pSegment, + const Info& info, + long long element_start, + long long element_size, + VideoTrack*& pResult) +{ + if (pResult) + return -1; + + if (info.type != Track::kVideo) + return -1; + + long long width = 0; + long long height = 0; + double rate = 0.0; + + IMkvReader* const pReader = pSegment->m_pReader; + + const 
Settings& s = info.settings; + assert(s.start >= 0); + assert(s.size >= 0); + + long long pos = s.start; + assert(pos >= 0); + + const long long stop = pos + s.size; + + while (pos < stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x30) //pixel width + { + width = UnserializeUInt(pReader, pos, size); + + if (width <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x3A) //pixel height + { + height = UnserializeUInt(pReader, pos, size); + + if (height <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0383E3) //frame rate + { + const long status = UnserializeFloat( + pReader, + pos, + size, + rate); + + if (status < 0) + return status; + + if (rate <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + VideoTrack* const pTrack = new (std::nothrow) VideoTrack(pSegment, + element_start, + element_size); + + if (pTrack == NULL) + return -1; //generic error + + const int status = info.Copy(pTrack->m_info); + + if (status) // error + { + delete pTrack; + return status; + } + + pTrack->m_width = width; + pTrack->m_height = height; + pTrack->m_rate = rate; + + pResult = pTrack; + return 0; //success +} + + +bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const +{ + return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); +} + +long VideoTrack::Seek( + long long time_ns, + const BlockEntry*& pResult) const +{ + const long status = GetFirst(pResult); + + if (status < 0) //buffer underflow, etc + return status; + + assert(pResult); + + if (pResult->EOS()) + return 0; + + const Cluster* pCluster = pResult->GetCluster(); + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + + if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) + return 0; + + Cluster** const clusters = m_pSegment->m_clusters; + assert(clusters); + + const 
long count = m_pSegment->GetCount(); //loaded only, not pre-loaded + assert(count > 0); + + Cluster** const i = clusters + pCluster->GetIndex(); + assert(i); + assert(*i == pCluster); + assert(pCluster->GetTime() <= time_ns); + + Cluster** const j = clusters + count; + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? + //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + pCluster = *mid; + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this, time_ns); + + if ((pResult != 0) && !pResult->EOS()) //found a keyframe + return 0; + + while (lo != i) + { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + +#if 0 + //TODO: + //We need to handle the case when a cluster + //contains multiple keyframes. Simply returning + //the largest keyframe on the cluster isn't + //good enough. 
+ pResult = pCluster->GetMaxKey(this); +#else + pResult = pCluster->GetEntry(this, time_ns); +#endif + + if ((pResult != 0) && !pResult->EOS()) + return 0; + } + + //weird: we're on the first cluster, but no keyframe found + //should never happen but we must return something anyway + + pResult = GetEOS(); + return 0; +} + + +long long VideoTrack::GetWidth() const +{ + return m_width; +} + + +long long VideoTrack::GetHeight() const +{ + return m_height; +} + + +double VideoTrack::GetFrameRate() const +{ + return m_rate; +} + + +AudioTrack::AudioTrack( + Segment* pSegment, + long long element_start, + long long element_size) : + Track(pSegment, element_start, element_size) +{ +} + + +long AudioTrack::Parse( + Segment* pSegment, + const Info& info, + long long element_start, + long long element_size, + AudioTrack*& pResult) +{ + if (pResult) + return -1; + + if (info.type != Track::kAudio) + return -1; + + IMkvReader* const pReader = pSegment->m_pReader; + + const Settings& s = info.settings; + assert(s.start >= 0); + assert(s.size >= 0); + + long long pos = s.start; + assert(pos >= 0); + + const long long stop = pos + s.size; + + double rate = 8000.0; // MKV default + long long channels = 1; + long long bit_depth = 0; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x35) //Sample Rate + { + status = UnserializeFloat(pReader, pos, size, rate); + + if (status < 0) + return status; + + if (rate <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x1F) //Channel Count + { + channels = UnserializeUInt(pReader, pos, size); + + if (channels <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x2264) //Bit Depth + { + bit_depth = UnserializeUInt(pReader, pos, size); + + if (bit_depth <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + 
AudioTrack* const pTrack = new (std::nothrow) AudioTrack(pSegment, + element_start, + element_size); + + if (pTrack == NULL) + return -1; //generic error + + const int status = info.Copy(pTrack->m_info); + + if (status) + { + delete pTrack; + return status; + } + + pTrack->m_rate = rate; + pTrack->m_channels = channels; + pTrack->m_bitDepth = bit_depth; + + pResult = pTrack; + return 0; //success +} + + +double AudioTrack::GetSamplingRate() const +{ + return m_rate; +} + + +long long AudioTrack::GetChannels() const +{ + return m_channels; +} + +long long AudioTrack::GetBitDepth() const +{ + return m_bitDepth; +} + +Tracks::Tracks( + Segment* pSegment, + long long start, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_trackEntries(NULL), + m_trackEntriesEnd(NULL) +{ +} + + +long Tracks::Parse() +{ + assert(m_trackEntries == NULL); + assert(m_trackEntriesEnd == NULL); + + const long long stop = m_start + m_size; + IMkvReader* const pReader = m_pSegment->m_pReader; + + int count = 0; + long long pos = m_start; + + while (pos < stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (size == 0) //weird + continue; + + if (id == 0x2E) //TrackEntry ID + ++count; + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + if (count <= 0) + return 0; //success + + m_trackEntries = new (std::nothrow) Track*[count]; + + if (m_trackEntries == NULL) + return -1; + + m_trackEntriesEnd = m_trackEntries; + + pos = m_start; + + while (pos < stop) + { + const long long element_start = pos; + + long long id, payload_size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + payload_size); + + if (status < 0) //error + return status; + + if (payload_size == 0) 
//weird + continue; + + const long long payload_stop = pos + payload_size; + assert(payload_stop <= stop); //checked in ParseElement + + const long long element_size = payload_stop - element_start; + + if (id == 0x2E) //TrackEntry ID + { + Track*& pTrack = *m_trackEntriesEnd; + pTrack = NULL; + + const long status = ParseTrackEntry( + pos, + payload_size, + element_start, + element_size, + pTrack); + + if (status) + return status; + + if (pTrack) + ++m_trackEntriesEnd; + } + + pos = payload_stop; + assert(pos <= stop); + } + + assert(pos == stop); + + return 0; //success +} + + +unsigned long Tracks::GetTracksCount() const +{ + const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; + assert(result >= 0); + + return static_cast<unsigned long>(result); +} + +long Tracks::ParseTrackEntry( + long long track_start, + long long track_size, + long long element_start, + long long element_size, + Track*& pResult) const +{ + if (pResult) + return -1; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = track_start; + const long long track_stop = track_start + track_size; + + Track::Info info; + + info.type = 0; + info.number = 0; + info.uid = 0; + info.defaultDuration = 0; + + Track::Settings v; + v.start = -1; + v.size = -1; + + Track::Settings a; + a.start = -1; + a.size = -1; + + Track::Settings e; //content_encodings_settings; + e.start = -1; + e.size = -1; + + long long lacing = 1; //default is true + + while (pos < track_stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + track_stop, + id, + size); + + if (status < 0) //error + return status; + + if (size < 0) + return E_FILE_FORMAT_INVALID; + + const long long start = pos; + + if (id == 0x60) // VideoSettings ID + { + v.start = start; + v.size = size; + } + else if (id == 0x61) // AudioSettings ID + { + a.start = start; + a.size = size; + } + else if (id == 0x2D80) // ContentEncodings ID + { + e.start = start; + e.size = size; + } + else if (id == 
0x33C5) //Track UID + { + if (size > 8) + return E_FILE_FORMAT_INVALID; + + info.uid = 0; + + long long pos_ = start; + const long long pos_end = start + size; + + while (pos_ != pos_end) + { + unsigned char b; + + const int status = pReader->Read(pos_, 1, &b); + + if (status) + return status; + + info.uid <<= 8; + info.uid |= b; + + ++pos_; + } + } + else if (id == 0x57) //Track Number + { + const long long num = UnserializeUInt(pReader, pos, size); + + if ((num <= 0) || (num > 127)) + return E_FILE_FORMAT_INVALID; + + info.number = static_cast<long>(num); + } + else if (id == 0x03) //Track Type + { + const long long type = UnserializeUInt(pReader, pos, size); + + if ((type <= 0) || (type > 254)) + return E_FILE_FORMAT_INVALID; + + info.type = static_cast<long>(type); + } + else if (id == 0x136E) //Track Name + { + const long status = UnserializeString( + pReader, + pos, + size, + info.nameAsUTF8); + + if (status) + return status; + } + else if (id == 0x02B59C) //Track Language + { + const long status = UnserializeString( + pReader, + pos, + size, + info.language); + + if (status) + return status; + } + else if (id == 0x03E383) //Default Duration + { + const long long duration = UnserializeUInt(pReader, pos, size); + + if (duration < 0) + return E_FILE_FORMAT_INVALID; + + info.defaultDuration = static_cast<unsigned long long>(duration); + } + else if (id == 0x06) //CodecID + { + const long status = UnserializeString( + pReader, + pos, + size, + info.codecId); + + if (status) + return status; + } + else if (id == 0x1C) //lacing + { + lacing = UnserializeUInt(pReader, pos, size); + + if ((lacing < 0) || (lacing > 1)) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x23A2) //Codec Private + { + delete[] info.codecPrivate; + info.codecPrivate = NULL; + info.codecPrivateSize = 0; + + const size_t buflen = static_cast<size_t>(size); + + if (buflen) + { + typedef unsigned char* buf_t; + + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + + if (buf == 
NULL) + return -1; + + const int status = pReader->Read(pos, buflen, buf); + + if (status) + { + delete[] buf; + return status; + } + + info.codecPrivate = buf; + info.codecPrivateSize = buflen; + } + } + else if (id == 0x058688) //Codec Name + { + const long status = UnserializeString( + pReader, + pos, + size, + info.codecNameAsUTF8); + + if (status) + return status; + } + else if (id == 0x16AA) //Codec Delay + { + info.codecDelay = UnserializeUInt(pReader, pos, size); + + } + else if (id == 0x16BB) //Seek Pre Roll + { + info.seekPreRoll = UnserializeUInt(pReader, pos, size); + } + + pos += size; //consume payload + assert(pos <= track_stop); + } + + assert(pos == track_stop); + + if (info.number <= 0) //not specified + return E_FILE_FORMAT_INVALID; + + if (GetTrackByNumber(info.number)) + return E_FILE_FORMAT_INVALID; + + if (info.type <= 0) //not specified + return E_FILE_FORMAT_INVALID; + + info.lacing = (lacing > 0) ? true : false; + + if (info.type == Track::kVideo) + { + if (v.start < 0) + return E_FILE_FORMAT_INVALID; + + if (a.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings = v; + + VideoTrack* pTrack = NULL; + + const long status = VideoTrack::Parse(m_pSegment, + info, + element_start, + element_size, + pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + + if (e.start >= 0) + pResult->ParseContentEncodingsEntry(e.start, e.size); + } + else if (info.type == Track::kAudio) + { + if (a.start < 0) + return E_FILE_FORMAT_INVALID; + + if (v.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings = a; + + AudioTrack* pTrack = NULL; + + const long status = AudioTrack::Parse(m_pSegment, + info, + element_start, + element_size, + pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + + if (e.start >= 0) + pResult->ParseContentEncodingsEntry(e.start, e.size); + } + else + { + // neither video nor audio - probably metadata or subtitles + + if (a.start >= 0) + return 
E_FILE_FORMAT_INVALID; + + if (v.start >= 0) + return E_FILE_FORMAT_INVALID; + + if (e.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings.start = -1; + info.settings.size = 0; + + Track* pTrack = NULL; + + const long status = Track::Create(m_pSegment, + info, + element_start, + element_size, + pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + } + + return 0; //success +} + + +Tracks::~Tracks() +{ + Track** i = m_trackEntries; + Track** const j = m_trackEntriesEnd; + + while (i != j) + { + Track* const pTrack = *i++; + delete pTrack; + } + + delete[] m_trackEntries; +} + +const Track* Tracks::GetTrackByNumber(long tn) const +{ + if (tn < 0) + return NULL; + + Track** i = m_trackEntries; + Track** const j = m_trackEntriesEnd; + + while (i != j) + { + Track* const pTrack = *i++; + + if (pTrack == NULL) + continue; + + if (tn == pTrack->GetNumber()) + return pTrack; + } + + return NULL; //not found +} + + +const Track* Tracks::GetTrackByIndex(unsigned long idx) const +{ + const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return m_trackEntries[idx]; +} + +#if 0 +long long Cluster::Unparsed() const +{ + if (m_timecode < 0) //not even partially loaded + return LLONG_MAX; + + assert(m_pos >= m_element_start); + //assert(m_element_size > m_size); + + const long long element_stop = m_element_start + m_element_size; + assert(m_pos <= element_stop); + + const long long result = element_stop - m_pos; + assert(result >= 0); + + return result; +} +#endif + + +long Cluster::Load(long long& pos, long& len) const +{ + assert(m_pSegment); + assert(m_pos >= m_element_start); + + if (m_timecode >= 0) //at least partially loaded + return 0; + + assert(m_pos == m_element_start); + assert(m_element_size < 0); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + const int status = pReader->Length(&total, &avail); + + if (status < 0) 
//error + return status; + + assert((total < 0) || (avail <= total)); + assert((total < 0) || (m_pos <= total)); //TODO: verify this + + pos = m_pos; + + long long cluster_size = -1; + + { + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error or underflow + return static_cast<long>(result); + + if (result > 0) //underflow (weird) + return E_BUFFER_NOT_FULL; + + //if ((pos + len) > segment_stop) + // return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id_ = ReadUInt(pReader, pos, len); + + if (id_ < 0) //error + return static_cast<long>(id_); + + if (id_ != 0x0F43B675) //Cluster ID + return E_FILE_FORMAT_INVALID; + + pos += len; //consume id + + //read cluster size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + //if ((pos + len) > segment_stop) + // return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(cluster_size); + + if (size == 0) + return E_FILE_FORMAT_INVALID; //TODO: verify this + + pos += len; //consume length of size of element + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size != unknown_size) + cluster_size = size; + } + + //pos points to start of payload + +#if 0 + len = static_cast<long>(size_); + + if (cluster_stop > avail) + return E_BUFFER_NOT_FULL; +#endif + + long long timecode = -1; + long long new_pos = -1; + bool bBlock = false; + + long long cluster_stop = (cluster_size < 0) ? 
-1 : pos + cluster_size; + + for (;;) + { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + break; + + //Parse ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) + return E_FILE_FORMAT_INVALID; + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. + + if (id == 0x0F43B675) //Cluster ID + break; + + if (id == 0x0C53BB6B) //Cues ID + break; + + pos += len; //consume ID field + + //Parse Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume size field + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + //pos now points to start of payload + + if (size == 0) //weird + continue; + + if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (id == 0x67) //TimeCode ID + { + len = static_cast<long>(size); + + if ((pos + size) 
> avail) + return E_BUFFER_NOT_FULL; + + timecode = UnserializeUInt(pReader, pos, size); + + if (timecode < 0) //error (or underflow) + return static_cast<long>(timecode); + + new_pos = pos + size; + + if (bBlock) + break; + } + else if (id == 0x20) //BlockGroup ID + { + bBlock = true; + break; + } + else if (id == 0x23) //SimpleBlock ID + { + bBlock = true; + break; + } + + pos += size; //consume payload + assert((cluster_stop < 0) || (pos <= cluster_stop)); + } + + assert((cluster_stop < 0) || (pos <= cluster_stop)); + + if (timecode < 0) //no timecode found + return E_FILE_FORMAT_INVALID; + + if (!bBlock) + return E_FILE_FORMAT_INVALID; + + m_pos = new_pos; //designates position just beyond timecode payload + m_timecode = timecode; // m_timecode >= 0 means we're partially loaded + + if (cluster_size >= 0) + m_element_size = cluster_stop - m_element_start; + + return 0; +} + + +long Cluster::Parse(long long& pos, long& len) const +{ + long status = Load(pos, len); + + if (status < 0) + return status; + + assert(m_pos >= m_element_start); + assert(m_timecode >= 0); + //assert(m_size > 0); + //assert(m_element_size > m_size); + + const long long cluster_stop = + (m_element_size < 0) ? 
-1 : m_element_start + m_element_size; + + if ((cluster_stop >= 0) && (m_pos >= cluster_stop)) + return 1; //nothing else to do + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + pos = m_pos; + + for (;;) + { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + break; + + if ((total >= 0) && (pos >= total)) + { + if (m_element_size < 0) + m_element_size = pos - m_element_start; + + break; + } + + //Parse ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) //weird + return E_FILE_FORMAT_INVALID; + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. 
+ + if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster or Cues ID + { + if (m_element_size < 0) + m_element_size = pos - m_element_start; + + break; + } + + pos += len; //consume ID field + + //Parse Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume size field + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + //pos now points to start of payload + + if (size == 0) //weird + continue; + + //const long long block_start = pos; + const long long block_stop = pos + size; + + if (cluster_stop >= 0) + { + if (block_stop > cluster_stop) + { + if ((id == 0x20) || (id == 0x23)) + return E_FILE_FORMAT_INVALID; + + pos = cluster_stop; + break; + } + } + else if ((total >= 0) && (block_stop > total)) + { + m_element_size = total - m_element_start; + pos = total; + break; + } + else if (block_stop > avail) + { + len = static_cast<long>(size); + return E_BUFFER_NOT_FULL; + } + + Cluster* const this_ = const_cast<Cluster*>(this); + + if (id == 0x20) //BlockGroup + return this_->ParseBlockGroup(size, pos, len); + + if (id == 0x23) //SimpleBlock + return this_->ParseSimpleBlock(size, pos, len); + + pos += size; //consume payload + assert((cluster_stop < 0) || (pos <= cluster_stop)); + } + + assert(m_element_size > 0); + + m_pos = pos; + assert((cluster_stop < 0) || (m_pos <= cluster_stop)); + + if (m_entries_count > 0) + { + const long idx = 
m_entries_count - 1; + + const BlockEntry* const pLast = m_entries[idx]; + assert(pLast); + + const Block* const pBlock = pLast->GetBlock(); + assert(pBlock); + + const long long start = pBlock->m_start; + + if ((total >= 0) && (start > total)) + return -1; //defend against trucated stream + + const long long size = pBlock->m_size; + + const long long stop = start + size; + assert((cluster_stop < 0) || (stop <= cluster_stop)); + + if ((total >= 0) && (stop > total)) + return -1; //defend against trucated stream + } + + return 1; //no more entries +} + + +long Cluster::ParseSimpleBlock( + long long block_size, + long long& pos, + long& len) +{ + const long long block_start = pos; + const long long block_stop = pos + block_size; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + //parse track number + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long track = ReadUInt(pReader, pos, len); + + if (track < 0) //error + return static_cast<long>(track); + + if (track == 0) + return E_FILE_FORMAT_INVALID; + +#if 0 + //TODO(matthewjheaney) + //This turned out to be too conservative. The problem is that + //if we see a track header in the tracks element with an unsupported + //track type, we throw that track header away, so it is not present + //in the track map. But even though we don't understand the track + //header, there are still blocks in the cluster with that track + //number. 
It was our decision to ignore that track header, so it's + //up to us to deal with blocks associated with that track -- we + //cannot simply report an error since technically there's nothing + //wrong with the file. + // + //For now we go ahead and finish the parse, creating a block entry + //for this block. This is somewhat wasteful, because without a + //track header there's nothing you can do with the block. What + //we really need here is a special return value that indicates to + //the caller that he should ignore this particular block, and + //continue parsing. + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(track); + + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return E_FILE_FORMAT_INVALID; +#endif + + pos += len; //consume track number + + if ((pos + 2) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 2) > avail) + { + len = 2; + return E_BUFFER_NOT_FULL; + } + + pos += 2; //consume timecode + + if ((pos + 1) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + unsigned char flags; + + status = pReader->Read(pos, 1, &flags); + + if (status < 0) //error or underflow + { + len = 1; + return status; + } + + ++pos; //consume flags byte + assert(pos <= avail); + + if (pos >= block_stop) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(flags & 0x06) >> 1; + + if ((lacing != 0) && (block_stop > avail)) + { + len = static_cast<long>(block_stop - pos); + return E_BUFFER_NOT_FULL; + } + + status = CreateBlock(0x23, //simple block id + block_start, block_size, + 0); //DiscardPadding + + if (status != 0) + return status; + + m_pos = block_stop; + + return 0; //success +} + + +long Cluster::ParseBlockGroup( + long long payload_size, + long long& pos, + long& len) +{ + const long long payload_start = pos; + const long long payload_stop = pos + payload_size; + + 
IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + if ((total >= 0) && (payload_stop > total)) + return E_FILE_FORMAT_INVALID; + + if (payload_stop > avail) + { + len = static_cast<long>(payload_size); + return E_BUFFER_NOT_FULL; + } + + long long discard_padding = 0; + + while (pos < payload_stop) + { + //parse sub-block element ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > payload_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) //not a value ID + return E_FILE_FORMAT_INVALID; + + pos += len; //consume ID field + + //Parse Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > payload_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field + + //pos now points to start of sub-block group payload + + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + if (id == 0x35A2) //DiscardPadding + { + result = GetUIntLength(pReader, pos, len); + + 
if (result < 0) //error + return static_cast<long>(result); + + status = UnserializeInt(pReader, pos, len, discard_padding); + + if (status < 0) //error + return status; + } + + if (id != 0x21) //sub-part of BlockGroup is not a Block + { + pos += size; //consume sub-part of block group + + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + + continue; + } + + const long long block_stop = pos + size; + + if (block_stop > payload_stop) + return E_FILE_FORMAT_INVALID; + + //parse track number + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long track = ReadUInt(pReader, pos, len); + + if (track < 0) //error + return static_cast<long>(track); + + if (track == 0) + return E_FILE_FORMAT_INVALID; + +#if 0 + //TODO(matthewjheaney) + //This turned out to be too conservative. The problem is that + //if we see a track header in the tracks element with an unsupported + //track type, we throw that track header away, so it is not present + //in the track map. But even though we don't understand the track + //header, there are still blocks in the cluster with that track + //number. It was our decision to ignore that track header, so it's + //up to us to deal with blocks associated with that track -- we + //cannot simply report an error since technically there's nothing + //wrong with the file. + // + //For now we go ahead and finish the parse, creating a block entry + //for this block. This is somewhat wasteful, because without a + //track header there's nothing you can do with the block. What + //we really need here is a special return value that indicates to + //the caller that he should ignore this particular block, and + //continue parsing. 
+ + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(track); + + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return E_FILE_FORMAT_INVALID; +#endif + + pos += len; //consume track number + + if ((pos + 2) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 2) > avail) + { + len = 2; + return E_BUFFER_NOT_FULL; + } + + pos += 2; //consume timecode + + if ((pos + 1) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + unsigned char flags; + + status = pReader->Read(pos, 1, &flags); + + if (status < 0) //error or underflow + { + len = 1; + return status; + } + + ++pos; //consume flags byte + assert(pos <= avail); + + if (pos >= block_stop) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(flags & 0x06) >> 1; + + if ((lacing != 0) && (block_stop > avail)) + { + len = static_cast<long>(block_stop - pos); + return E_BUFFER_NOT_FULL; + } + + pos = block_stop; //consume block-part of block group + assert(pos <= payload_stop); + } + + assert(pos == payload_stop); + + status = CreateBlock(0x20, //BlockGroup ID + payload_start, payload_size, + discard_padding); + if (status != 0) + return status; + + m_pos = payload_stop; + + return 0; //success +} + + +long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const +{ + assert(m_pos >= m_element_start); + + pEntry = NULL; + + if (index < 0) + return -1; //generic error + + if (m_entries_count < 0) + return E_BUFFER_NOT_FULL; + + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count <= m_entries_size); + + if (index < m_entries_count) + { + pEntry = m_entries[index]; + assert(pEntry); + + return 1; //found entry + } + + if (m_element_size < 0) //we don't know cluster end yet + return E_BUFFER_NOT_FULL; //underflow + + const long long element_stop = m_element_start + m_element_size; + + if (m_pos >= 
element_stop) + return 0; //nothing left to parse + + return E_BUFFER_NOT_FULL; //underflow, since more remains to be parsed +} + + +Cluster* Cluster::Create( + Segment* pSegment, + long idx, + long long off) + //long long element_size) +{ + assert(pSegment); + assert(off >= 0); + + const long long element_start = pSegment->m_start + off; + + Cluster* const pCluster = new Cluster(pSegment, + idx, + element_start); + //element_size); + assert(pCluster); + + return pCluster; +} + + +Cluster::Cluster() : + m_pSegment(NULL), + m_element_start(0), + m_index(0), + m_pos(0), + m_element_size(0), + m_timecode(0), + m_entries(NULL), + m_entries_size(0), + m_entries_count(0) //means "no entries" +{ +} + + +Cluster::Cluster( + Segment* pSegment, + long idx, + long long element_start + /* long long element_size */ ) : + m_pSegment(pSegment), + m_element_start(element_start), + m_index(idx), + m_pos(element_start), + m_element_size(-1 /* element_size */ ), + m_timecode(-1), + m_entries(NULL), + m_entries_size(0), + m_entries_count(-1) //means "has not been parsed yet" +{ +} + + +Cluster::~Cluster() +{ + if (m_entries_count <= 0) + return; + + BlockEntry** i = m_entries; + BlockEntry** const j = m_entries + m_entries_count; + + while (i != j) + { + BlockEntry* p = *i++; + assert(p); + + delete p; + } + + delete[] m_entries; +} + + +bool Cluster::EOS() const +{ + return (m_pSegment == NULL); +} + + +long Cluster::GetIndex() const +{ + return m_index; +} + + +long long Cluster::GetPosition() const +{ + const long long pos = m_element_start - m_pSegment->m_start; + assert(pos >= 0); + + return pos; +} + + +long long Cluster::GetElementSize() const +{ + return m_element_size; +} + + +#if 0 +bool Cluster::HasBlockEntries( + const Segment* pSegment, + long long off) //relative to start of segment payload +{ + assert(pSegment); + assert(off >= 0); //relative to segment + + IMkvReader* const pReader = pSegment->m_pReader; + + long long pos = pSegment->m_start + off; //absolute + long 
long size; + + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + (void)id; + assert(id >= 0); + assert(id == 0x0F43B675); //Cluster ID + + pos += len; //consume id + + size = ReadUInt(pReader, pos, len); + assert(size > 0); + + pos += len; //consume size + + //pos now points to start of payload + } + + const long long stop = pos + size; + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume id + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume size + + if (id == 0x20) //BlockGroup ID + return true; + + if (id == 0x23) //SimpleBlock ID + return true; + + pos += size; //consume payload + assert(pos <= stop); + } + + return false; +} +#endif + + +long Cluster::HasBlockEntries( + const Segment* pSegment, + long long off, //relative to start of segment payload + long long& pos, + long& len) +{ + assert(pSegment); + assert(off >= 0); //relative to segment + + IMkvReader* const pReader = pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + pos = pSegment->m_start + off; //absolute + + if ((total >= 0) && (pos >= total)) + return 0; //we don't even have a complete cluster + + const long long segment_stop = + (pSegment->m_size < 0) ? 
-1 : pSegment->m_start + pSegment->m_size; + + long long cluster_stop = -1; //interpreted later to mean "unknown size" + + { + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //need more data + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + len) > total)) + return 0; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id != 0x0F43B675) //weird: not cluster ID + return -1; //generic error + + pos += len; //consume Cluster ID field + + //read size field + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + len) > total)) + return 0; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + if (size == 0) + return 0; //cluster does not have entries + + pos += len; //consume size field + + //pos now points to start of payload + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size != unknown_size) + { + cluster_stop = pos + size; + assert(cluster_stop >= 0); + + if ((segment_stop >= 0) && (cluster_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (cluster_stop > total)) + //return E_FILE_FORMAT_INVALID; //too conservative + return 0; //cluster does not have any entries + } + } + + for (;;) + { + if ((cluster_stop >= 0) 
&& (pos >= cluster_stop)) + return 0; //no entries detected + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //need more data + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. + + if (id == 0x0F43B675) //Cluster ID + return 0; //no entries found + + if (id == 0x0C53BB6B) //Cues ID + return 0; //no entries found + + pos += len; //consume id field + + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + return E_FILE_FORMAT_INVALID; + + //read size field + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //underflow + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field + + //pos now points to start of payload + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not supported inside cluster + + if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (id == 0x20) 
//BlockGroup ID + return 1; //have at least one entry + + if (id == 0x23) //SimpleBlock ID + return 1; //have at least one entry + + pos += size; //consume payload + assert((cluster_stop < 0) || (pos <= cluster_stop)); + } +} + + +long long Cluster::GetTimeCode() const +{ + long long pos; + long len; + + const long status = Load(pos, len); + + if (status < 0) //error + return status; + + return m_timecode; +} + + +long long Cluster::GetTime() const +{ + const long long tc = GetTimeCode(); + + if (tc < 0) + return tc; + + const SegmentInfo* const pInfo = m_pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long t = m_timecode * scale; + + return t; +} + + +long long Cluster::GetFirstTime() const +{ + const BlockEntry* pEntry; + + const long status = GetFirst(pEntry); + + if (status < 0) //error + return status; + + if (pEntry == NULL) //empty cluster + return GetTime(); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + return pBlock->GetTime(this); +} + + +long long Cluster::GetLastTime() const +{ + const BlockEntry* pEntry; + + const long status = GetLast(pEntry); + + if (status < 0) //error + return status; + + if (pEntry == NULL) //empty cluster + return GetTime(); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + return pBlock->GetTime(this); +} + + +long Cluster::CreateBlock( + long long id, + long long pos, //absolute pos of payload + long long size, + long long discard_padding) +{ + assert((id == 0x20) || (id == 0x23)); //BlockGroup or SimpleBlock + + if (m_entries_count < 0) //haven't parsed anything yet + { + assert(m_entries == NULL); + assert(m_entries_size == 0); + + m_entries_size = 1024; + m_entries = new BlockEntry*[m_entries_size]; + + m_entries_count = 0; + } + else + { + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count <= m_entries_size); + + if (m_entries_count >= m_entries_size) + { + const long 
entries_size = 2 * m_entries_size; + + BlockEntry** const entries = new BlockEntry*[entries_size]; + assert(entries); + + BlockEntry** src = m_entries; + BlockEntry** const src_end = src + m_entries_count; + + BlockEntry** dst = entries; + + while (src != src_end) + *dst++ = *src++; + + delete[] m_entries; + + m_entries = entries; + m_entries_size = entries_size; + } + } + + if (id == 0x20) //BlockGroup ID + return CreateBlockGroup(pos, size, discard_padding); + else //SimpleBlock ID + return CreateSimpleBlock(pos, size); +} + + +long Cluster::CreateBlockGroup( + long long start_offset, + long long size, + long long discard_padding) +{ + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count >= 0); + assert(m_entries_count < m_entries_size); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = start_offset; + const long long stop = start_offset + size; + + //For WebM files, there is a bias towards previous reference times + //(in order to support alt-ref frames, which refer back to the previous + //keyframe). Normally a 0 value is not possible, but here we tenatively + //allow 0 as the value of a reference frame, with the interpretation + //that this is a "previous" reference time. 
+ + long long prev = 1; //nonce + long long next = 0; //nonce + long long duration = -1; //really, this is unsigned + + long long bpos = -1; + long long bsize = -1; + + while (pos < stop) + { + long len; + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume size + + if (id == 0x21) //Block ID + { + if (bpos < 0) //Block ID + { + bpos = pos; + bsize = size; + } + } + else if (id == 0x1B) //Duration ID + { + assert(size <= 8); + + duration = UnserializeUInt(pReader, pos, size); + assert(duration >= 0); //TODO + } + else if (id == 0x7B) //ReferenceBlock + { + assert(size <= 8); + const long size_ = static_cast<long>(size); + + long long time; + + long status = UnserializeInt(pReader, pos, size_, time); + assert(status == 0); + if (status != 0) + return -1; + + if (time <= 0) //see note above + prev = time; + else //weird + next = time; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + assert(bpos >= 0); + assert(bsize >= 0); + + const long idx = m_entries_count; + + BlockEntry** const ppEntry = m_entries + idx; + BlockEntry*& pEntry = *ppEntry; + + pEntry = new (std::nothrow) BlockGroup( + this, + idx, + bpos, + bsize, + prev, + next, + duration, + discard_padding); + + if (pEntry == NULL) + return -1; //generic error + + BlockGroup* const p = static_cast<BlockGroup*>(pEntry); + + const long status = p->Parse(); + + if (status == 0) //success + { + ++m_entries_count; + return 0; + } + + delete pEntry; + pEntry = 0; + + return status; +} + + + +long Cluster::CreateSimpleBlock( + long long st, + long long sz) +{ + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count >= 0); + assert(m_entries_count < m_entries_size); + + const long idx = m_entries_count; + + BlockEntry** const ppEntry = 
m_entries + idx; + BlockEntry*& pEntry = *ppEntry; + + pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz); + + if (pEntry == NULL) + return -1; //generic error + + SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry); + + const long status = p->Parse(); + + if (status == 0) + { + ++m_entries_count; + return 0; + } + + delete pEntry; + pEntry = 0; + + return status; +} + + +long Cluster::GetFirst(const BlockEntry*& pFirst) const +{ + if (m_entries_count <= 0) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //error + { + pFirst = NULL; + return status; + } + + if (m_entries_count <= 0) //empty cluster + { + pFirst = NULL; + return 0; + } + } + + assert(m_entries); + + pFirst = m_entries[0]; + assert(pFirst); + + return 0; //success +} + +long Cluster::GetLast(const BlockEntry*& pLast) const +{ + for (;;) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //error + { + pLast = NULL; + return status; + } + + if (status > 0) //no new block + break; + } + + if (m_entries_count <= 0) + { + pLast = NULL; + return 0; + } + + assert(m_entries); + + const long idx = m_entries_count - 1; + + pLast = m_entries[idx]; + assert(pLast); + + return 0; +} + + +long Cluster::GetNext( + const BlockEntry* pCurr, + const BlockEntry*& pNext) const +{ + assert(pCurr); + assert(m_entries); + assert(m_entries_count > 0); + + size_t idx = pCurr->GetIndex(); + assert(idx < size_t(m_entries_count)); + assert(m_entries[idx] == pCurr); + + ++idx; + + if (idx >= size_t(m_entries_count)) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //error + { + pNext = NULL; + return status; + } + + if (status > 0) + { + pNext = NULL; + return 0; + } + + assert(m_entries); + assert(m_entries_count > 0); + assert(idx < size_t(m_entries_count)); + } + + pNext = m_entries[idx]; + assert(pNext); + + return 0; +} + + +long Cluster::GetEntryCount() const +{ + return 
m_entries_count; +} + + +const BlockEntry* Cluster::GetEntry( + const Track* pTrack, + long long time_ns) const +{ + assert(pTrack); + + if (m_pSegment == NULL) //this is the special EOS cluster + return pTrack->GetEOS(); + +#if 0 + + LoadBlockEntries(); + + if ((m_entries == NULL) || (m_entries_count <= 0)) + return NULL; //return EOS here? + + const BlockEntry* pResult = pTrack->GetEOS(); + + BlockEntry** i = m_entries; + assert(i); + + BlockEntry** const j = i + m_entries_count; + + while (i != j) + { + const BlockEntry* const pEntry = *i++; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) + continue; + + if (pTrack->VetEntry(pEntry)) + { + if (time_ns < 0) //just want first candidate block + return pEntry; + + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + break; + + pResult = pEntry; + } + else if (time_ns >= 0) + { + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + break; + } + } + + return pResult; + +#else + + const BlockEntry* pResult = pTrack->GetEOS(); + + long index = 0; + + for (;;) + { + if (index >= m_entries_count) + { + long long pos; + long len; + + const long status = Parse(pos, len); + assert(status >= 0); + + if (status > 0) //completely parsed, and no more entries + return pResult; + + if (status < 0) //should never happen + return 0; + + assert(m_entries); + assert(index < m_entries_count); + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) + { + ++index; + continue; + } + + if (pTrack->VetEntry(pEntry)) + { + if (time_ns < 0) //just want first candidate block + return pEntry; + + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + return pResult; + + pResult = pEntry; //have a candidate + } 
+ else if (time_ns >= 0) + { + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + return pResult; + } + + ++index; + } + +#endif +} + + +const BlockEntry* +Cluster::GetEntry( + const CuePoint& cp, + const CuePoint::TrackPosition& tp) const +{ + assert(m_pSegment); + +#if 0 + + LoadBlockEntries(); + + if (m_entries == NULL) + return NULL; + + const long long count = m_entries_count; + + if (count <= 0) + return NULL; + + const long long tc = cp.GetTimeCode(); + + if ((tp.m_block > 0) && (tp.m_block <= count)) + { + const size_t block = static_cast<size_t>(tp.m_block); + const size_t index = block - 1; + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if ((pBlock->GetTrackNumber() == tp.m_track) && + (pBlock->GetTimeCode(this) == tc)) + { + return pEntry; + } + } + + const BlockEntry* const* i = m_entries; + const BlockEntry* const* const j = i + count; + + while (i != j) + { +#ifdef _DEBUG + const ptrdiff_t idx = i - m_entries; + idx; +#endif + + const BlockEntry* const pEntry = *i++; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != tp.m_track) + continue; + + const long long tc_ = pBlock->GetTimeCode(this); + assert(tc_ >= 0); + + if (tc_ < tc) + continue; + + if (tc_ > tc) + return NULL; + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(tp.m_track); + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return NULL; + + const long long type = pTrack->GetType(); + + if (type == 2) //audio + return pEntry; + + if (type != 1) //not video + return NULL; + + if (!pBlock->IsKey()) + return NULL; + + return pEntry; + } + + return NULL; + +#else + + const long long tc = cp.GetTimeCode(); + + if (tp.m_block > 0) + { + const long block = 
static_cast<long>(tp.m_block); + const long index = block - 1; + + while (index >= m_entries_count) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //TODO: can this happen? + return NULL; + + if (status > 0) //nothing remains to be parsed + return NULL; + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if ((pBlock->GetTrackNumber() == tp.m_track) && + (pBlock->GetTimeCode(this) == tc)) + { + return pEntry; + } + } + + long index = 0; + + for (;;) + { + if (index >= m_entries_count) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //TODO: can this happen? + return NULL; + + if (status > 0) //nothing remains to be parsed + return NULL; + + assert(m_entries); + assert(index < m_entries_count); + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != tp.m_track) + { + ++index; + continue; + } + + const long long tc_ = pBlock->GetTimeCode(this); + + if (tc_ < tc) + { + ++index; + continue; + } + + if (tc_ > tc) + return NULL; + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(tp.m_track); + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return NULL; + + const long long type = pTrack->GetType(); + + if (type == 2) //audio + return pEntry; + + if (type != 1) //not video + return NULL; + + if (!pBlock->IsKey()) + return NULL; + + return pEntry; + } + +#endif + +} + + +#if 0 +const BlockEntry* Cluster::GetMaxKey(const VideoTrack* pTrack) const +{ + assert(pTrack); + + if (m_pSegment == NULL) //EOS + return pTrack->GetEOS(); + + LoadBlockEntries(); + + if ((m_entries == NULL) || (m_entries_count <= 0)) + return 
pTrack->GetEOS(); + + BlockEntry** i = m_entries + m_entries_count; + BlockEntry** const j = m_entries; + + while (i != j) + { + const BlockEntry* const pEntry = *--i; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) + continue; + + if (pBlock->IsKey()) + return pEntry; + } + + return pTrack->GetEOS(); //no satisfactory block found +} +#endif + + +BlockEntry::BlockEntry(Cluster* p, long idx) : + m_pCluster(p), + m_index(idx) +{ +} + + +BlockEntry::~BlockEntry() +{ +} + + +bool BlockEntry::EOS() const +{ + return (GetKind() == kBlockEOS); +} + + +const Cluster* BlockEntry::GetCluster() const +{ + return m_pCluster; +} + + +long BlockEntry::GetIndex() const +{ + return m_index; +} + + +SimpleBlock::SimpleBlock( + Cluster* pCluster, + long idx, + long long start, + long long size) : + BlockEntry(pCluster, idx), + m_block(start, size, 0) +{ +} + + +long SimpleBlock::Parse() +{ + return m_block.Parse(m_pCluster); +} + + +BlockEntry::Kind SimpleBlock::GetKind() const +{ + return kBlockSimple; +} + + +const Block* SimpleBlock::GetBlock() const +{ + return &m_block; +} + + +BlockGroup::BlockGroup( + Cluster* pCluster, + long idx, + long long block_start, + long long block_size, + long long prev, + long long next, + long long duration, + long long discard_padding) : + BlockEntry(pCluster, idx), + m_block(block_start, block_size, discard_padding), + m_prev(prev), + m_next(next), + m_duration(duration) +{ +} + + +long BlockGroup::Parse() +{ + const long status = m_block.Parse(m_pCluster); + + if (status) + return status; + + m_block.SetKey((m_prev > 0) && (m_next <= 0)); + + return 0; +} + + +#if 0 +void BlockGroup::ParseBlock(long long start, long long size) +{ + IMkvReader* const pReader = m_pCluster->m_pSegment->m_pReader; + + Block* const pBlock = new Block(start, size, pReader); + assert(pBlock); //TODO + + //TODO: the Matroska spec says you have 
multiple blocks within the + //same block group, with blocks ranked by priority (the flag bits). + + assert(m_pBlock == NULL); + m_pBlock = pBlock; +} +#endif + + +BlockEntry::Kind BlockGroup::GetKind() const +{ + return kBlockGroup; +} + + +const Block* BlockGroup::GetBlock() const +{ + return &m_block; +} + + +long long BlockGroup::GetPrevTimeCode() const +{ + return m_prev; +} + + +long long BlockGroup::GetNextTimeCode() const +{ + return m_next; +} + +long long BlockGroup::GetDurationTimeCode() const +{ + return m_duration; +} + +Block::Block(long long start, long long size_, long long discard_padding) : + m_start(start), + m_size(size_), + m_track(0), + m_timecode(-1), + m_flags(0), + m_frames(NULL), + m_frame_count(-1), + m_discard_padding(discard_padding) +{ +} + + +Block::~Block() +{ + delete[] m_frames; +} + + +long Block::Parse(const Cluster* pCluster) +{ + if (pCluster == NULL) + return -1; + + if (pCluster->m_pSegment == NULL) + return -1; + + assert(m_start >= 0); + assert(m_size >= 0); + assert(m_track <= 0); + assert(m_frames == NULL); + assert(m_frame_count <= 0); + + long long pos = m_start; + const long long stop = m_start + m_size; + + long len; + + IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; + + m_track = ReadUInt(pReader, pos, len); + + if (m_track <= 0) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume track number + + if ((stop - pos) < 2) + return E_FILE_FORMAT_INVALID; + + long status; + long long value; + + status = UnserializeInt(pReader, pos, 2, value); + + if (status) + return E_FILE_FORMAT_INVALID; + + if (value < SHRT_MIN) + return E_FILE_FORMAT_INVALID; + + if (value > SHRT_MAX) + return E_FILE_FORMAT_INVALID; + + m_timecode = static_cast<short>(value); + + pos += 2; + + if ((stop - pos) <= 0) + return E_FILE_FORMAT_INVALID; + + status = pReader->Read(pos, 1, &m_flags); + + if (status) + return E_FILE_FORMAT_INVALID; + + const int lacing = 
int(m_flags & 0x06) >> 1; + + ++pos; //consume flags byte + + if (lacing == 0) //no lacing + { + if (pos > stop) + return E_FILE_FORMAT_INVALID; + + m_frame_count = 1; + m_frames = new Frame[m_frame_count]; + + Frame& f = m_frames[0]; + f.pos = pos; + + const long long frame_size = stop - pos; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + f.len = static_cast<long>(frame_size); + + return 0; //success + } + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + unsigned char biased_count; + + status = pReader->Read(pos, 1, &biased_count); + + if (status) + return E_FILE_FORMAT_INVALID; + + ++pos; //consume frame count + assert(pos <= stop); + + m_frame_count = int(biased_count) + 1; + + m_frames = new Frame[m_frame_count]; + assert(m_frames); + + if (lacing == 1) //Xiph + { + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + long size = 0; + int frame_count = m_frame_count; + + while (frame_count > 1) + { + long frame_size = 0; + + for (;;) + { + unsigned char val; + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + status = pReader->Read(pos, 1, &val); + + if (status) + return E_FILE_FORMAT_INVALID; + + ++pos; //consume xiph size byte + + frame_size += val; + + if (val < 255) + break; + } + + Frame& f = *pf++; + assert(pf < pf_end); + + f.pos = 0; //patch later + + f.len = frame_size; + size += frame_size; //contribution of this frame + + --frame_count; + } + + assert(pf < pf_end); + assert(pos <= stop); + + { + Frame& f = *pf++; + + if (pf != pf_end) + return E_FILE_FORMAT_INVALID; + + f.pos = 0; //patch later + + const long long total_size = stop - pos; + + if (total_size < size) + return E_FILE_FORMAT_INVALID; + + const long long frame_size = total_size - size; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + f.len = static_cast<long>(frame_size); + } + + pf = m_frames; + while (pf != pf_end) + { + Frame& f = *pf++; + assert((pos + f.len) <= stop); + + f.pos = pos; + pos += f.len; + } + + assert(pos 
== stop); + } + else if (lacing == 2) //fixed-size lacing + { + const long long total_size = stop - pos; + + if ((total_size % m_frame_count) != 0) + return E_FILE_FORMAT_INVALID; + + const long long frame_size = total_size / m_frame_count; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + while (pf != pf_end) + { + assert((pos + frame_size) <= stop); + + Frame& f = *pf++; + + f.pos = pos; + f.len = static_cast<long>(frame_size); + + pos += frame_size; + } + + assert(pos == stop); + } + else + { + assert(lacing == 3); //EBML lacing + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + long size = 0; + int frame_count = m_frame_count; + + long long frame_size = ReadUInt(pReader, pos, len); + + if (frame_size < 0) + return E_FILE_FORMAT_INVALID; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume length of size of first frame + + if ((pos + frame_size) > stop) + return E_FILE_FORMAT_INVALID; + + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + { + Frame& curr = *pf; + + curr.pos = 0; //patch later + + curr.len = static_cast<long>(frame_size); + size += curr.len; //contribution of this frame + } + + --frame_count; + + while (frame_count > 1) + { + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + + const Frame& prev = *pf++; + assert(prev.len == frame_size); + if (prev.len != frame_size) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + + Frame& curr = *pf; + + curr.pos = 0; //patch later + + const long long delta_size_ = ReadUInt(pReader, pos, len); + + if (delta_size_ < 0) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume length of (delta) size + assert(pos <= stop); + + const int exp = 7*len - 1; + const long long bias = (1LL << exp) - 1LL; + const long long 
delta_size = delta_size_ - bias; + + frame_size += delta_size; + + if (frame_size < 0) + return E_FILE_FORMAT_INVALID; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + curr.len = static_cast<long>(frame_size); + size += curr.len; //contribution of this frame + + --frame_count; + } + + { + assert(pos <= stop); + assert(pf < pf_end); + + const Frame& prev = *pf++; + assert(prev.len == frame_size); + if (prev.len != frame_size) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + + Frame& curr = *pf++; + assert(pf == pf_end); + + curr.pos = 0; //patch later + + const long long total_size = stop - pos; + + if (total_size < size) + return E_FILE_FORMAT_INVALID; + + frame_size = total_size - size; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + curr.len = static_cast<long>(frame_size); + } + + pf = m_frames; + while (pf != pf_end) + { + Frame& f = *pf++; + assert((pos + f.len) <= stop); + + f.pos = pos; + pos += f.len; + } + + assert(pos == stop); + } + + return 0; //success +} + + +long long Block::GetTimeCode(const Cluster* pCluster) const +{ + if (pCluster == 0) + return m_timecode; + + const long long tc0 = pCluster->GetTimeCode(); + assert(tc0 >= 0); + + const long long tc = tc0 + m_timecode; + + return tc; //unscaled timecode units +} + + +long long Block::GetTime(const Cluster* pCluster) const +{ + assert(pCluster); + + const long long tc = GetTimeCode(pCluster); + + const Segment* const pSegment = pCluster->m_pSegment; + const SegmentInfo* const pInfo = pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long ns = tc * scale; + + return ns; +} + + +long long Block::GetTrackNumber() const +{ + return m_track; +} + + +bool Block::IsKey() const +{ + return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0); +} + + +void Block::SetKey(bool bKey) +{ + if (bKey) + m_flags |= static_cast<unsigned char>(1 << 7); + else + m_flags &= 0x7F; +} + + 
+bool Block::IsInvisible() const  // "invisible" flag is bit 3 (0x08) of the Block flags byte read in Block::Parse
+{
+    return bool(int(m_flags & 0x08) != 0);
+}
+
+
+Block::Lacing Block::GetLacing() const  // lacing mode lives in bits 1-2 (mask 0x06) of the flags byte
+{
+    const int value = int(m_flags & 0x06) >> 1;  // 0=none, 1=Xiph, 2=fixed, 3=EBML (see Lacing enum)
+    return static_cast<Lacing>(value);
+}
+
+
+int Block::GetFrameCount() const  // number of laced frames; set by Block::Parse (-1 until parsed)
+{
+    return m_frame_count;
+}
+
+
+const Block::Frame& Block::GetFrame(int idx) const  // idx must be in [0, GetFrameCount()); asserts only, no runtime check
+{
+    assert(idx >= 0);
+    assert(idx < m_frame_count);
+
+    const Frame& f = m_frames[idx];
+    assert(f.pos > 0);  // Parse() patches pos/len before returning success
+    assert(f.len > 0);
+
+    return f;
+}
+
+
+long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const  // copy this frame's len bytes at absolute offset pos into buf; returns reader status (0 on success)
+{
+    assert(pReader);
+    assert(buf);
+
+    const long status = pReader->Read(pos, len, buf);
+    return status;
+}
+
+long long Block::GetDiscardPadding() const  // DiscardPadding value forwarded from the enclosing BlockGroup (0 for SimpleBlock)
+{
+    return m_discard_padding;
+}
+
+} //end namespace mkvparser
diff --git a/source/libvpx/third_party/libwebm/mkvparser.hpp b/source/libvpx/third_party/libwebm/mkvparser.hpp
new file mode 100644
index 0000000..7184d26
--- /dev/null
+++ b/source/libvpx/third_party/libwebm/mkvparser.hpp
@@ -0,0 +1,1079 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+ +#ifndef MKVPARSER_HPP +#define MKVPARSER_HPP + +#include <cstdlib> +#include <cstdio> +#include <cstddef> + +namespace mkvparser +{ + +const int E_FILE_FORMAT_INVALID = -2; +const int E_BUFFER_NOT_FULL = -3; + +class IMkvReader +{ +public: + virtual int Read(long long pos, long len, unsigned char* buf) = 0; + virtual int Length(long long* total, long long* available) = 0; +protected: + virtual ~IMkvReader(); +}; + +long long GetUIntLength(IMkvReader*, long long, long&); +long long ReadUInt(IMkvReader*, long long, long&); +long long UnserializeUInt(IMkvReader*, long long pos, long long size); + +long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); +long UnserializeInt(IMkvReader*, long long pos, long len, long long& result); + +long UnserializeString( + IMkvReader*, + long long pos, + long long size, + char*& str); + +long ParseElementHeader( + IMkvReader* pReader, + long long& pos, //consume id and size fields + long long stop, //if you know size of element's parent + long long& id, + long long& size); + +bool Match(IMkvReader*, long long&, unsigned long, long long&); +bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); + +void GetVersion(int& major, int& minor, int& build, int& revision); + +struct EBMLHeader +{ + EBMLHeader(); + ~EBMLHeader(); + long long m_version; + long long m_readVersion; + long long m_maxIdLength; + long long m_maxSizeLength; + char* m_docType; + long long m_docTypeVersion; + long long m_docTypeReadVersion; + + long long Parse(IMkvReader*, long long&); + void Init(); +}; + + +class Segment; +class Track; +class Cluster; + +class Block +{ + Block(const Block&); + Block& operator=(const Block&); + +public: + const long long m_start; + const long long m_size; + + Block(long long start, long long size, long long discard_padding); + ~Block(); + + long Parse(const Cluster*); + + long long GetTrackNumber() const; + long long GetTimeCode(const Cluster*) const; //absolute, but not scaled + long long 
GetTime(const Cluster*) const; //absolute, and scaled (ns) + bool IsKey() const; + void SetKey(bool); + bool IsInvisible() const; + + enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; + Lacing GetLacing() const; + + int GetFrameCount() const; //to index frames: [0, count) + + struct Frame + { + long long pos; //absolute offset + long len; + + long Read(IMkvReader*, unsigned char*) const; + }; + + const Frame& GetFrame(int frame_index) const; + + long long GetDiscardPadding() const; + +private: + long long m_track; //Track::Number() + short m_timecode; //relative to cluster + unsigned char m_flags; + + Frame* m_frames; + int m_frame_count; + +protected: + const long long m_discard_padding; +}; + + +class BlockEntry +{ + BlockEntry(const BlockEntry&); + BlockEntry& operator=(const BlockEntry&); + +protected: + BlockEntry(Cluster*, long index); + +public: + virtual ~BlockEntry(); + + bool EOS() const; + const Cluster* GetCluster() const; + long GetIndex() const; + virtual const Block* GetBlock() const = 0; + + enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; + virtual Kind GetKind() const = 0; + +protected: + Cluster* const m_pCluster; + const long m_index; + +}; + + +class SimpleBlock : public BlockEntry +{ + SimpleBlock(const SimpleBlock&); + SimpleBlock& operator=(const SimpleBlock&); + +public: + SimpleBlock(Cluster*, long index, long long start, long long size); + long Parse(); + + Kind GetKind() const; + const Block* GetBlock() const; + +protected: + Block m_block; + +}; + + +class BlockGroup : public BlockEntry +{ + BlockGroup(const BlockGroup&); + BlockGroup& operator=(const BlockGroup&); + +public: + BlockGroup( + Cluster*, + long index, + long long block_start, //absolute pos of block's payload + long long block_size, //size of block's payload + long long prev, + long long next, + long long duration, + long long discard_padding); + + long Parse(); + + Kind GetKind() const; + const Block* GetBlock() const; + + long long GetPrevTimeCode() 
const; //relative to block's time + long long GetNextTimeCode() const; //as above + long long GetDurationTimeCode() const; + +private: + Block m_block; + const long long m_prev; + const long long m_next; + const long long m_duration; +}; + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +// Elements used to describe if the track data has been encrypted or +// compressed with zlib or header stripping. +class ContentEncoding { +public: + enum { + kCTR = 1 + }; + + ContentEncoding(); + ~ContentEncoding(); + + // ContentCompression element names + struct ContentCompression { + ContentCompression(); + ~ContentCompression(); + + unsigned long long algo; + unsigned char* settings; + long long settings_len; + }; + + // ContentEncAESSettings element names + struct ContentEncAESSettings { + ContentEncAESSettings() : cipher_mode(kCTR) {} + ~ContentEncAESSettings() {} + + unsigned long long cipher_mode; + }; + + // ContentEncryption element names + struct ContentEncryption { + ContentEncryption(); + ~ContentEncryption(); + + unsigned long long algo; + unsigned char* key_id; + long long key_id_len; + unsigned char* signature; + long long signature_len; + unsigned char* sig_key_id; + long long sig_key_id_len; + unsigned long long sig_algo; + unsigned long long sig_hash_algo; + + ContentEncAESSettings aes_settings; + }; + + // Returns ContentCompression represented by |idx|. Returns NULL if |idx| + // is out of bounds. + const ContentCompression* GetCompressionByIndex(unsigned long idx) const; + + // Returns number of ContentCompression elements in this ContentEncoding + // element. + unsigned long GetCompressionCount() const; + + // Parses the ContentCompression element from |pReader|. |start| is the + // starting offset of the ContentCompression payload. |size| is the size in + // bytes of the ContentCompression payload. |compression| is where the parsed + // values will be stored. 
+ long ParseCompressionEntry(long long start, + long long size, + IMkvReader* pReader, + ContentCompression* compression); + + // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| + // is out of bounds. + const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; + + // Returns number of ContentEncryption elements in this ContentEncoding + // element. + unsigned long GetEncryptionCount() const; + + // Parses the ContentEncAESSettings element from |pReader|. |start| is the + // starting offset of the ContentEncAESSettings payload. |size| is the + // size in bytes of the ContentEncAESSettings payload. |encryption| is + // where the parsed values will be stored. + long ParseContentEncAESSettingsEntry(long long start, + long long size, + IMkvReader* pReader, + ContentEncAESSettings* aes); + + // Parses the ContentEncoding element from |pReader|. |start| is the + // starting offset of the ContentEncoding payload. |size| is the size in + // bytes of the ContentEncoding payload. Returns true on success. + long ParseContentEncodingEntry(long long start, + long long size, + IMkvReader* pReader); + + // Parses the ContentEncryption element from |pReader|. |start| is the + // starting offset of the ContentEncryption payload. |size| is the size in + // bytes of the ContentEncryption payload. |encryption| is where the parsed + // values will be stored. + long ParseEncryptionEntry(long long start, + long long size, + IMkvReader* pReader, + ContentEncryption* encryption); + + unsigned long long encoding_order() const { return encoding_order_; } + unsigned long long encoding_scope() const { return encoding_scope_; } + unsigned long long encoding_type() const { return encoding_type_; } + +private: + // Member variables for list of ContentCompression elements. + ContentCompression** compression_entries_; + ContentCompression** compression_entries_end_; + + // Member variables for list of ContentEncryption elements. 
+ ContentEncryption** encryption_entries_; + ContentEncryption** encryption_entries_end_; + + // ContentEncoding element names + unsigned long long encoding_order_; + unsigned long long encoding_scope_; + unsigned long long encoding_type_; + + // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); + ContentEncoding(const ContentEncoding&); + ContentEncoding& operator=(const ContentEncoding&); +}; + +class Track +{ + Track(const Track&); + Track& operator=(const Track&); + +public: + class Info; + static long Create( + Segment*, + const Info&, + long long element_start, + long long element_size, + Track*&); + + enum Type { + kVideo = 1, + kAudio = 2, + kSubtitle = 0x11, + kMetadata = 0x21 + }; + + Segment* const m_pSegment; + const long long m_element_start; + const long long m_element_size; + virtual ~Track(); + + long GetType() const; + long GetNumber() const; + unsigned long long GetUid() const; + const char* GetNameAsUTF8() const; + const char* GetLanguage() const; + const char* GetCodecNameAsUTF8() const; + const char* GetCodecId() const; + const unsigned char* GetCodecPrivate(size_t&) const; + bool GetLacing() const; + unsigned long long GetDefaultDuration() const; + unsigned long long GetCodecDelay() const; + unsigned long long GetSeekPreRoll() const; + + const BlockEntry* GetEOS() const; + + struct Settings + { + long long start; + long long size; + }; + + class Info + { + public: + Info(); + ~Info(); + int Copy(Info&) const; + void Clear(); + long type; + long number; + unsigned long long uid; + unsigned long long defaultDuration; + unsigned long long codecDelay; + unsigned long long seekPreRoll; + char* nameAsUTF8; + char* language; + char* codecId; + char* codecNameAsUTF8; + unsigned char* codecPrivate; + size_t codecPrivateSize; + bool lacing; + Settings settings; + + private: + Info(const Info&); + Info& operator=(const Info&); + int CopyStr(char* Info::*str, Info&) const; + }; + + long GetFirst(const BlockEntry*&) const; + long GetNext(const BlockEntry* 
pCurr, const BlockEntry*& pNext) const; + virtual bool VetEntry(const BlockEntry*) const; + virtual long Seek(long long time_ns, const BlockEntry*&) const; + + const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; + unsigned long GetContentEncodingCount() const; + + long ParseContentEncodingsEntry(long long start, long long size); + +protected: + Track( + Segment*, + long long element_start, + long long element_size); + + Info m_info; + + class EOSBlock : public BlockEntry + { + public: + EOSBlock(); + + Kind GetKind() const; + const Block* GetBlock() const; + }; + + EOSBlock m_eos; + +private: + ContentEncoding** content_encoding_entries_; + ContentEncoding** content_encoding_entries_end_; +}; + + +class VideoTrack : public Track +{ + VideoTrack(const VideoTrack&); + VideoTrack& operator=(const VideoTrack&); + + VideoTrack( + Segment*, + long long element_start, + long long element_size); + +public: + static long Parse( + Segment*, + const Info&, + long long element_start, + long long element_size, + VideoTrack*&); + + long long GetWidth() const; + long long GetHeight() const; + double GetFrameRate() const; + + bool VetEntry(const BlockEntry*) const; + long Seek(long long time_ns, const BlockEntry*&) const; + +private: + long long m_width; + long long m_height; + double m_rate; + +}; + + +class AudioTrack : public Track +{ + AudioTrack(const AudioTrack&); + AudioTrack& operator=(const AudioTrack&); + + AudioTrack( + Segment*, + long long element_start, + long long element_size); +public: + static long Parse( + Segment*, + const Info&, + long long element_start, + long long element_size, + AudioTrack*&); + + double GetSamplingRate() const; + long long GetChannels() const; + long long GetBitDepth() const; + +private: + double m_rate; + long long m_channels; + long long m_bitDepth; +}; + + +class Tracks +{ + Tracks(const Tracks&); + Tracks& operator=(const Tracks&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long 
long m_size; + const long long m_element_start; + const long long m_element_size; + + Tracks( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + + ~Tracks(); + + long Parse(); + + unsigned long GetTracksCount() const; + + const Track* GetTrackByNumber(long tn) const; + const Track* GetTrackByIndex(unsigned long idx) const; + +private: + Track** m_trackEntries; + Track** m_trackEntriesEnd; + + long ParseTrackEntry( + long long payload_start, + long long payload_size, + long long element_start, + long long element_size, + Track*&) const; + +}; + + +class Chapters +{ + Chapters(const Chapters&); + Chapters& operator=(const Chapters&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + Chapters( + Segment*, + long long payload_start, + long long payload_size, + long long element_start, + long long element_size); + + ~Chapters(); + + long Parse(); + + class Atom; + class Edition; + + class Display + { + friend class Atom; + Display(); + Display(const Display&); + ~Display(); + Display& operator=(const Display&); + public: + const char* GetString() const; + const char* GetLanguage() const; + const char* GetCountry() const; + private: + void Init(); + void ShallowCopy(Display&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + char* m_string; + char* m_language; + char* m_country; + }; + + class Atom + { + friend class Edition; + Atom(); + Atom(const Atom&); + ~Atom(); + Atom& operator=(const Atom&); + public: + unsigned long long GetUID() const; + const char* GetStringUID() const; + + long long GetStartTimecode() const; + long long GetStopTimecode() const; + + long long GetStartTime(const Chapters*) const; + long long GetStopTime(const Chapters*) const; + + int GetDisplayCount() const; + const Display* GetDisplay(int index) const; + private: + void Init(); + void 
ShallowCopy(Atom&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + static long long GetTime(const Chapters*, long long timecode); + + long ParseDisplay(IMkvReader*, long long pos, long long size); + bool ExpandDisplaysArray(); + + char* m_string_uid; + unsigned long long m_uid; + long long m_start_timecode; + long long m_stop_timecode; + + Display* m_displays; + int m_displays_size; + int m_displays_count; + }; + + class Edition + { + friend class Chapters; + Edition(); + Edition(const Edition&); + ~Edition(); + Edition& operator=(const Edition&); + public: + int GetAtomCount() const; + const Atom* GetAtom(int index) const; + private: + void Init(); + void ShallowCopy(Edition&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + long ParseAtom(IMkvReader*, long long pos, long long size); + bool ExpandAtomsArray(); + + Atom* m_atoms; + int m_atoms_size; + int m_atoms_count; + }; + + int GetEditionCount() const; + const Edition* GetEdition(int index) const; + +private: + long ParseEdition(long long pos, long long size); + bool ExpandEditionsArray(); + + Edition* m_editions; + int m_editions_size; + int m_editions_count; + +}; + + +class SegmentInfo +{ + SegmentInfo(const SegmentInfo&); + SegmentInfo& operator=(const SegmentInfo&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + SegmentInfo( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + + ~SegmentInfo(); + + long Parse(); + + long long GetTimeCodeScale() const; + long long GetDuration() const; //scaled + const char* GetMuxingAppAsUTF8() const; + const char* GetWritingAppAsUTF8() const; + const char* GetTitleAsUTF8() const; + +private: + long long m_timecodeScale; + double m_duration; + char* m_pMuxingAppAsUTF8; + char* m_pWritingAppAsUTF8; + char* m_pTitleAsUTF8; +}; + + +class 
SeekHead +{ + SeekHead(const SeekHead&); + SeekHead& operator=(const SeekHead&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + SeekHead( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + + ~SeekHead(); + + long Parse(); + + struct Entry + { + //the SeekHead entry payload + long long id; + long long pos; + + //absolute pos of SeekEntry ID + long long element_start; + + //SeekEntry ID size + size size + payload + long long element_size; + }; + + int GetCount() const; + const Entry* GetEntry(int idx) const; + + struct VoidElement + { + //absolute pos of Void ID + long long element_start; + + //ID size + size size + payload size + long long element_size; + }; + + int GetVoidElementCount() const; + const VoidElement* GetVoidElement(int idx) const; + +private: + Entry* m_entries; + int m_entry_count; + + VoidElement* m_void_elements; + int m_void_element_count; + + static bool ParseEntry( + IMkvReader*, + long long pos, //payload + long long size, + Entry*); + +}; + +class Cues; +class CuePoint +{ + friend class Cues; + + CuePoint(long, long long); + ~CuePoint(); + + CuePoint(const CuePoint&); + CuePoint& operator=(const CuePoint&); + +public: + long long m_element_start; + long long m_element_size; + + void Load(IMkvReader*); + + long long GetTimeCode() const; //absolute but unscaled + long long GetTime(const Segment*) const; //absolute and scaled (ns units) + + struct TrackPosition + { + long long m_track; + long long m_pos; //of cluster + long long m_block; + //codec_state //defaults to 0 + //reference = clusters containing req'd referenced blocks + // reftime = timecode of the referenced block + + void Parse(IMkvReader*, long long, long long); + }; + + const TrackPosition* Find(const Track*) const; + +private: + const long m_index; + long long m_timecode; + TrackPosition* m_track_positions; + 
size_t m_track_positions_count; + +}; + + +class Cues +{ + friend class Segment; + + Cues( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + ~Cues(); + + Cues(const Cues&); + Cues& operator=(const Cues&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + bool Find( //lower bound of time_ns + long long time_ns, + const Track*, + const CuePoint*&, + const CuePoint::TrackPosition*&) const; + +#if 0 + bool FindNext( //upper_bound of time_ns + long long time_ns, + const Track*, + const CuePoint*&, + const CuePoint::TrackPosition*&) const; +#endif + + const CuePoint* GetFirst() const; + const CuePoint* GetLast() const; + const CuePoint* GetNext(const CuePoint*) const; + + const BlockEntry* GetBlock( + const CuePoint*, + const CuePoint::TrackPosition*) const; + + bool LoadCuePoint() const; + long GetCount() const; //loaded only + //long GetTotal() const; //loaded + preloaded + bool DoneParsing() const; + +private: + void Init() const; + void PreloadCuePoint(long&, long long) const; + + mutable CuePoint** m_cue_points; + mutable long m_count; + mutable long m_preload_count; + mutable long long m_pos; + +}; + + +class Cluster +{ + friend class Segment; + + Cluster(const Cluster&); + Cluster& operator=(const Cluster&); + +public: + Segment* const m_pSegment; + +public: + static Cluster* Create( + Segment*, + long index, //index in segment + long long off); //offset relative to segment + //long long element_size); + + Cluster(); //EndOfStream + ~Cluster(); + + bool EOS() const; + + long long GetTimeCode() const; //absolute, but not scaled + long long GetTime() const; //absolute, and scaled (nanosecond units) + long long GetFirstTime() const; //time (ns) of first (earliest) block + long long GetLastTime() const; //time (ns) of last (latest) block + + long GetFirst(const BlockEntry*&) const; + long 
GetLast(const BlockEntry*&) const; + long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; + + const BlockEntry* GetEntry(const Track*, long long ns = -1) const; + const BlockEntry* GetEntry( + const CuePoint&, + const CuePoint::TrackPosition&) const; + //const BlockEntry* GetMaxKey(const VideoTrack*) const; + +// static bool HasBlockEntries(const Segment*, long long); + + static long HasBlockEntries( + const Segment*, + long long idoff, + long long& pos, + long& size); + + long GetEntryCount() const; + + long Load(long long& pos, long& size) const; + + long Parse(long long& pos, long& size) const; + long GetEntry(long index, const mkvparser::BlockEntry*&) const; + +protected: + Cluster( + Segment*, + long index, + long long element_start); + //long long element_size); + +public: + const long long m_element_start; + long long GetPosition() const; //offset relative to segment + + long GetIndex() const; + long long GetElementSize() const; + //long long GetPayloadSize() const; + + //long long Unparsed() const; + +private: + long m_index; + mutable long long m_pos; + //mutable long long m_size; + mutable long long m_element_size; + mutable long long m_timecode; + mutable BlockEntry** m_entries; + mutable long m_entries_size; + mutable long m_entries_count; + + long ParseSimpleBlock(long long, long long&, long&); + long ParseBlockGroup(long long, long long&, long&); + + long CreateBlock(long long id, long long pos, long long size, + long long discard_padding); + long CreateBlockGroup(long long start_offset, long long size, + long long discard_padding); + long CreateSimpleBlock(long long, long long); + +}; + + +class Segment +{ + friend class Cues; + friend class Track; + friend class VideoTrack; + + Segment(const Segment&); + Segment& operator=(const Segment&); + +private: + Segment( + IMkvReader*, + long long elem_start, + //long long elem_size, + long long pos, + long long size); + +public: + IMkvReader* const m_pReader; + const long long 
m_element_start; + //const long long m_element_size; + const long long m_start; //posn of segment payload + const long long m_size; //size of segment payload + Cluster m_eos; //TODO: make private? + + static long long CreateInstance(IMkvReader*, long long, Segment*&); + ~Segment(); + + long Load(); //loads headers and all clusters + + //for incremental loading + //long long Unparsed() const; + bool DoneParsing() const; + long long ParseHeaders(); //stops when first cluster is found + //long FindNextCluster(long long& pos, long& size) const; + long LoadCluster(long long& pos, long& size); //load one cluster + long LoadCluster(); + + long ParseNext( + const Cluster* pCurr, + const Cluster*& pNext, + long long& pos, + long& size); + +#if 0 + //This pair parses one cluster, but only changes the state of the + //segment object when the cluster is actually added to the index. + long ParseCluster(long long& cluster_pos, long long& new_pos) const; + bool AddCluster(long long cluster_pos, long long new_pos); +#endif + + const SeekHead* GetSeekHead() const; + const Tracks* GetTracks() const; + const SegmentInfo* GetInfo() const; + const Cues* GetCues() const; + const Chapters* GetChapters() const; + + long long GetDuration() const; + + unsigned long GetCount() const; + const Cluster* GetFirst() const; + const Cluster* GetLast() const; + const Cluster* GetNext(const Cluster*); + + const Cluster* FindCluster(long long time_nanoseconds) const; + //const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; + + const Cluster* FindOrPreloadCluster(long long pos); + + long ParseCues( + long long cues_off, //offset relative to start of segment + long long& parse_pos, + long& parse_len); + +private: + + long long m_pos; //absolute file posn; what has been consumed so far + Cluster* m_pUnknownSize; + + SeekHead* m_pSeekHead; + SegmentInfo* m_pInfo; + Tracks* m_pTracks; + Cues* m_pCues; + Chapters* m_pChapters; + Cluster** m_clusters; + long m_clusterCount; //number of 
entries for which m_index >= 0 + long m_clusterPreloadCount; //number of entries for which m_index < 0 + long m_clusterSize; //array size + + long DoLoadCluster(long long&, long&); + long DoLoadClusterUnknownSize(long long&, long&); + long DoParseNext(const Cluster*&, long long&, long&); + + void AppendCluster(Cluster*); + void PreloadCluster(Cluster*, ptrdiff_t); + + //void ParseSeekHead(long long pos, long long size); + //void ParseSeekEntry(long long pos, long long size); + //void ParseCues(long long); + + const BlockEntry* GetBlock( + const CuePoint&, + const CuePoint::TrackPosition&); + +}; + +} //end namespace mkvparser + +inline long mkvparser::Segment::LoadCluster() +{ + long long pos; + long size; + + return LoadCluster(pos, size); +} + +#endif //MKVPARSER_HPP diff --git a/source/libvpx/third_party/libwebm/mkvreader.cpp b/source/libvpx/third_party/libwebm/mkvreader.cpp new file mode 100644 index 0000000..b4b2459 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvreader.cpp @@ -0,0 +1,143 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+ +#include "mkvreader.hpp" + +#include <cassert> + +namespace mkvparser +{ + +MkvReader::MkvReader() : + m_file(NULL), + reader_owns_file_(true) { +} + +MkvReader::MkvReader(FILE* fp) : + m_file(fp), + reader_owns_file_(false) { + GetFileSize(); +} + +MkvReader::~MkvReader() { + if (reader_owns_file_) + Close(); + m_file = NULL; +} + +int MkvReader::Open(const char* fileName) +{ + if (fileName == NULL) + return -1; + + if (m_file) + return -1; + +#ifdef _MSC_VER + const errno_t e = fopen_s(&m_file, fileName, "rb"); + + if (e) + return -1; //error +#else + m_file = fopen(fileName, "rb"); + + if (m_file == NULL) + return -1; +#endif + return !GetFileSize(); +} + +bool MkvReader::GetFileSize() { + if (m_file == NULL) + return false; +#ifdef _MSC_VER + int status = _fseeki64(m_file, 0L, SEEK_END); + + if (status) + return false; //error + + m_length = _ftelli64(m_file); +#else + fseek(m_file, 0L, SEEK_END); + m_length = ftell(m_file); +#endif + assert(m_length >= 0); + + if (m_length < 0) + return false; + +#ifdef _MSC_VER + status = _fseeki64(m_file, 0L, SEEK_SET); + + if (status) + return false; //error +#else + fseek(m_file, 0L, SEEK_SET); +#endif + + return true; +} + +void MkvReader::Close() +{ + if (m_file != NULL) + { + fclose(m_file); + m_file = NULL; + } +} + +int MkvReader::Length(long long* total, long long* available) +{ + if (m_file == NULL) + return -1; + + if (total) + *total = m_length; + + if (available) + *available = m_length; + + return 0; +} + +int MkvReader::Read(long long offset, long len, unsigned char* buffer) +{ + if (m_file == NULL) + return -1; + + if (offset < 0) + return -1; + + if (len < 0) + return -1; + + if (len == 0) + return 0; + + if (offset >= m_length) + return -1; + +#ifdef _MSC_VER + const int status = _fseeki64(m_file, offset, SEEK_SET); + + if (status) + return -1; //error +#else + fseek(m_file, offset, SEEK_SET); +#endif + + const size_t size = fread(buffer, 1, len, m_file); + + if (size < size_t(len)) + return -1; //error + 
+ return 0; //success +} + +} //end namespace mkvparser diff --git a/source/libvpx/third_party/libwebm/mkvreader.hpp b/source/libvpx/third_party/libwebm/mkvreader.hpp new file mode 100644 index 0000000..8ebdd99 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvreader.hpp @@ -0,0 +1,46 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVREADER_HPP +#define MKVREADER_HPP + +#include "mkvparser.hpp" +#include <cstdio> + +namespace mkvparser +{ + +class MkvReader : public IMkvReader +{ + MkvReader(const MkvReader&); + MkvReader& operator=(const MkvReader&); +public: + MkvReader(); + MkvReader(FILE* fp); + virtual ~MkvReader(); + + int Open(const char*); + void Close(); + + virtual int Read(long long position, long length, unsigned char* buffer); + virtual int Length(long long* total, long long* available); +private: + + // Determines the size of the file. This is called either by the constructor + // or by the Open function depending on file ownership. Returns true on + // success. + bool GetFileSize(); + + long long m_length; + FILE* m_file; + bool reader_owns_file_; +}; + +} //end namespace mkvparser + +#endif //MKVREADER_HPP diff --git a/source/libvpx/third_party/libwebm/mkvwriter.cpp b/source/libvpx/third_party/libwebm/mkvwriter.cpp new file mode 100644 index 0000000..8de89a4 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvwriter.cpp @@ -0,0 +1,97 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. 
An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvwriter.hpp" + +#ifdef _MSC_VER +#include <share.h> // for _SH_DENYWR +#endif + +#include <new> + +namespace mkvmuxer { + +MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) { +} + +MkvWriter::MkvWriter(FILE* fp): file_(fp), writer_owns_file_(false) { +} + +MkvWriter::~MkvWriter() { + Close(); +} + +int32 MkvWriter::Write(const void* buffer, uint32 length) { + if (!file_) + return -1; + + if (length == 0) + return 0; + + if (buffer == NULL) + return -1; + + const size_t bytes_written = fwrite(buffer, 1, length, file_); + + return (bytes_written == length) ? 0 : -1; +} + +bool MkvWriter::Open(const char* filename) { + if (filename == NULL) + return false; + + if (file_) + return false; + +#ifdef _MSC_VER + file_ = _fsopen(filename, "wb", _SH_DENYWR); +#else + file_ = fopen(filename, "wb"); +#endif + if (file_ == NULL) + return false; + return true; +} + +void MkvWriter::Close() { + if (file_ && writer_owns_file_) { + fclose(file_); + } + file_ = NULL; +} + +int64 MkvWriter::Position() const { + if (!file_) + return 0; + +#ifdef _MSC_VER + return _ftelli64(file_); +#else + return ftell(file_); +#endif +} + +int32 MkvWriter::Position(int64 position) { + if (!file_) + return -1; + +#ifdef _MSC_VER + return _fseeki64(file_, position, SEEK_SET); +#else + return fseek(file_, position, SEEK_SET); +#endif +} + +bool MkvWriter::Seekable() const { + return true; +} + +void MkvWriter::ElementStartNotify(uint64, int64) { +} + +} // namespace mkvmuxer diff --git a/source/libvpx/third_party/libwebm/mkvwriter.hpp b/source/libvpx/third_party/libwebm/mkvwriter.hpp new file mode 100644 index 0000000..524e0f7 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvwriter.hpp @@ -0,0 +1,51 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
+// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVWRITER_HPP +#define MKVWRITER_HPP + +#include <stdio.h> + +#include "mkvmuxer.hpp" +#include "mkvmuxertypes.hpp" + +namespace mkvmuxer { + +// Default implementation of the IMkvWriter interface on Windows. +class MkvWriter : public IMkvWriter { + public: + MkvWriter(); + MkvWriter(FILE* fp); + virtual ~MkvWriter(); + + // IMkvWriter interface + virtual int64 Position() const; + virtual int32 Position(int64 position); + virtual bool Seekable() const; + virtual int32 Write(const void* buffer, uint32 length); + virtual void ElementStartNotify(uint64 element_id, int64 position); + + // Creates and opens a file for writing. |filename| is the name of the file + // to open. This function will overwrite the contents of |filename|. Returns + // true on success. + bool Open(const char* filename); + + // Closes an opened file. + void Close(); + + private: + // File handle to output file. + FILE* file_; + bool writer_owns_file_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter); +}; + +} //end namespace mkvmuxer + +#endif // MKVWRITER_HPP diff --git a/source/libvpx/third_party/libwebm/webmids.hpp b/source/libvpx/third_party/libwebm/webmids.hpp new file mode 100644 index 0000000..65fab96 --- /dev/null +++ b/source/libvpx/third_party/libwebm/webmids.hpp @@ -0,0 +1,141 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef WEBMIDS_HPP +#define WEBMIDS_HPP + +namespace mkvmuxer { + +enum MkvId { + kMkvEBML = 0x1A45DFA3, + kMkvEBMLVersion = 0x4286, + kMkvEBMLReadVersion = 0x42F7, + kMkvEBMLMaxIDLength = 0x42F2, + kMkvEBMLMaxSizeLength = 0x42F3, + kMkvDocType = 0x4282, + kMkvDocTypeVersion = 0x4287, + kMkvDocTypeReadVersion = 0x4285, + kMkvVoid = 0xEC, + kMkvSignatureSlot = 0x1B538667, + kMkvSignatureAlgo = 0x7E8A, + kMkvSignatureHash = 0x7E9A, + kMkvSignaturePublicKey = 0x7EA5, + kMkvSignature = 0x7EB5, + kMkvSignatureElements = 0x7E5B, + kMkvSignatureElementList = 0x7E7B, + kMkvSignedElement = 0x6532, + //segment + kMkvSegment = 0x18538067, + //Meta Seek Information + kMkvSeekHead = 0x114D9B74, + kMkvSeek = 0x4DBB, + kMkvSeekID = 0x53AB, + kMkvSeekPosition = 0x53AC, + //Segment Information + kMkvInfo = 0x1549A966, + kMkvTimecodeScale = 0x2AD7B1, + kMkvDuration = 0x4489, + kMkvDateUTC = 0x4461, + kMkvMuxingApp = 0x4D80, + kMkvWritingApp = 0x5741, + //Cluster + kMkvCluster = 0x1F43B675, + kMkvTimecode = 0xE7, + kMkvPrevSize = 0xAB, + kMkvBlockGroup = 0xA0, + kMkvBlock = 0xA1, + kMkvBlockDuration = 0x9B, + kMkvReferenceBlock = 0xFB, + kMkvLaceNumber = 0xCC, + kMkvSimpleBlock = 0xA3, + kMkvBlockAdditions = 0x75A1, + kMkvBlockMore = 0xA6, + kMkvBlockAddID = 0xEE, + kMkvBlockAdditional = 0xA5, + kMkvDiscardPadding = 0x75A2, + //Track + kMkvTracks = 0x1654AE6B, + kMkvTrackEntry = 0xAE, + kMkvTrackNumber = 0xD7, + kMkvTrackUID = 0x73C5, + kMkvTrackType = 0x83, + kMkvFlagEnabled = 0xB9, + kMkvFlagDefault = 0x88, + kMkvFlagForced = 0x55AA, + kMkvFlagLacing = 0x9C, + kMkvDefaultDuration = 0x23E383, + kMkvMaxBlockAdditionID = 0x55EE, + kMkvName = 0x536E, + kMkvLanguage = 0x22B59C, + kMkvCodecID = 0x86, + kMkvCodecPrivate = 0x63A2, + kMkvCodecName = 0x258688, + kMkvCodecDelay = 0x56AA, + kMkvSeekPreRoll = 0x56BB, + //video + kMkvVideo = 0xE0, + kMkvFlagInterlaced = 0x9A, + 
kMkvStereoMode = 0x53B8, + kMkvAlphaMode = 0x53C0, + kMkvPixelWidth = 0xB0, + kMkvPixelHeight = 0xBA, + kMkvPixelCropBottom = 0x54AA, + kMkvPixelCropTop = 0x54BB, + kMkvPixelCropLeft = 0x54CC, + kMkvPixelCropRight = 0x54DD, + kMkvDisplayWidth = 0x54B0, + kMkvDisplayHeight = 0x54BA, + kMkvDisplayUnit = 0x54B2, + kMkvAspectRatioType = 0x54B3, + kMkvFrameRate = 0x2383E3, + //end video + //audio + kMkvAudio = 0xE1, + kMkvSamplingFrequency = 0xB5, + kMkvOutputSamplingFrequency = 0x78B5, + kMkvChannels = 0x9F, + kMkvBitDepth = 0x6264, + //end audio + //ContentEncodings + kMkvContentEncodings = 0x6D80, + kMkvContentEncoding = 0x6240, + kMkvContentEncodingOrder = 0x5031, + kMkvContentEncodingScope = 0x5032, + kMkvContentEncodingType = 0x5033, + kMkvContentEncryption = 0x5035, + kMkvContentEncAlgo = 0x47E1, + kMkvContentEncKeyID = 0x47E2, + kMkvContentEncAESSettings = 0x47E7, + kMkvAESSettingsCipherMode = 0x47E8, + kMkvAESSettingsCipherInitData = 0x47E9, + //end ContentEncodings + //Cueing Data + kMkvCues = 0x1C53BB6B, + kMkvCuePoint = 0xBB, + kMkvCueTime = 0xB3, + kMkvCueTrackPositions = 0xB7, + kMkvCueTrack = 0xF7, + kMkvCueClusterPosition = 0xF1, + kMkvCueBlockNumber = 0x5378, + //Chapters + kMkvChapters = 0x1043A770, + kMkvEditionEntry = 0x45B9, + kMkvChapterAtom = 0xB6, + kMkvChapterUID = 0x73C4, + kMkvChapterStringUID = 0x5654, + kMkvChapterTimeStart = 0x91, + kMkvChapterTimeEnd = 0x92, + kMkvChapterDisplay = 0x80, + kMkvChapString = 0x85, + kMkvChapLanguage = 0x437C, + kMkvChapCountry = 0x437E +}; + +} // end namespace mkvmuxer + +#endif // WEBMIDS_HPP diff --git a/source/libvpx/third_party/nestegg/README.webm b/source/libvpx/third_party/nestegg/README.webm index 7860a7c..8e3760b 100644 --- a/source/libvpx/third_party/nestegg/README.webm +++ b/source/libvpx/third_party/nestegg/README.webm @@ -18,3 +18,7 @@ nestegg.c|975 col 6| warning: ‘r’ may be used uninitialized in this function - fix track_number uint64->uint32 warnings - fix track_scale double->uint64 warning - 
nestegg_packet_track: fix uint64->uint32 warning +- ne_read_(string|binary|block): normalize size_t usage +- ne_parse: normalize size_t usage +- quiet read related uint64->size_t warnings +- ne_buffer_read: quiet uint64->size_t warning diff --git a/source/libvpx/third_party/nestegg/src/nestegg.c b/source/libvpx/third_party/nestegg/src/nestegg.c index 35ce9f1..c7e2b02 100644 --- a/source/libvpx/third_party/nestegg/src/nestegg.c +++ b/source/libvpx/third_party/nestegg/src/nestegg.c @@ -694,14 +694,15 @@ ne_read_string(nestegg * ctx, char ** val, uint64_t length) { char * str; int r; + const size_t alloc_size = (size_t)length + 1; if (length == 0 || length > LIMIT_STRING) return -1; - str = ne_pool_alloc(length + 1, ctx->alloc_pool); - r = ne_io_read(ctx->io, (unsigned char *) str, length); + str = ne_pool_alloc(alloc_size, ctx->alloc_pool); + r = ne_io_read(ctx->io, (unsigned char *) str, alloc_size - 1); if (r != 1) return r; - str[length] = '\0'; + str[alloc_size - 1] = '\0'; *val = str; return 1; } @@ -711,9 +712,9 @@ ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length) { if (length == 0 || length > LIMIT_BINARY) return -1; - val->data = ne_pool_alloc(length, ctx->alloc_pool); - val->length = length; - return ne_io_read(ctx->io, val->data, length); + val->length = (size_t)length; + val->data = ne_pool_alloc(val->length, ctx->alloc_pool); + return ne_io_read(ctx->io, val->data, val->length); } static int @@ -1043,7 +1044,7 @@ ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset ne_read_single_master(ctx, element); continue; } else { - r = ne_read_simple(ctx, element, size); + r = ne_read_simple(ctx, element, (size_t)size); if (r < 0) break; } @@ -1062,7 +1063,7 @@ ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset if (id != ID_VOID && id != ID_CRC32) ctx->log(ctx, NESTEGG_LOG_DEBUG, "unknown element %llx", id); - r = ne_io_read_skip(ctx->io, size); + r = ne_io_read_skip(ctx->io, 
(size_t)size); if (r != 1) break; } @@ -1151,7 +1152,8 @@ ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, ui r = ne_read_vint(io, &lace, &length); if (r != 1) return r; - *read += length; + assert(length <= 8); + *read += (size_t)length; sizes[i] = lace; sum = sizes[i]; @@ -1163,7 +1165,8 @@ ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, ui r = ne_read_svint(io, &slace, &length); if (r != 1) return r; - *read += length; + assert(length <= 8); + *read += (size_t)length; sizes[i] = sizes[i - 1] + slace; sum += sizes[i]; i += 1; @@ -1263,7 +1266,8 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac if (track_number == 0 || (unsigned int)track_number != track_number) return -1; - consumed += length; + assert(length <= 8); + consumed += (size_t)length; r = ne_read_int(ctx->io, &timecode, 2); if (r != 1) @@ -1307,7 +1311,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac case LACING_XIPH: if (frames == 1) return -1; - r = ne_read_xiph_lacing(ctx->io, block_size, &consumed, frames, frame_sizes); + r = ne_read_xiph_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes); if (r != 1) return r; break; @@ -1320,7 +1324,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac case LACING_EBML: if (frames == 1) return -1; - r = ne_read_ebml_lacing(ctx->io, block_size, &consumed, frames, frame_sizes); + r = ne_read_ebml_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes); if (r != 1) return r; break; @@ -1365,9 +1369,9 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac return -1; } f = ne_alloc(sizeof(*f)); - f->data = ne_alloc(frame_sizes[i]); - f->length = frame_sizes[i]; - r = ne_io_read(ctx->io, f->data, frame_sizes[i]); + f->length = (size_t)frame_sizes[i]; + f->data = ne_alloc(f->length); + r = ne_io_read(ctx->io, f->data, f->length); if (r != 1) { 
free(f->data); free(f); @@ -1406,7 +1410,8 @@ ne_read_discard_padding(nestegg * ctx, nestegg_packet * pkt) if (!element) return 1; - r = ne_read_simple(ctx, element, size); + assert((size_t)size == size); + r = ne_read_simple(ctx, element, (size_t)size); if (r != 1) return r; storage = (struct ebml_type *) (ctx->ancestor->data + element->offset); @@ -1600,7 +1605,7 @@ ne_buffer_read(void * buffer, size_t length, void * user_data) struct sniff_buffer * sb = user_data; int rv = 1; - size_t available = sb->length - sb->offset; + size_t available = sb->length - (size_t)sb->offset; if (available < length) return 0; @@ -2074,7 +2079,7 @@ nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item, p += sizes[i]; } *data = p; - *length = sizes[item]; + *length = (size_t)sizes[item]; } else { *data = codec_private.data; *length = codec_private.length; diff --git a/source/libvpx/tools_common.h b/source/libvpx/tools_common.h index 58894de..549e895 100644 --- a/source/libvpx/tools_common.h +++ b/source/libvpx/tools_common.h @@ -22,10 +22,12 @@ #endif #if defined(_MSC_VER) -/* MSVS doesn't define off_t, and uses _f{seek,tell}i64. */ -typedef __int64 off_t; +/* MSVS uses _f{seek,tell}i64. */ #define fseeko _fseeki64 #define ftello _ftelli64 +typedef long _off_t; // NOLINT - MSVS compatible type +typedef __int64 off_t; // fseeki64 compatible type +#define _OFF_T_DEFINED #elif defined(_WIN32) /* MinGW defines off_t as long and uses f{seek,tell}o64/off64_t for large * files. 
*/ diff --git a/source/libvpx/vp8/common/loopfilter.c b/source/libvpx/vp8/common/loopfilter.c index 19857a7..7a07e76 100644 --- a/source/libvpx/vp8/common/loopfilter.c +++ b/source/libvpx/vp8/common/loopfilter.c @@ -15,7 +15,6 @@ #include "onyxc_int.h" #include "vpx_mem/vpx_mem.h" -typedef unsigned char uc; static void lf_init_lut(loop_filter_info_n *lfi) { diff --git a/source/libvpx/vp8/common/postproc.c b/source/libvpx/vp8/common/postproc.c index e3bee32..7d0fbf6 100644 --- a/source/libvpx/vp8/common/postproc.c +++ b/source/libvpx/vp8/common/postproc.c @@ -303,8 +303,8 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i { d[r&15] = (rv2[r&127] + sum + s[0]) >> 4; } - - s[-8*pitch] = d[(r-8)&15]; + if (r >= 8) + s[-8*pitch] = d[(r-8)&15]; s += pitch; } } diff --git a/source/libvpx/vp8/common/rtcd_defs.pl b/source/libvpx/vp8/common/rtcd_defs.pl new file mode 100644 index 0000000..130d965 --- /dev/null +++ b/source/libvpx/vp8/common/rtcd_defs.pl @@ -0,0 +1,541 @@ +sub vp8_common_forward_decls() { +print <<EOF +/* + * VP8 + */ + +struct blockd; +struct macroblockd; +struct loop_filter_info; + +/* Encoder forward decls */ +struct block; +struct macroblock; +struct variance_vtable; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp8_common_forward_decls/; + +# +# system state +# +add_proto qw/void vp8_clear_system_state/, ""; +specialize qw/vp8_clear_system_state mmx/; +$vp8_clear_system_state_mmx=vpx_reset_mmx_state; + +# +# Dequant +# +add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc"; +specialize qw/vp8_dequantize_b mmx media neon/; +$vp8_dequantize_b_media=vp8_dequantize_b_v6; + +add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride"; +specialize qw/vp8_dequant_idct_add mmx media neon dspr2/; +$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6; +$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2; + +add_proto qw/void 
vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs"; +specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/; +$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6; +$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2; + +add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"; +specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/; +$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6; +$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2; + +# +# Loopfilter +# +add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/; +$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6; +$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2; + +add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/; +$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6; +$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2; + +add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/; +$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6; +$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2; + +add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/; +$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6; 
+$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2; + + +add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/; +$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c; +$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx; +$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2; +$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6; +$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon; + +add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/; +$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c; +$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx; +$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2; +$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6; +$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon; + +add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/; +$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c; +$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx; +$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2; +$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6; +$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon; + +add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/; +$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c; +$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx; +$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2; 
+$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6; +$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon; + +# +# IDCT +# +#idct16 +add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"; +specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/; +$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual; +$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2; + +#iwalsh1 +add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output"; +specialize qw/vp8_short_inv_walsh4x4_1 dspr2/; +$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2; +# no asm yet + +#iwalsh16 +add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output"; +specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/; +$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6; +$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2; + +#idct1_scalar_add +add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"; +specialize qw/vp8_dc_only_idct_add mmx media neon dspr2/; +$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6; +$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2; + +# +# RECON +# +add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/; +$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6; +$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2; + +add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/; +$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6; +$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2; + +add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/; 
+$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6; +$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2; + +add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"; +specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/; +#TODO: fix assembly for neon + +add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"; +specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/; + +add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"; +specialize qw/vp8_intra4x4_predict media/; +$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6; + +# +# Postproc +# +if (vpx_config("CONFIG_POSTPROC") eq "yes") { + add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; + specialize qw/vp8_mbpost_proc_down mmx sse2/; + $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm; + + add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; + specialize qw/vp8_mbpost_proc_across_ip sse2/; + $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm; + + add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"; + specialize qw/vp8_post_proc_down_and_across_mb_row sse2/; + + add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"; + specialize qw/vp8_plane_add_noise mmx sse2/; + 
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt; + + add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + specialize qw/vp8_filter_by_weight16x16 sse2/; + + add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + specialize qw/vp8_filter_by_weight8x8 sse2/; + + add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + # no asm yet +} + +# +# Subpixel +# +add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6; +$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2; + +add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6; +$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2; + +add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/; 
+$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6; +$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2; + +add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/; +$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6; +$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2; + +add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/; +$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6; + +add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/; +$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6; + +add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x4 mmx media neon/; +$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6; + +add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict4x4 mmx media neon/; +$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6; + +# +# Whole-pixel Variance +# +add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance4x4 mmx sse2/; +$vp8_variance4x4_sse2=vp8_variance4x4_wmt; + +add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance8x8 mmx sse2 media 
neon/; +$vp8_variance8x8_sse2=vp8_variance8x8_wmt; +$vp8_variance8x8_media=vp8_variance8x8_armv6; + +add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance8x16 mmx sse2 neon/; +$vp8_variance8x16_sse2=vp8_variance8x16_wmt; + +add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance16x8 mmx sse2 neon/; +$vp8_variance16x8_sse2=vp8_variance16x8_wmt; + +add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance16x16 mmx sse2 media neon/; +$vp8_variance16x16_sse2=vp8_variance16x16_wmt; +$vp8_variance16x16_media=vp8_variance16x16_armv6; + +# +# Sub-pixel Variance +# +add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/; +$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/; +$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt; +$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6; + +add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/; +$vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt; + +add_proto 
qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/; +$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/; +$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt; +$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt; +$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt; +$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt; +$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6; + +# +# Single block SAD +# +add_proto qw/unsigned int vp8_sad4x4/, "const 
unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad4x4 mmx sse2 neon/; +$vp8_sad4x4_sse2=vp8_sad4x4_wmt; + +add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad8x8 mmx sse2 neon/; +$vp8_sad8x8_sse2=vp8_sad8x8_wmt; + +add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad8x16 mmx sse2 neon/; +$vp8_sad8x16_sse2=vp8_sad8x16_wmt; + +add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad16x8 mmx sse2 neon/; +$vp8_sad16x8_sse2=vp8_sad16x8_wmt; + +add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/; +$vp8_sad16x16_sse2=vp8_sad16x16_wmt; +$vp8_sad16x16_media=vp8_sad16x16_armv6; + +# +# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally +# +add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad4x4x3 sse3/; + +add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x8x3 sse3/; + +add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x16x3 sse3/; + +add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int 
*sad_array"; +specialize qw/vp8_sad16x8x3 sse3 ssse3/; + +add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x16x3 sse3 ssse3/; + +# Note the only difference in the following prototypes is that they return into +# an array of short +add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad4x4x8 sse4_1/; +$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4; + +add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad8x8x8 sse4_1/; +$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4; + +add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad8x16x8 sse4_1/; +$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4; + +add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad16x8x8 sse4_1/; +$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4; + +add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad16x16x8 sse4_1/; +$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4; + +# +# Multi-block SAD, comparing a reference to N independent blocks +# +add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad4x4x4d sse3/; + +add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize 
qw/vp8_sad8x8x4d sse3/; + +add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x16x4d sse3/; + +add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x8x4d sse3/; + +add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x16x4d sse3/; + +# +# Encoder functions below this point. +# +if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") { + +# +# Sum of squares (vector) +# +add_proto qw/unsigned int vp8_get_mb_ss/, "const short *"; +specialize qw/vp8_get_mb_ss mmx sse2/; + +# +# SSE (Sum Squared Error) +# +add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/; +$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt; + +add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_mse16x16 mmx sse2 media neon/; +$vp8_mse16x16_sse2=vp8_mse16x16_wmt; +$vp8_mse16x16_media=vp8_mse16x16_armv6; + +add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; +specialize qw/vp8_get4x4sse_cs mmx neon/; + +# +# Block copy +# +if ($opts{arch} =~ /x86/) { + add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"; + specialize qw/vp8_copy32xn sse2 sse3/; +} + +# +# Structured Similarity (SSIM) +# +if (vpx_config("CONFIG_INTERNAL_STATS") eq 
"yes") { + $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2"; + + add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64"; + + add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64"; +} + +# +# Forward DCT +# +add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/; +$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6; + +add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/; +$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6; + +add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_walsh4x4 sse2 media neon/; +$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6; + +# +# Quantizer +# +add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *"; +specialize qw/vp8_regular_quantize_b sse2/; +# TODO(johann) Update sse4 implementation and re-enable +#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4; + +add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *"; +specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/; +$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6; + +add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; +# no asm yet + +add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; +specialize qw/vp8_fast_quantize_b_pair neon/; + +add_proto qw/void 
vp8_quantize_mb/, "struct macroblock *"; +specialize qw/vp8_quantize_mb neon/; + +add_proto qw/void vp8_quantize_mby/, "struct macroblock *"; +specialize qw/vp8_quantize_mby neon/; + +add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *"; +specialize qw/vp8_quantize_mbuv neon/; + +# +# Block subtraction +# +add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff"; +specialize qw/vp8_block_error mmx sse2/; +$vp8_block_error_sse2=vp8_block_error_xmm; + +add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc"; +specialize qw/vp8_mbblock_error mmx sse2/; +$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm; + +add_proto qw/int vp8_mbuverror/, "struct macroblock *mb"; +specialize qw/vp8_mbuverror mmx sse2/; +$vp8_mbuverror_sse2=vp8_mbuverror_xmm; + +add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch"; +specialize qw/vp8_subtract_b mmx sse2 media neon/; +$vp8_subtract_b_media=vp8_subtract_b_armv6; + +add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"; +specialize qw/vp8_subtract_mby mmx sse2 media neon/; +$vp8_subtract_mby_media=vp8_subtract_mby_armv6; + +add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"; +specialize qw/vp8_subtract_mbuv mmx sse2 media neon/; +$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6; + +# +# Motion search +# +add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +specialize qw/vp8_full_search_sad sse3 sse4_1/; +$vp8_full_search_sad_sse3=vp8_full_search_sadx3; +$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8; + +add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, 
int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +specialize qw/vp8_refining_search_sad sse3/; +$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4; + +add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4; + +# +# Alt-ref Noise Reduction (ARNR) +# +if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { + add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"; + specialize qw/vp8_temporal_filter_apply sse2/; +} + +# +# Pick Loopfilter +# +add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize qw/vp8_yv12_copy_partial_frame neon/; + +# +# Denoiser filter +# +if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") { + add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"; + specialize qw/vp8_denoiser_filter sse2 neon/; +} + +# End of encoder only functions +} +1; diff --git a/source/libvpx/vp8/common/rtcd_defs.sh b/source/libvpx/vp8/common/rtcd_defs.sh deleted file mode 100755 index 28e6754..0000000 --- a/source/libvpx/vp8/common/rtcd_defs.sh +++ /dev/null @@ -1,542 +0,0 @@ -vp8_common_forward_decls() { -cat <<EOF -/* - * VP8 - */ - -struct blockd; -struct macroblockd; -struct loop_filter_info; - -/* Encoder forward decls */ -struct block; -struct macroblock; -struct variance_vtable; -union int_mv; -struct yv12_buffer_config; -EOF -} -forward_decls 
vp8_common_forward_decls - -# -# system state -# -prototype void vp8_clear_system_state "" -specialize vp8_clear_system_state mmx -vp8_clear_system_state_mmx=vpx_reset_mmx_state - -# -# Dequant -# -prototype void vp8_dequantize_b "struct blockd*, short *dqc" -specialize vp8_dequantize_b mmx media neon -vp8_dequantize_b_media=vp8_dequantize_b_v6 - -prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride" -specialize vp8_dequant_idct_add mmx media neon dspr2 -vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6 -vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2 - -prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs" -specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2 -vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6 -vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2 - -prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs" -specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2 -vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6 -vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2 - -# -# Loopfilter -# -prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2 -vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6 -vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2 - -prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bv mmx sse2 media neon dspr2 -vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6 -vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2 - -prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, 
unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2 -vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6 -vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2 - -prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bh mmx sse2 media neon dspr2 -vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6 -vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2 - - -prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_mbv mmx sse2 media neon -vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c -vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx -vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2 -vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6 -vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon - -prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_mbh mmx sse2 media neon -vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c -vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx -vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2 -vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6 -vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon - -prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_bv mmx sse2 media neon -vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c -vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx -vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2 -vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6 
-vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon - -prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_bh mmx sse2 media neon -vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c -vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx -vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2 -vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6 -vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon - -# -# IDCT -# -#idct16 -prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride" -specialize vp8_short_idct4x4llm mmx media neon dspr2 -vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual -vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2 - -#iwalsh1 -prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output" -specialize vp8_short_inv_walsh4x4_1 dspr2 -vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2 -# no asm yet - -#iwalsh16 -prototype void vp8_short_inv_walsh4x4 "short *input, short *output" -specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2 -vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6 -vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2 - -#idct1_scalar_add -prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride" -specialize vp8_dc_only_idct_add mmx media neon dspr2 -vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6 -vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2 - -# -# RECON -# -prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2 -vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6 -vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2 - -prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem8x8 mmx media neon 
dspr2 -vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6 -vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2 - -prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem8x4 mmx media neon dspr2 -vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6 -vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2 - -prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride" -specialize vp8_build_intra_predictors_mby_s sse2 ssse3 -#TODO: fix assembly for neon - -prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride" -specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3 - -prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left" -specialize vp8_intra4x4_predict media -vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6 - -# -# Postproc -# -if [ "$CONFIG_POSTPROC" = "yes" ]; then - prototype void vp8_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols,int flimit" - specialize vp8_mbpost_proc_down mmx sse2 - vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm - - prototype void vp8_mbpost_proc_across_ip "unsigned char *dst, int pitch, int rows, int cols,int flimit" - specialize vp8_mbpost_proc_across_ip sse2 - vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm - - prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size" - specialize vp8_post_proc_down_and_across_mb_row sse2 - - prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char 
whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch" - specialize vp8_plane_add_noise mmx sse2 - vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt - - prototype void vp8_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - specialize vp8_filter_by_weight16x16 sse2 - - prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - specialize vp8_filter_by_weight8x8 sse2 - - prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - # no asm yet -fi - -# -# Subpixel -# -prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6 -vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2 - -prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6 -vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2 - -prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2 
-vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6 -vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2 - -prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2 -vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6 -vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2 - -prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon -vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6 - -prototype void vp8_bilinear_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon -vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6 - -prototype void vp8_bilinear_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict8x4 mmx media neon -vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6 - -prototype void vp8_bilinear_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict4x4 mmx media neon -vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6 - -# -# Whole-pixel Variance -# -prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance4x4 mmx sse2 -vp8_variance4x4_sse2=vp8_variance4x4_wmt - -prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance8x8 mmx sse2 media neon -vp8_variance8x8_sse2=vp8_variance8x8_wmt -vp8_variance8x8_media=vp8_variance8x8_armv6 - 
-prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance8x16 mmx sse2 neon -vp8_variance8x16_sse2=vp8_variance8x16_wmt - -prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance16x8 mmx sse2 neon -vp8_variance16x8_sse2=vp8_variance16x8_wmt - -prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance16x16 mmx sse2 media neon -vp8_variance16x16_sse2=vp8_variance16x16_wmt -vp8_variance16x16_media=vp8_variance16x16_armv6 - -# -# Sub-pixel Variance -# -prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance4x4 mmx sse2 -vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt - -prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance8x8 mmx sse2 media neon -vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt -vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6 - -prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance8x16 mmx sse2 -vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt - -prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" 
-specialize vp8_sub_pixel_variance16x8 mmx sse2 ssse3 -vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt - -prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon -vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt -vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_h mmx sse2 media neon -vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt -vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_v mmx sse2 media neon -vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt -vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_hv mmx sse2 media neon -vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt -vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6 - -# -# Single block SAD -# -prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad4x4 mmx sse2 neon -vp8_sad4x4_sse2=vp8_sad4x4_wmt - -prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, 
const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad8x8 mmx sse2 neon -vp8_sad8x8_sse2=vp8_sad8x8_wmt - -prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad8x16 mmx sse2 neon -vp8_sad8x16_sse2=vp8_sad8x16_wmt - -prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad16x8 mmx sse2 neon -vp8_sad16x8_sse2=vp8_sad16x8_wmt - -prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad16x16 mmx sse2 sse3 media neon -vp8_sad16x16_sse2=vp8_sad16x16_wmt -vp8_sad16x16_media=vp8_sad16x16_armv6 - -# -# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally -# -prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad4x4x3 sse3 - -prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x8x3 sse3 - -prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x16x3 sse3 - -prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x8x3 sse3 ssse3 - -prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x16x3 sse3 ssse3 - -# Note the only difference in the following prototypes is that they return into -# an array of short -prototype void 
vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad4x4x8 sse4_1 -vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4 - -prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad8x8x8 sse4_1 -vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4 - -prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad8x16x8 sse4_1 -vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4 - -prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad16x8x8 sse4_1 -vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4 - -prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad16x16x8 sse4_1 -vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4 - -# -# Multi-block SAD, comparing a reference to N independent blocks -# -prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad4x4x4d sse3 - -prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x8x4d sse3 - -prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x16x4d sse3 - -prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x8x4d sse3 - -prototype void vp8_sad16x16x4d "const unsigned char 
*src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x16x4d sse3 - -# -# Encoder functions below this point. -# -if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then - -# -# Sum of squares (vector) -# -prototype unsigned int vp8_get_mb_ss "const short *" -specialize vp8_get_mb_ss mmx sse2 - -# -# SSE (Sum Squared Error) -# -prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_mse16x16 mmx sse2 -vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt - -prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_mse16x16 mmx sse2 media neon -vp8_mse16x16_sse2=vp8_mse16x16_wmt -vp8_mse16x16_media=vp8_mse16x16_armv6 - -prototype unsigned int vp8_get4x4sse_cs "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride" -specialize vp8_get4x4sse_cs mmx neon - -# -# Block copy -# -case $arch in - x86*) - prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n" - specialize vp8_copy32xn sse2 sse3 - ;; -esac - -# -# Structured Similarity (SSIM) -# -if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 - - prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp8_ssim_parms_8x8 $sse2_on_x86_64 - - prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp8_ssim_parms_16x16 
$sse2_on_x86_64 -fi - -# -# Forward DCT -# -prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch" -specialize vp8_short_fdct4x4 mmx sse2 media neon -vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6 - -prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch" -specialize vp8_short_fdct8x4 mmx sse2 media neon -vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6 - -prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch" -specialize vp8_short_walsh4x4 sse2 media neon -vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6 - -# -# Quantizer -# -prototype void vp8_regular_quantize_b "struct block *, struct blockd *" -specialize vp8_regular_quantize_b sse2 #sse4_1 -# TODO(johann) Update sse4 implementation and re-enable -#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4 - -prototype void vp8_fast_quantize_b "struct block *, struct blockd *" -specialize vp8_fast_quantize_b sse2 ssse3 media neon -vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6 - -prototype void vp8_regular_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2" -# no asm yet - -prototype void vp8_fast_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2" -specialize vp8_fast_quantize_b_pair neon - -prototype void vp8_quantize_mb "struct macroblock *" -specialize vp8_quantize_mb neon - -prototype void vp8_quantize_mby "struct macroblock *" -specialize vp8_quantize_mby neon - -prototype void vp8_quantize_mbuv "struct macroblock *" -specialize vp8_quantize_mbuv neon - -# -# Block subtraction -# -prototype int vp8_block_error "short *coeff, short *dqcoeff" -specialize vp8_block_error mmx sse2 -vp8_block_error_sse2=vp8_block_error_xmm - -prototype int vp8_mbblock_error "struct macroblock *mb, int dc" -specialize vp8_mbblock_error mmx sse2 -vp8_mbblock_error_sse2=vp8_mbblock_error_xmm - -prototype int vp8_mbuverror "struct macroblock *mb" -specialize vp8_mbuverror mmx sse2 
-vp8_mbuverror_sse2=vp8_mbuverror_xmm - -prototype void vp8_subtract_b "struct block *be, struct blockd *bd, int pitch" -specialize vp8_subtract_b mmx sse2 media neon -vp8_subtract_b_media=vp8_subtract_b_armv6 - -prototype void vp8_subtract_mby "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride" -specialize vp8_subtract_mby mmx sse2 media neon -vp8_subtract_mby_media=vp8_subtract_mby_armv6 - -prototype void vp8_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride" -specialize vp8_subtract_mbuv mmx sse2 media neon -vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6 - -# -# Motion search -# -prototype int vp8_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -specialize vp8_full_search_sad sse3 sse4_1 -vp8_full_search_sad_sse3=vp8_full_search_sadx3 -vp8_full_search_sad_sse4_1=vp8_full_search_sadx8 - -prototype int vp8_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -specialize vp8_refining_search_sad sse3 -vp8_refining_search_sad_sse3=vp8_refining_search_sadx4 - -prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4 - -# -# Alt-ref Noise Reduction (ARNR) -# -if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then - prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int 
*accumulator, unsigned short *count" - specialize vp8_temporal_filter_apply sse2 -fi - -# -# Pick Loopfilter -# -prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_partial_frame neon - -# -# Denoiser filter -# -if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then - prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset" - specialize vp8_denoiser_filter sse2 neon -fi - -# End of encoder only functions -fi diff --git a/source/libvpx/vp8/common/x86/postproc_mmx.asm b/source/libvpx/vp8/common/x86/postproc_mmx.asm index 5cf110b..8be3431 100644 --- a/source/libvpx/vp8/common/x86/postproc_mmx.asm +++ b/source/libvpx/vp8/common/x86/postproc_mmx.asm @@ -204,13 +204,16 @@ sym(vp8_mbpost_proc_down_mmx): and rcx, 15 movd DWORD PTR [rsp+rcx*4], mm1 ;d[rcx*4] + cmp edx, 8 + jl .skip_assignment + mov rcx, rdx sub rcx, 8 - and rcx, 15 movd mm1, DWORD PTR [rsp+rcx*4] ;d[rcx*4] - movd [rsi], mm1 + +.skip_assignment lea rsi, [rsi+rax] lea rdi, [rdi+rax] diff --git a/source/libvpx/vp8/common/x86/postproc_sse2.asm b/source/libvpx/vp8/common/x86/postproc_sse2.asm index 00f84a3..f53daa7 100644 --- a/source/libvpx/vp8/common/x86/postproc_sse2.asm +++ b/source/libvpx/vp8/common/x86/postproc_sse2.asm @@ -425,13 +425,16 @@ sym(vp8_mbpost_proc_down_xmm): and rcx, 15 movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8] + cmp edx, 8 + jl .skip_assignment + mov rcx, rdx sub rcx, 8 - and rcx, 15 movq mm0, [rsp + rcx*8] ;d[rcx*8] - movq [rsi], mm0 + +.skip_assignment lea rsi, [rsi+rax] lea rdi, [rdi+rax] diff --git a/source/libvpx/vp8/encoder/onyx_if.c b/source/libvpx/vp8/encoder/onyx_if.c index 849a0ed..ef37c0e 100644 --- a/source/libvpx/vp8/encoder/onyx_if.c +++ b/source/libvpx/vp8/encoder/onyx_if.c @@ -1401,6 +1401,7 @@ static void update_layer_contexts (VP8_COMP *cpi) 
unsigned int i; double prev_layer_framerate=0; + assert(oxcf->number_of_layers <= VPX_TS_MAX_LAYERS); for (i=0; i<oxcf->number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; @@ -5071,6 +5072,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l unsigned int i; /* Update frame rates for each layer */ + assert(cpi->oxcf.number_of_layers <= VPX_TS_MAX_LAYERS); for (i=0; i<cpi->oxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; diff --git a/source/libvpx/vp8/encoder/rdopt.c b/source/libvpx/vp8/encoder/rdopt.c index 5016cc4..387701c 100644 --- a/source/libvpx/vp8/encoder/rdopt.c +++ b/source/libvpx/vp8/encoder/rdopt.c @@ -528,19 +528,16 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); -# define QC( I) ( qcoeff_ptr [vp8_default_zig_zag1d[I]] ) - + assert(eob <= 16); for (; c < eob; c++) { - int v = QC(c); - int t = vp8_dct_value_tokens_ptr[v].Token; + const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]]; + const int t = vp8_dct_value_tokens_ptr[v].Token; cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t]; cost += vp8_dct_value_cost_ptr[v]; pt = vp8_prev_token_class[t]; } -# undef QC - if (c < 16) cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; diff --git a/source/libvpx/vp8/encoder/tokenize.c b/source/libvpx/vp8/encoder/tokenize.c index 11559a7..2dc8205 100644 --- a/source/libvpx/vp8/encoder/tokenize.c +++ b/source/libvpx/vp8/encoder/tokenize.c @@ -213,6 +213,7 @@ static void tokenize1st_order_b /* Luma */ for (block = 0; block < 16; block++, b++) { + const int eob = *b->eob; tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; @@ -223,7 +224,7 @@ static void tokenize1st_order_b c = type ? 
0 : 1; - if(c >= *b->eob) + if(c >= eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -250,7 +251,8 @@ static void tokenize1st_order_b t++; c++; - for (; c < *b->eob; c++) + assert(eob <= 16); + for (; c < eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; @@ -286,6 +288,7 @@ static void tokenize1st_order_b /* Chroma */ for (block = 16; block < 24; block++, b++) { + const int eob = *b->eob; tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; @@ -294,7 +297,7 @@ static void tokenize1st_order_b VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - if(!(*b->eob)) + if(!eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -321,7 +324,8 @@ static void tokenize1st_order_b t++; c = 1; - for (; c < *b->eob; c++) + assert(eob <= 16); + for (; c < eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; diff --git a/source/libvpx/vp8/vp8_common.mk b/source/libvpx/vp8/vp8_common.mk index ac91d7a..dfb54a5 100644 --- a/source/libvpx/vp8/vp8_common.mk +++ b/source/libvpx/vp8/vp8_common.mk @@ -47,7 +47,7 @@ VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/reconinter.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c -VP8_COMMON_SRCS-yes += common/rtcd_defs.sh +VP8_COMMON_SRCS-yes += common/rtcd_defs.pl VP8_COMMON_SRCS-yes += common/setupintrarecon.h VP8_COMMON_SRCS-yes += common/swapyv12buffer.h VP8_COMMON_SRCS-yes += common/systemdependent.h @@ -189,4 +189,4 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c -$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh)) +$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl)) diff --git a/source/libvpx/vp9/common/vp9_alloccommon.c b/source/libvpx/vp9/common/vp9_alloccommon.c index a72821b..f44ada1 100644 --- a/source/libvpx/vp9/common/vp9_alloccommon.c +++ 
b/source/libvpx/vp9/common/vp9_alloccommon.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" @@ -18,70 +17,100 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_systemdependent.h" -void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) { - const int stride = cm->mode_info_stride; +static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) { int i; - // Clear down top border row - vpx_memset(mi, 0, sizeof(MODE_INFO) * stride); + // Top border row + vpx_memset(mi, 0, sizeof(*mi) * cm->mi_stride); - // Clear left border column - for (i = 1; i < cm->mi_rows + 1; i++) - vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO)); + // Left border column + for (i = 1; i < cm->mi_rows + 1; ++i) + vpx_memset(&mi[i * cm->mi_stride], 0, sizeof(*mi)); } -void vp9_free_frame_buffers(VP9_COMMON *cm) { - int i; +static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { + cm->mi_cols = aligned_width >> MI_SIZE_LOG2; + cm->mi_rows = aligned_height >> MI_SIZE_LOG2; + cm->mi_stride = cm->mi_cols + MI_BLOCK_SIZE; - for (i = 0; i < FRAME_BUFFERS; i++) { - vp9_free_frame_buffer(&cm->frame_bufs[i].buf); + cm->mb_cols = (cm->mi_cols + 1) >> 1; + cm->mb_rows = (cm->mi_rows + 1) >> 1; + cm->MBs = cm->mb_rows * cm->mb_cols; +} - if (cm->frame_bufs[i].ref_count > 0 && - cm->frame_bufs[i].raw_frame_buffer.data != NULL) { - cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer); - cm->frame_bufs[i].ref_count = 0; - } - } +static void setup_mi(VP9_COMMON *cm) { + cm->mi = cm->mip + cm->mi_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; - vp9_free_frame_buffer(&cm->post_proc_buffer); + vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + vpx_memset(cm->mi_grid_base, 0, 
cm->mi_stride * (cm->mi_rows + 1) * + sizeof(*cm->mi_grid_base)); + + clear_mi_border(cm, cm->prev_mip); +} + +static int alloc_mi(VP9_COMMON *cm, int mi_size) { + cm->mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip)); + if (cm->mip == NULL) + return 1; + + cm->prev_mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->prev_mip)); + if (cm->prev_mip == NULL) + return 1; + + cm->mi_grid_base = + (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); + if (cm->mi_grid_base == NULL) + return 1; + + cm->prev_mi_grid_base = + (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base)); + if (cm->prev_mi_grid_base == NULL) + return 1; + + return 0; +} + +static void free_mi(VP9_COMMON *cm) { vpx_free(cm->mip); vpx_free(cm->prev_mip); - vpx_free(cm->last_frame_seg_map); vpx_free(cm->mi_grid_base); vpx_free(cm->prev_mi_grid_base); cm->mip = NULL; cm->prev_mip = NULL; - cm->last_frame_seg_map = NULL; cm->mi_grid_base = NULL; cm->prev_mi_grid_base = NULL; } -static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mode_info_stride = cm->mi_cols + MI_BLOCK_SIZE; +void vp9_free_frame_buffers(VP9_COMMON *cm) { + int i; - cm->mb_cols = (cm->mi_cols + 1) >> 1; - cm->mb_rows = (cm->mi_rows + 1) >> 1; - cm->MBs = cm->mb_rows * cm->mb_cols; -} + for (i = 0; i < FRAME_BUFFERS; ++i) { + vp9_free_frame_buffer(&cm->frame_bufs[i].buf); -static void setup_mi(VP9_COMMON *cm) { - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + if (cm->frame_bufs[i].ref_count > 0 && + cm->frame_bufs[i].raw_frame_buffer.data != NULL) { + cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer); + cm->frame_bufs[i].ref_count = 0; + } + } - vpx_memset(cm->mip, 0, 
- cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vp9_free_frame_buffer(&cm->post_proc_buffer); + + free_mi(cm); + + vpx_free(cm->last_frame_seg_map); + cm->last_frame_seg_map = NULL; - vpx_memset(cm->mi_grid_base, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * - sizeof(*cm->mi_grid_base)); + vpx_free(cm->above_context); + cm->above_context = NULL; - vp9_update_mode_info_border(cm, cm->prev_mip); + vpx_free(cm->above_seg_context); + cm->above_seg_context = NULL; } int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { @@ -89,7 +118,6 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; - int mi_size; if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y, VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0) @@ -97,37 +125,33 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); - // Allocation - mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE); - - vpx_free(cm->mip); - cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->mip) - goto fail; - - vpx_free(cm->prev_mip); - cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->prev_mip) - goto fail; - - vpx_free(cm->mi_grid_base); - cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); - if (!cm->mi_grid_base) - goto fail; - - vpx_free(cm->prev_mi_grid_base); - cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base)); - if (!cm->prev_mi_grid_base) + free_mi(cm); + if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; setup_mi(cm); // Create the segmentation map structure and set to 0. 
vpx_free(cm->last_frame_seg_map); - cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1); + cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1); if (!cm->last_frame_seg_map) goto fail; + vpx_free(cm->above_context); + cm->above_context = + (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * + MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) + goto fail; + + vpx_free(cm->above_seg_context); + cm->above_seg_context = + (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), + sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) + goto fail; + return 0; fail: @@ -136,13 +160,11 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { } int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { - int i; - const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; - int mi_size; + int i; vp9_free_frame_buffers(cm); @@ -167,30 +189,27 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); - // Allocation - mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE); - - cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->mip) + if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; - cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->prev_mip) - goto fail; + setup_mi(cm); - cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); - if (!cm->mi_grid_base) + // Create the segmentation map structure and set to 0. 
+ cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1); + if (!cm->last_frame_seg_map) goto fail; - cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base)); - if (!cm->prev_mi_grid_base) + cm->above_context = + (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * + MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) goto fail; - setup_mi(cm); - - // Create the segmentation map structure and set to 0. - cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1); - if (!cm->last_frame_seg_map) + cm->above_seg_context = + (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), + sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) goto fail; return 0; @@ -205,10 +224,6 @@ void vp9_remove_common(VP9_COMMON *cm) { vp9_free_internal_frame_buffers(&cm->int_frame_buffers); } -void vp9_initialize_common() { - vp9_init_neighbors(); -} - void vp9_update_frame_size(VP9_COMMON *cm) { const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2); @@ -220,3 +235,19 @@ void vp9_update_frame_size(VP9_COMMON *cm) { if (cm->last_frame_seg_map) vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); } + +void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { + // Current mip will be the prev_mip for the next frame. + MODE_INFO *temp = cm->prev_mip; + MODE_INFO **temp2 = cm->prev_mi_grid_base; + cm->prev_mip = cm->mip; + cm->mip = temp; + cm->prev_mi_grid_base = cm->mi_grid_base; + cm->mi_grid_base = temp2; + + // Update the upper left visible macroblock ptrs. 
+ cm->mi = cm->mip + cm->mi_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; +} diff --git a/source/libvpx/vp9/common/vp9_alloccommon.h b/source/libvpx/vp9/common/vp9_alloccommon.h index 066c778..06636a9 100644 --- a/source/libvpx/vp9/common/vp9_alloccommon.h +++ b/source/libvpx/vp9/common/vp9_alloccommon.h @@ -12,24 +12,23 @@ #ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ #define VP9_COMMON_VP9_ALLOCCOMMON_H_ -#include "vp9/common/vp9_onyxc_int.h" - #ifdef __cplusplus extern "C" { #endif -void vp9_initialize_common(); +struct VP9Common; + +void vp9_remove_common(struct VP9Common *cm); -void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi); +int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height); -void vp9_remove_common(VP9_COMMON *cm); +int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height); -int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height); -int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height); -void vp9_free_frame_buffers(VP9_COMMON *cm); +void vp9_free_frame_buffers(struct VP9Common *cm); +void vp9_update_frame_size(struct VP9Common *cm); -void vp9_update_frame_size(VP9_COMMON *cm); +void vp9_swap_mi_and_prev_mi(struct VP9Common *cm); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/common/vp9_blockd.c b/source/libvpx/vp9/common/vp9_blockd.c index e1d1318..fedfb18 100644 --- a/source/libvpx/vp9/common/vp9_blockd.c +++ b/source/libvpx/vp9/common/vp9_blockd.c @@ -40,7 +40,7 @@ void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; // block and transform sizes, in number of 
4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. diff --git a/source/libvpx/vp9/common/vp9_blockd.h b/source/libvpx/vp9/common/vp9_blockd.h index 2a0ebfb..55320a6 100644 --- a/source/libvpx/vp9/common/vp9_blockd.h +++ b/source/libvpx/vp9/common/vp9_blockd.h @@ -120,23 +120,23 @@ static INLINE int mi_width_log2(BLOCK_SIZE sb_type) { // This structure now relates to 8x8 block regions. typedef struct { - MB_PREDICTION_MODE mode, uv_mode; - MV_REFERENCE_FRAME ref_frame[2]; + // Common for both INTER and INTRA blocks + BLOCK_SIZE sb_type; + MB_PREDICTION_MODE mode; TX_SIZE tx_size; - int_mv mv[2]; // for each reference frame used - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; - - uint8_t mode_context[MAX_REF_FRAMES]; - - unsigned char skip; // 0=need to decode coeffs, 1=no coefficients - unsigned char segment_id; // Segment id for this block. + uint8_t skip; + uint8_t segment_id; + uint8_t seg_id_predicted; // valid only when temporal_update is enabled - // Flags used for prediction status of various bit-stream signals - unsigned char seg_id_predicted; + // Only for INTRA blocks + MB_PREDICTION_MODE uv_mode; + // Only for INTER blocks + MV_REFERENCE_FRAME ref_frame[2]; + int_mv mv[2]; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + uint8_t mode_context[MAX_REF_FRAMES]; INTERP_FILTER interp_filter; - - BLOCK_SIZE sb_type; } MB_MODE_INFO; typedef struct { @@ -204,13 +204,10 @@ typedef struct RefBuffer { typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; - MODE_INFO *last_mi; - int mode_info_stride; + int mi_stride; // A NULL indicates that the 8x8 is not part of the image - MODE_INFO **mi_8x8; - MODE_INFO **prev_mi_8x8; - MODE_INFO *mi_stream; + MODE_INFO **mi; int up_available; int left_available; @@ -234,11 +231,10 @@ typedef struct macroblockd { /* Inverse transform function pointers. 
*/ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); - const InterpKernel *interp_kernel; - int corrupted; - /* Y,U,V,(A) */ + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; @@ -246,8 +242,6 @@ typedef struct macroblockd { PARTITION_CONTEXT left_seg_context[8]; } MACROBLOCKD; - - static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { const BLOCK_SIZE subsize = subsize_lookup[partition][bsize]; @@ -255,28 +249,25 @@ static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, return subsize; } -extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT]; +extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; + +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, + const MACROBLOCKD *xd) { + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + + if (plane_type != PLANE_TYPE_Y || is_inter_block(mbmi)) + return DCT_DCT; + return intra_mode_to_tx_type_lookup[mbmi->mode]; +} static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int ib) { - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi)) return DCT_DCT; - return mode2txfm_map[get_y_mode(mi, ib)]; -} - -static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type, - const MACROBLOCKD *xd) { - return plane_type == PLANE_TYPE_Y ? mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] - : DCT_DCT; -} - -static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type, - const MACROBLOCKD *xd) { - return plane_type == PLANE_TYPE_Y ? 
mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] - : DCT_DCT; + return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; } void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); diff --git a/source/libvpx/vp9/common/vp9_debugmodes.c b/source/libvpx/vp9/common/vp9_debugmodes.c index 24c785f..8f150a4 100644 --- a/source/libvpx/vp9/common/vp9_debugmodes.c +++ b/source/libvpx/vp9/common/vp9_debugmodes.c @@ -22,7 +22,7 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { * and uses the passed in member offset to print out the value of an integer * for each mbmi member value in the mi structure. */ -static void print_mi_data(VP9_COMMON *cm, FILE *file, char *descriptor, +static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, size_t member_offset) { int mi_row; int mi_col; @@ -47,7 +47,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, char *descriptor, } fprintf(file, "\n"); } -void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, char *file) { +void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int mi_row; int mi_col; int mi_index = 0; diff --git a/source/libvpx/vp9/common/vp9_entropy.h b/source/libvpx/vp9/common/vp9_entropy.h index bd5086a..6788eb6 100644 --- a/source/libvpx/vp9/common/vp9_entropy.h +++ b/source/libvpx/vp9/common/vp9_entropy.h @@ -16,7 +16,6 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_scan.h" -#include "vp9/common/vp9_entropymode.h" #ifdef __cplusplus extern "C" { @@ -176,13 +175,13 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, static const INLINE scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { return &vp9_default_scan_orders[tx_size]; } else { const MB_PREDICTION_MODE mode = 
get_y_mode(mi, block_idx); - return &vp9_scan_orders[tx_size][mode2txfm_map[mode]]; + return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; } } diff --git a/source/libvpx/vp9/common/vp9_entropymode.c b/source/libvpx/vp9/common/vp9_entropymode.c index 8921539..5b00b00 100644 --- a/source/libvpx/vp9/common/vp9_entropymode.c +++ b/source/libvpx/vp9/common/vp9_entropymode.c @@ -10,7 +10,6 @@ #include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_seg_common.h" @@ -315,18 +314,18 @@ static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] { 149, 144, }, }; -void vp9_init_mbmode_probs(VP9_COMMON *cm) { - vp9_copy(cm->fc.uv_mode_prob, default_if_uv_probs); - vp9_copy(cm->fc.y_mode_prob, default_if_y_probs); - vp9_copy(cm->fc.switchable_interp_prob, default_switchable_interp_prob); - vp9_copy(cm->fc.partition_prob, default_partition_probs); - vp9_copy(cm->fc.intra_inter_prob, default_intra_inter_p); - vp9_copy(cm->fc.comp_inter_prob, default_comp_inter_p); - vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p); - vp9_copy(cm->fc.single_ref_prob, default_single_ref_p); - cm->fc.tx_probs = default_tx_probs; - vp9_copy(cm->fc.skip_probs, default_skip_probs); - vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs); +void vp9_init_mode_probs(FRAME_CONTEXT *fc) { + vp9_copy(fc->uv_mode_prob, default_if_uv_probs); + vp9_copy(fc->y_mode_prob, default_if_y_probs); + vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); + vp9_copy(fc->partition_prob, default_partition_probs); + vp9_copy(fc->intra_inter_prob, default_intra_inter_p); + vp9_copy(fc->comp_inter_prob, default_comp_inter_p); + vp9_copy(fc->comp_ref_prob, default_comp_ref_p); + vp9_copy(fc->single_ref_prob, default_single_ref_p); + fc->tx_probs = default_tx_probs; + vp9_copy(fc->skip_probs, default_skip_probs); + vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); } const 
vp9_tree_index vp9_switchable_interp_tree @@ -452,7 +451,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { lf->last_sharpness_level = -1; vp9_default_coef_probs(cm); - vp9_init_mbmode_probs(cm); + vp9_init_mode_probs(&cm->fc); vp9_init_mv_probs(cm); if (cm->frame_type == KEY_FRAME || @@ -466,11 +465,10 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { } if (frame_is_intra_only(cm)) - vpx_memset(cm->prev_mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) * + sizeof(*cm->prev_mip)); - vpx_memset(cm->mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); vp9_zero(cm->ref_frame_sign_bias); diff --git a/source/libvpx/vp9/common/vp9_entropymode.h b/source/libvpx/vp9/common/vp9_entropymode.h index deec3f6..c7b1911 100644 --- a/source/libvpx/vp9/common/vp9_entropymode.h +++ b/source/libvpx/vp9/common/vp9_entropymode.h @@ -12,6 +12,8 @@ #define VP9_COMMON_VP9_ENTROPYMODE_H_ #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymv.h" #ifdef __cplusplus extern "C" { @@ -35,6 +37,42 @@ struct tx_counts { unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; }; +typedef struct frame_contexts { + vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; + vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; + vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; + vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; + vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1]; + vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; + vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; + vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; + vp9_prob single_ref_prob[REF_CONTEXTS][2]; + vp9_prob comp_ref_prob[REF_CONTEXTS]; + struct tx_probs tx_probs; + vp9_prob skip_probs[SKIP_CONTEXTS]; + 
nmv_context nmvc; +} FRAME_CONTEXT; + +typedef struct { + unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; + unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; + unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; + vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; + unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] + [COEF_BANDS][COEFF_CONTEXTS]; + unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; + unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; + unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; + unsigned int single_ref[REF_CONTEXTS][2][2]; + unsigned int comp_ref[REF_CONTEXTS][2]; + struct tx_counts tx; + unsigned int skip[SKIP_CONTEXTS][2]; + nmv_context_counts mv; +} FRAME_COUNTS; + extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] [INTRA_MODES - 1]; @@ -48,7 +86,7 @@ extern const vp9_tree_index vp9_switchable_interp_tree void vp9_setup_past_independence(struct VP9Common *cm); -void vp9_init_mbmode_probs(struct VP9Common *cm); +void vp9_init_mode_probs(FRAME_CONTEXT *fc); void vp9_adapt_mode_probs(struct VP9Common *cm); @@ -59,6 +97,15 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); +static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi, + const MODE_INFO *above_mi, + const MODE_INFO *left_mi, + int block) { + const MB_PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); + const MB_PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); + return vp9_kf_y_mode_prob[above][left]; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/common/vp9_entropymv.c b/source/libvpx/vp9/common/vp9_entropymv.c index e1f5ef7..5bb0482 100644 --- a/source/libvpx/vp9/common/vp9_entropymv.c 
+++ b/source/libvpx/vp9/common/vp9_entropymv.c @@ -8,14 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" #define MV_COUNT_SAT 20 #define MV_MAX_UPDATE_FACTOR 128 -/* Integer pel reference mv threshold for use of high-precision 1/8 mv */ +// Integer pel reference mv threshold for use of high-precision 1/8 mv #define COMPANDED_MVREF_THRESH 8 const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { @@ -49,32 +48,30 @@ const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { static const nmv_context default_nmv_context = { {32, 64, 96}, - { // NOLINT - { /* vert component */ // NOLINT - 128, /* sign */ - {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */ - {216}, /* class0 */ - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */ - {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ - {64, 96, 64}, /* fp */ - 160, /* class0_hp bit */ - 128, /* hp */ + { + { // Vertical component + 128, // sign + {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class + {216}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp }, - { /* hor component */ // NOLINT - 128, /* sign */ - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */ - {208}, /* class0 */ - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */ - {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ - {64, 96, 64}, /* fp */ - 160, /* class0_hp bit */ - 128, /* hp */ + { // Horizontal component + 128, // sign + {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class + {208}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp } }, }; -#define mv_class_base(c) ((c) ? 
(CLASS0_SIZE << (c + 2)) : 0) - static const uint8_t log_in_base_2[] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -121,13 +118,13 @@ static const uint8_t log_in_base_2[] = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 }; -MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { - MV_CLASS_TYPE c = MV_CLASS_0; - if (z >= CLASS0_SIZE * 4096) - c = MV_CLASS_10; - else - c = log_in_base_2[z >> 3]; +static INLINE int mv_class_base(MV_CLASS_TYPE c) { + return c ? CLASS0_SIZE << (c + 2) : 0; +} +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? + MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; if (offset) *offset = z - mv_class_base(c); return c; diff --git a/source/libvpx/vp9/common/vp9_entropymv.h b/source/libvpx/vp9/common/vp9_entropymv.h index 7e1f147..e7033e4 100644 --- a/source/libvpx/vp9/common/vp9_entropymv.h +++ b/source/libvpx/vp9/common/vp9_entropymv.h @@ -26,7 +26,7 @@ void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); int vp9_use_mv_hp(const MV *ref); -#define NMV_UPDATE_PROB 252 +#define MV_UPDATE_PROB 252 /* Symbols for coding which components are zero jointly */ #define MV_JOINTS 4 diff --git a/source/libvpx/vp9/common/vp9_enums.h b/source/libvpx/vp9/common/vp9_enums.h index e96e769..068284f 100644 --- a/source/libvpx/vp9/common/vp9_enums.h +++ b/source/libvpx/vp9/common/vp9_enums.h @@ -25,6 +25,23 @@ extern "C" { #define MI_MASK (MI_BLOCK_SIZE - 1) +// Bitstream profiles indicated by 2 bits in the uncompressed header. +// 00: Profile 0. 4:2:0 only. +// 10: Profile 1. adds 4:4:4, 4:2:2, alpha. +// 01: Profile 2. Supports 10-bit and 12-bit color only. +// 11: Undefined profile. 
+typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + MAX_PROFILES +} BITSTREAM_PROFILE; + +typedef enum BIT_DEPTH { + BITS_8, + BITS_10, + BITS_12 +} BIT_DEPTH; typedef enum BLOCK_SIZE { BLOCK_4X4, @@ -94,6 +111,12 @@ typedef enum { SRGB = 7 // RGB } COLOR_SPACE; +typedef enum { + VP9_LAST_FLAG = 1 << 0, + VP9_GOLD_FLAG = 1 << 1, + VP9_ALT_FLAG = 1 << 2, +} VP9_REFFRAME; + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/common/vp9_frame_buffers.c b/source/libvpx/vp9/common/vp9_frame_buffers.c index dffeb8a..a0b1e03 100644 --- a/source/libvpx/vp9/common/vp9_frame_buffers.c +++ b/source/libvpx/vp9/common/vp9_frame_buffers.c @@ -19,8 +19,9 @@ int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { list->num_internal_frame_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; - list->int_fb = vpx_calloc(list->num_internal_frame_buffers, - sizeof(*list->int_fb)); + list->int_fb = + (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, + sizeof(*list->int_fb)); return (list->int_fb == NULL); } diff --git a/source/libvpx/vp9/common/vp9_loopfilter.c b/source/libvpx/vp9/common/vp9_loopfilter.c index 868a66a..3ac5a05 100644 --- a/source/libvpx/vp9/common/vp9_loopfilter.c +++ b/source/libvpx/vp9/common/vp9_loopfilter.c @@ -228,6 +228,12 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { } } +static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, + const MB_MODE_INFO *mbmi) { + return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] + [mode_lf_lut[mbmi->mode]]; +} + void vp9_loop_filter_init(VP9_COMMON *cm) { loop_filter_info_n *lfi = &cm->lf_info; struct loopfilter *lf = &cm->lf; @@ -493,27 +499,25 @@ static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, const int shift_uv, LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->mbmi.sb_type; - const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const TX_SIZE 
tx_size_uv = get_uv_tx_size(&mi->mbmi); - const int skip = mi->mbmi.skip; - const int seg = mi->mbmi.segment_id; - const int ref = mi->mbmi.ref_frame[0]; - const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; - uint64_t *left_y = &lfm->left_y[tx_size_y]; - uint64_t *above_y = &lfm->above_y[tx_size_y]; - uint64_t *int_4x4_y = &lfm->int_4x4_y; - uint16_t *left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *int_4x4_uv = &lfm->int_4x4_uv; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi); + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; int i; - int w = num_8x8_blocks_wide_lookup[block_size]; - int h = num_8x8_blocks_high_lookup[block_size]; // If filter level is 0 we don't loop filter. if (!filter_level) { return; } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { vpx_memset(&lfm->lfl_y[index], filter_level, w); @@ -540,7 +544,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (skip && ref > INTRA_FRAME) + if (mbmi->skip && is_inter_block(mbmi)) return; // Here we are adding a mask for the transform size. The transform @@ -561,12 +565,11 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // boundaries. 
These differ from the 4x4 boundaries on the outside edge of // an 8x8 in that the internal ones can be skipped and don't depend on // the prediction block size. - if (tx_size_y == TX_4X4) { + if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; - } - if (tx_size_uv == TX_4X4) { + + if (tx_size_uv == TX_4X4) *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; - } } // This function does the same thing as the one above with the exception that @@ -575,22 +578,20 @@ static void build_masks(const loop_filter_info_n *const lfi_n, static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->mbmi.sb_type; - const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const int skip = mi->mbmi.skip; - const int seg = mi->mbmi.segment_id; - const int ref = mi->mbmi.ref_frame[0]; - const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; - uint64_t *left_y = &lfm->left_y[tx_size_y]; - uint64_t *above_y = &lfm->above_y[tx_size_y]; - uint64_t *int_4x4_y = &lfm->int_4x4_y; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; int i; - int w = num_8x8_blocks_wide_lookup[block_size]; - int h = num_8x8_blocks_high_lookup[block_size]; if (!filter_level) { return; } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { vpx_memset(&lfm->lfl_y[index], filter_level, w); @@ -601,7 +602,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= 
left_prediction_mask[block_size] << shift_y; - if (skip && ref > INTRA_FRAME) + if (mbmi->skip && is_inter_block(mbmi)) return; *above_y |= (size_mask[block_size] & @@ -610,9 +611,8 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) << shift_y; - if (tx_size_y == TX_4X4) { + if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; - } } // This function sets up the bit masks for the entire 64x64 region represented @@ -868,13 +868,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); } -static uint8_t build_lfi(const loop_filter_info_n *lfi_n, - const MB_MODE_INFO *mbmi) { - const int seg = mbmi->segment_id; - const int ref = mbmi->ref_frame[0]; - return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]]; -} - static void filter_selectively_vert(uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, @@ -916,7 +909,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm, const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_x; const int col_step = 1 << ss_y; - const int row_step_stride = cm->mode_info_stride * row_step; + const int row_step_stride = cm->mi_stride * row_step; struct buf_2d *const dst = &plane->dst; uint8_t* const dst0 = dst->buf; unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; @@ -953,7 +946,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm, // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = - build_lfi(&cm->lf_info, &mi[0].mbmi))) + get_filter_level(&cm->lf_info, &mi[0].mbmi))) continue; // Build masks based on the transform size of each block @@ -1208,17 +1201,17 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, xd->plane[1].subsampling_x == 1); for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * 
cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; - setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, - cm->mode_info_stride, &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, + &lfm); for (plane = 0; plane < num_planes; ++plane) { if (use_420) diff --git a/source/libvpx/vp9/common/vp9_mvref_common.c b/source/libvpx/vp9/common/vp9_mvref_common.c index e5f3fed..61682c4 100644 --- a/source/libvpx/vp9/common/vp9_mvref_common.c +++ b/source/libvpx/vp9/common/vp9_mvref_common.c @@ -148,28 +148,30 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector it will also // skip all additional processing and jump to done! -#define ADD_MV_REF_LIST(MV) \ +#define ADD_MV_REF_LIST(mv) \ do { \ if (refmv_count) { \ - if ((MV).as_int != mv_ref_list[0].as_int) { \ - mv_ref_list[refmv_count] = (MV); \ + if ((mv).as_int != mv_ref_list[0].as_int) { \ + mv_ref_list[refmv_count] = (mv); \ goto Done; \ } \ } else { \ - mv_ref_list[refmv_count++] = (MV); \ + mv_ref_list[refmv_count++] = (mv); \ } \ } while (0) // If either reference frame is different, not INTRA, and they // are different from each other scale and add the mv to our list. 
-#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \ +#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \ do { \ - if ((CANDIDATE)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \ - if ((CANDIDATE)->ref_frame[1] != ref_frame && \ - has_second_ref(CANDIDATE) && \ - (CANDIDATE)->mv[1].as_int != (CANDIDATE)->mv[0].as_int) \ - ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \ + } \ } while (0) @@ -188,15 +190,19 @@ static INLINE int is_inside(const TileInfo *const tile, // to try and find candidate reference vectors. static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, - MODE_INFO *mi, const MODE_INFO *prev_mi, - MV_REFERENCE_FRAME ref_frame, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int block_idx, int mi_row, int mi_col) { + int block, int mi_row, int mi_col) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; + const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi + ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] + : NULL; + const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; + + const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; - const MB_MODE_INFO *const prev_mbmi = cm->coding_use_prev_mi && prev_mi ? 
- &prev_mi->mbmi : NULL; + int different_ref_found = 0; int context_counter = 0; @@ -209,24 +215,17 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi_8x8[mv_ref->col + mv_ref->row - * xd->mode_info_stride]; + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; + different_ref_found = 1; - // Check if the candidate comes from the same reference frame. - if (candidate->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, - mv_ref->col, block_idx)); - different_ref_found = candidate->ref_frame[1] != ref_frame; - } else { - if (candidate->ref_frame[1] == ref_frame) - // Add second motion vector if it has the same ref_frame. 
- ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, - mv_ref->col, block_idx)); - different_ref_found = 1; - } + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block)); + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block)); } } @@ -236,18 +235,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col + - mv_ref->row - * xd->mode_info_stride]->mbmi; + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]->mbmi; + different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) { + if (candidate->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(candidate->mv[0]); - different_ref_found = candidate->ref_frame[1] != ref_frame; - } else { - if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[1]); - different_ref_found = 1; - } + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate->mv[1]); } } @@ -266,19 +261,17 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col + - mv_ref->row - * xd->mode_info_stride]->mbmi; + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row + * xd->mi_stride]->mbmi; // If the candidate is INTRA we don't want to consider its mv. - if (is_inter_block(candidate)) - IF_DIFF_REF_FRAME_ADD_MV(candidate); + IF_DIFF_REF_FRAME_ADD_MV(candidate); } } } // Since we still don't have a candidate we'll try the last frame. 
- if (prev_mbmi && is_inter_block(prev_mbmi)) + if (prev_mbmi) IF_DIFF_REF_FRAME_ADD_MV(prev_mbmi); Done: @@ -292,11 +285,10 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, - MODE_INFO *mi, const MODE_INFO *prev_mi, - MV_REFERENCE_FRAME ref_frame, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col) { - find_mv_refs_idx(cm, xd, tile, mi, prev_mi, ref_frame, mv_ref_list, -1, + find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col); } @@ -328,14 +320,14 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest, int_mv *near) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; assert(MAX_MV_REF_CANDIDATES == 2); - find_mv_refs_idx(cm, xd, tile, mi, xd->last_mi, mi->mbmi.ref_frame[ref], - mv_list, block, mi_row, mi_col); + find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block, + mi_row, mi_col); near->as_int = 0; switch (block) { diff --git a/source/libvpx/vp9/common/vp9_mvref_common.h b/source/libvpx/vp9/common/vp9_mvref_common.h index 04cb000..903ac02 100644 --- a/source/libvpx/vp9/common/vp9_mvref_common.h +++ b/source/libvpx/vp9/common/vp9_mvref_common.h @@ -31,10 +31,8 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, - MODE_INFO *mi, const MODE_INFO *prev_mi, - MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col); + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int mi_row, int mi_col); // check a list of motion vectors by sad score using a number rows of pixels // above and a number cols of pixels in the left to select the one with best 
diff --git a/source/libvpx/vp9/common/vp9_onyx.h b/source/libvpx/vp9/common/vp9_onyx.h deleted file mode 100644 index 2220868..0000000 --- a/source/libvpx/vp9/common/vp9_onyx.h +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ONYX_H_ -#define VP9_COMMON_VP9_ONYX_H_ - -#include "./vpx_config.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx/vp8cx.h" -#include "vpx_scale/yv12config.h" -#include "vp9/common/vp9_ppflags.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_SEGMENTS 8 - - typedef int *VP9_PTR; - - /* Create/destroy static data structures. */ - - typedef enum { - NORMAL = 0, - FOURFIVE = 1, - THREEFIVE = 2, - ONETWO = 3 - } VPX_SCALING; - - typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 - } VP9_REFFRAME; - - - typedef enum { - USAGE_LOCAL_FILE_PLAYBACK = 0x0, - USAGE_STREAM_FROM_SERVER = 0x1, - USAGE_CONSTRAINED_QUALITY = 0x2, - USAGE_CONSTANT_QUALITY = 0x3, - } END_USAGE; - - - typedef enum { - MODE_GOODQUALITY = 0x1, - MODE_BESTQUALITY = 0x2, - MODE_FIRSTPASS = 0x3, - MODE_SECONDPASS = 0x4, - MODE_SECONDPASS_BEST = 0x5, - MODE_REALTIME = 0x6, - } MODE; - - typedef enum { - FRAMEFLAGS_KEY = 1, - FRAMEFLAGS_GOLDEN = 2, - FRAMEFLAGS_ALTREF = 4, - } FRAMETYPE_FLAGS; - - typedef enum { - NO_AQ = 0, - VARIANCE_AQ = 1, - COMPLEXITY_AQ = 2, - AQ_MODES_COUNT // This should always be the last member of the enum - } AQ_MODES; - - typedef struct { - int version; // 4 versions of bitstream defined: - // 0 - best quality/slowest decode, - // 3 - lowest quality/fastest decode - int width; // width of 
data passed to the compressor - int height; // height of data passed to the compressor - double framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in kilobits per second - - int noise_sensitivity; // pre processing blur: recommendation 0 - int sharpness; // sharpening output: recommendation 0: - int cpu_used; - unsigned int rc_max_intra_bitrate_pct; - - // mode -> - // (0)=Realtime/Live Encoding. This mode is optimized for realtime - // encoding (for example, capturing a television signal or feed from - // a live camera). ( speed setting controls how fast ) - // (1)=Good Quality Fast Encoding. The encoder balances quality with the - // amount of time it takes to encode the output. ( speed setting - // controls how fast ) - // (2)=One Pass - Best Quality. The encoder places priority on the - // quality of the output over encoding speed. The output is compressed - // at the highest possible quality. This option takes the longest - // amount of time to encode. ( speed setting ignored ) - // (3)=Two Pass - First Pass. The encoder generates a file of statistics - // for use in the second encoding pass. ( speed setting controls how - // fast ) - // (4)=Two Pass - Second Pass. The encoder uses the statistics that were - // generated in the first encoding pass to create the compressed - // output. ( speed setting controls how fast ) - // (5)=Two Pass - Second Pass Best. The encoder uses the statistics that - // were generated in the first encoding pass to create the compressed - // output using the highest possible quality, and taking a - // longer amount of time to encode.. ( speed setting ignored ) - int mode; - - // Key Framing Operations - int auto_key; // autodetect cut scenes and set the keyframes - int key_freq; // maximum distance to key frame. 
- - int lag_in_frames; // how many frames lag before we start encoding - - // ---------------------------------------------------------------- - // DATARATE CONTROL OPTIONS - - int end_usage; // vbr or cbr - - // buffer targeting aggressiveness - int under_shoot_pct; - int over_shoot_pct; - - // buffering parameters - int64_t starting_buffer_level; // in seconds - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; - - // Frame drop threshold. - int drop_frames_water_mark; - - // controlling quality - int fixed_q; - int worst_allowed_q; - int best_allowed_q; - int cq_level; - int lossless; - int aq_mode; // Adaptive Quantization mode - - // two pass datarate control - int two_pass_vbrbias; // two pass datarate control tweaks - int two_pass_vbrmin_section; - int two_pass_vbrmax_section; - // END DATARATE CONTROL OPTIONS - // ---------------------------------------------------------------- - - // Spatial and temporal scalability. - int ss_number_layers; // Number of spatial layers. - int ts_number_layers; // Number of temporal layers. - // Bitrate allocation for spatial layers. - int ss_target_bitrate[VPX_SS_MAX_LAYERS]; - // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. - int ts_target_bitrate[VPX_TS_MAX_LAYERS]; - int ts_rate_decimator[VPX_TS_MAX_LAYERS]; - - // these parameters aren't to be used in final build don't use!!! - int play_alternate; - int alt_freq; - - int encode_breakout; // early breakout : for video conf recommend 800 - - /* Bitfield defining the error resiliency features to enable. - * Can provide decodable frames after losses in previous - * frames and decodable partitions after losses in the same frame. - */ - unsigned int error_resilient_mode; - - /* Bitfield defining the parallel decoding mode where the - * decoding in successive frames may be conducted in parallel - * just by decoding the frame headers. 
- */ - unsigned int frame_parallel_decoding_mode; - - int arnr_max_frames; - int arnr_strength; - int arnr_type; - - int tile_columns; - int tile_rows; - - struct vpx_fixed_buf two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; - - vp8e_tuning tuning; - } VP9_CONFIG; - - - void vp9_initialize_enc(); - - VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf); - void vp9_remove_compressor(VP9_PTR *comp); - - void vp9_change_config(VP9_PTR onyx, VP9_CONFIG *oxcf); - - // receive a frames worth of data. caller can assume that a copy of this - // frame is made and not just a copy of the pointer.. - int vp9_receive_raw_frame(VP9_PTR comp, unsigned int frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); - - int vp9_get_compressed_data(VP9_PTR comp, unsigned int *frame_flags, - size_t *size, uint8_t *dest, - int64_t *time_stamp, int64_t *time_end, - int flush); - - int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags); - - int vp9_use_as_reference(VP9_PTR comp, int ref_frame_flags); - - int vp9_update_reference(VP9_PTR comp, int ref_frame_flags); - - int vp9_copy_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - - int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb); - - int vp9_set_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - - int vp9_update_entropy(VP9_PTR comp, int update); - - int vp9_set_roimap(VP9_PTR comp, unsigned char *map, - unsigned int rows, unsigned int cols, - int delta_q[MAX_SEGMENTS], - int delta_lf[MAX_SEGMENTS], - unsigned int threshold[MAX_SEGMENTS]); - - int vp9_set_active_map(VP9_PTR comp, unsigned char *map, - unsigned int rows, unsigned int cols); - - int vp9_set_internal_size(VP9_PTR comp, - VPX_SCALING horiz_mode, VPX_SCALING vert_mode); - - int vp9_set_size_literal(VP9_PTR comp, unsigned int width, - unsigned int height); - - void vp9_set_svc(VP9_PTR comp, int use_svc); 
- - int vp9_get_quantizer(VP9_PTR c); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ONYX_H_ diff --git a/source/libvpx/vp9/common/vp9_onyxc_int.h b/source/libvpx/vp9/common/vp9_onyxc_int.h index e6d6ea7..77f563f 100644 --- a/source/libvpx/vp9/common/vp9_onyxc_int.h +++ b/source/libvpx/vp9/common/vp9_onyxc_int.h @@ -48,42 +48,6 @@ extern const struct { PARTITION_CONTEXT left; } partition_context_lookup[BLOCK_SIZES]; -typedef struct frame_contexts { - vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; - vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; - vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; - vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; - vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1]; - vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; - vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; - vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vp9_prob single_ref_prob[REF_CONTEXTS][2]; - vp9_prob comp_ref_prob[REF_CONTEXTS]; - struct tx_probs tx_probs; - vp9_prob skip_probs[SKIP_CONTEXTS]; - nmv_context nmvc; -} FRAME_CONTEXT; - -typedef struct { - unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; - unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; - unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; - vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; - unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] - [COEF_BANDS][COEFF_CONTEXTS]; - unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS]; - unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; - unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; - unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][2][2]; - unsigned int comp_ref[REF_CONTEXTS][2]; - struct tx_counts tx; - unsigned int skip[SKIP_CONTEXTS][2]; - nmv_context_counts mv; -} FRAME_COUNTS; - typedef enum { SINGLE_REFERENCE = 0, @@ -162,7 
+126,7 @@ typedef struct VP9Common { int MBs; int mb_rows, mi_rows; int mb_cols, mi_cols; - int mode_info_stride; + int mi_stride; /* profile settings */ TX_MODE tx_mode; @@ -215,7 +179,10 @@ typedef struct VP9Common { FRAME_COUNTS counts; unsigned int current_video_frame; - int version; + BITSTREAM_PROFILE profile; + + // BITS_8 in versions 0 and 1, BITS_10 or BITS_12 in version 2 + BIT_DEPTH bit_depth; #if CONFIG_VP9_POSTPROC struct postproc_state postproc_state; @@ -238,6 +205,9 @@ typedef struct VP9Common { // Handles memory for the codec. InternalFrameBufferList int_frame_buffers; + + PARTITION_CONTEXT *above_seg_context; + ENTROPY_CONTEXT *above_context; } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { @@ -270,23 +240,33 @@ static INLINE int mi_cols_aligned_to_sb(int n_mis) { return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); } -static INLINE const vp9_prob* get_partition_probs(VP9_COMMON *cm, int ctx) { +static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dqcoeff = xd->dqcoeff[i]; + xd->above_context[i] = cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + } + + xd->above_seg_context = cm->above_seg_context; + xd->mi_stride = cm->mi_stride; +} + +static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm, + int ctx) { return cm->frame_type == KEY_FRAME ? 
vp9_kf_partition_probs[ctx] : cm->fc.partition_prob[ctx]; } -static INLINE void set_skip_context( - MACROBLOCKD *xd, - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE], - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16], - int mi_row, int mi_col) { +static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { const int above_idx = mi_col * 2; const int left_idx = (mi_row * 2) & 15; int i; - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; - pd->above_context = above_context[i] + (above_idx >> pd->subsampling_x); - pd->left_context = left_context[i] + (left_idx >> pd->subsampling_y); + pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; + pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; } } @@ -304,7 +284,7 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->left_available = (mi_col > tile->mi_col_start); } -static void set_prev_mi(VP9_COMMON *cm) { +static INLINE void set_prev_mi(VP9_COMMON *cm) { const int use_prev_in_find_mv_refs = cm->width == cm->last_width && cm->height == cm->last_height && !cm->intra_only && @@ -312,19 +292,19 @@ static void set_prev_mi(VP9_COMMON *cm) { // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. cm->prev_mi = use_prev_in_find_mv_refs ? 
- cm->prev_mip + cm->mode_info_stride + 1 : NULL; + cm->prev_mip + cm->mi_stride + 1 : NULL; } static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { return cm->frame_type == KEY_FRAME || cm->intra_only; } -static INLINE void update_partition_context( - PARTITION_CONTEXT *above_seg_context, - PARTITION_CONTEXT left_seg_context[8], - int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) { - PARTITION_CONTEXT *const above_ctx = above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = left_seg_context + (mi_row & MI_MASK); +static INLINE void update_partition_context(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE subsize, + BLOCK_SIZE bsize) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); // num_4x4_blocks_wide_lookup[bsize] / 2 const int bs = num_8x8_blocks_wide_lookup[bsize]; @@ -336,12 +316,11 @@ static INLINE void update_partition_context( vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs); } -static INLINE int partition_plane_context( - const PARTITION_CONTEXT *above_seg_context, - const PARTITION_CONTEXT left_seg_context[8], - int mi_row, int mi_col, BLOCK_SIZE bsize) { - const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK); +static INLINE int partition_plane_context(const MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); const int bsl = mi_width_log2(bsize); const int bs = 1 << bsl; diff --git a/source/libvpx/vp9/common/vp9_postproc.h b/source/libvpx/vp9/common/vp9_postproc.h index b07d5d0..ebebc1a 100644 --- a/source/libvpx/vp9/common/vp9_postproc.h +++ b/source/libvpx/vp9/common/vp9_postproc.h @@ -13,6 +13,7 @@ #define VP9_COMMON_VP9_POSTPROC_H_ 
#include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" #ifdef __cplusplus diff --git a/source/libvpx/vp9/common/vp9_ppflags.h b/source/libvpx/vp9/common/vp9_ppflags.h index 8168935..e8b04d2 100644 --- a/source/libvpx/vp9/common/vp9_ppflags.h +++ b/source/libvpx/vp9/common/vp9_ppflags.h @@ -33,10 +33,12 @@ typedef struct { int post_proc_flag; int deblocking_level; int noise_level; +#if CONFIG_POSTPROC_VISUALIZER int display_ref_frame_flag; int display_mb_modes_flag; int display_b_modes_flag; int display_mv_flag; +#endif // CONFIG_POSTPROC_VISUALIZER } vp9_ppflags_t; #ifdef __cplusplus diff --git a/source/libvpx/vp9/common/vp9_pred_common.c b/source/libvpx/vp9/common/vp9_pred_common.c index 197bcb6..bc9d6ef 100644 --- a/source/libvpx/vp9/common/vp9_pred_common.c +++ b/source/libvpx/vp9/common/vp9_pred_common.c @@ -348,7 +348,7 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. int vp9_get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type]; + const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type]; const MB_MODE_INFO *const above_mbmi = get_mbmi(get_above_mi(xd)); const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd)); const int has_above = above_mbmi != NULL; diff --git a/source/libvpx/vp9/common/vp9_pred_common.h b/source/libvpx/vp9/common/vp9_pred_common.h index 6c7a0d3..1a7ba86 100644 --- a/source/libvpx/vp9/common/vp9_pred_common.h +++ b/source/libvpx/vp9/common/vp9_pred_common.h @@ -19,11 +19,11 @@ extern "C" { #endif static INLINE const MODE_INFO *get_above_mi(const MACROBLOCKD *const xd) { - return xd->up_available ? xd->mi_8x8[-xd->mode_info_stride] : NULL; + return xd->up_available ? 
xd->mi[-xd->mi_stride] : NULL; } static INLINE const MODE_INFO *get_left_mi(const MACROBLOCKD *const xd) { - return xd->left_available ? xd->mi_8x8[-1] : NULL; + return xd->left_available ? xd->mi[-1] : NULL; } int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids, diff --git a/source/libvpx/vp9/common/vp9_reconinter.c b/source/libvpx/vp9/common/vp9_reconinter.c index bdcfafa..e722d6a 100644 --- a/source/libvpx/vp9/common/vp9_reconinter.c +++ b/source/libvpx/vp9/common/vp9_reconinter.c @@ -144,8 +144,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -193,8 +194,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + (scaled_mv.col >> SUBPEL_BITS); inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel, - xs, ys); + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } } @@ -212,7 +212,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) @@ -244,11 +244,13 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, // TODO(jingning): This function serves as a placeholder for decoder prediction // using on demand border extension. It should be moved to /decoder/ directory. 
static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -265,15 +267,21 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv : mi_mv_pred_q4(mi, ref)) : mi->mbmi.mv[ref].as_mv; + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + // mv_precision precision is MV_PRECISION_Q4. + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + MV32 scaled_mv; int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf; - const MV mv_q4 = { - mv.row * (1 << (1 - pd->subsampling_y)), - mv.col * (1 << (1 - pd->subsampling_x)) - }; // Get reference frame pointer, width and height. if (plane == 0) { @@ -286,24 +294,38 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer; } - // Get block position in current frame. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + if (vp9_is_scaled(sf)) { + // Co-ordinate of containing block to pixel precision. 
+ int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - // Precision of x0_16 and y0_16 is 1/16th pixel. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; - if (vp9_is_scaled(sf)) { + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x0, sf); - y0 = sf->scale_value_y(y0, sf); - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + scaled_mv.row = mv_q4.row; scaled_mv.col = mv_q4.col; xs = ys = 16; @@ -347,15 +369,16 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0; // Extend the border. 
- build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0, - x0, y0, x1 - x0, y1 - y0, frame_width, frame_height); - buf_stride = x1 - x0; + build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1, + x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width, + frame_height); + buf_stride = x1 - x0 + 1; buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; } } inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys); + subpel_y, sf, w, h, ref, kernel, xs, ys); } } @@ -372,16 +395,51 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - dec_build_inter_predictors(xd, plane, i++, + dec_build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y); } else { - dec_build_inter_predictors(xd, plane, 0, + dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y); } } } + +void vp9_setup_dst_planes(MACROBLOCKD *xd, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col) { + uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, + pd->subsampling_x, pd->subsampling_y); + } +} + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *sf) { + if (src != NULL) { + int i; + uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int 
strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, + sf, pd->subsampling_x, pd->subsampling_y); + } + } +} diff --git a/source/libvpx/vp9/common/vp9_reconinter.h b/source/libvpx/vp9/common/vp9_reconinter.h index dccd609..86f3158 100644 --- a/source/libvpx/vp9/common/vp9_reconinter.h +++ b/source/libvpx/vp9/common/vp9_reconinter.h @@ -57,41 +57,12 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, dst->stride = stride; } -// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col -static void setup_dst_planes(MACROBLOCKD *xd, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col) { - uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; - int i; +void vp9_setup_dst_planes(MACROBLOCKD *xd, const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col); - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, - pd->subsampling_x, pd->subsampling_y); - } -} - -static void setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *sf) { - if (src != NULL) { - int i; - uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, - sf, pd->subsampling_x, pd->subsampling_y); - } - } -} +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const 
YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, + const struct scale_factors *sf); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/common/vp9_reconintra.c b/source/libvpx/vp9/common/vp9_reconintra.c index 71a41a9..44951b5 100644 --- a/source/libvpx/vp9/common/vp9_reconintra.c +++ b/source/libvpx/vp9/common/vp9_reconintra.c @@ -18,21 +18,17 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_onyxc_int.h" -const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = { - DCT_DCT, // DC - ADST_DCT, // V - DCT_ADST, // H - DCT_DCT, // D45 - ADST_ADST, // D135 - ADST_DCT, // D117 - DCT_ADST, // D153 - DCT_ADST, // D207 - ADST_DCT, // D63 - ADST_ADST, // TM - DCT_DCT, // NEARESTMV - DCT_DCT, // NEARMV - DCT_DCT, // ZEROMV - DCT_DCT // NEWMV +const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { + DCT_DCT, // DC + ADST_DCT, // V + DCT_ADST, // H + DCT_DCT, // D45 + ADST_ADST, // D135 + ADST_DCT, // D117 + DCT_ADST, // D153 + DCT_ADST, // D207 + ADST_DCT, // D63 + ADST_ADST, // TM }; #define intra_pred_sized(type, size) \ @@ -351,6 +347,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + vpx_memset(left_col, 129, 64); + // left if (left_available) { if (xd->mb_to_bottom_edge < 0) { @@ -370,8 +368,6 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } - } else { - vpx_memset(left_col, 129, bs); } // TODO(hkuang) do not extend 2*bs pixels for all modes. 
@@ -438,7 +434,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, } void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, - TX_SIZE tx_size, int mode, + TX_SIZE tx_size, MB_PREDICTION_MODE mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane) { diff --git a/source/libvpx/vp9/common/vp9_reconintra.h b/source/libvpx/vp9/common/vp9_reconintra.h index 800736d..abc1767 100644 --- a/source/libvpx/vp9/common/vp9_reconintra.h +++ b/source/libvpx/vp9/common/vp9_reconintra.h @@ -19,7 +19,7 @@ extern "C" { #endif void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, - TX_SIZE tx_size, int mode, + TX_SIZE tx_size, MB_PREDICTION_MODE mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane); diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl new file mode 100644 index 0000000..b455592 --- /dev/null +++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -0,0 +1,782 @@ +sub vp9_common_forward_decls() { +print <<EOF +/* + * VP9 + */ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_enums.h" + +struct macroblockd; + +/* Encoder forward decls */ +struct macroblock; +struct vp9_variance_vtable; + +#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] +struct mv; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp9_common_forward_decls/; + +# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. +if (vpx_config("CONFIG_USE_X86INC") eq "yes") { + $mmx_x86inc = 'mmx'; + $sse_x86inc = 'sse'; + $sse2_x86inc = 'sse2'; + $ssse3_x86inc = 'ssse3'; + $avx_x86inc = 'avx'; + $avx2_x86inc = 'avx2'; +} else { + $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = + $avx_x86inc = $avx2_x86inc = ''; +} + +# this variable is for functions that are 64 bit only. 
+if ($opts{arch} eq "x86_64") { + $mmx_x86_64 = 'mmx'; + $sse2_x86_64 = 'sse2'; + $ssse3_x86_64 = 'ssse3'; + $avx_x86_64 = 'avx'; + $avx2_x86_64 = 'avx2'; +} else { + $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = + $avx_x86_64 = $avx2_x86_64 = ''; +} + +# +# RECON +# +add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_4x4/; + +add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_4x4/; + +add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc"; + +add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 
+specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_4x4/; + +add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_4x4/; + +add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_4x4/; + +add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_8x8/; + +add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_8x8/; + +add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc"; + 
# 8x8 tm/dc predictors, the full 16x16 and 32x32 intra predictor sets, the
# loopfilter entry points (vertical/horizontal at 4/8/16 taps plus _dual
# forms), CONFIG_VP9_POSTPROC-guarded post-processing hooks (with explicit
# $..._sse2= aliases mapping to the historical _xmm/_wmt symbol names), blend
# helpers, the sub-pixel convolution set (vp9_convolve*), inverse transforms
# (idct/iht/iwht, incl. the 32x32_34 neon alias to the 1024-coeff version),
# and — inside the CONFIG_VP9_ENCODER guard — variance, get_sse_sum,
# sub-pixel (avg) variance, and the start of the SAD prototypes.
# NOTE(review): statements are split mid-token across the mangled physical
# lines in this span; do not edit tokens here without first restoring the
# original one-statement-per-line layout.
+add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_8x8/; + +add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_8x8/; + +add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_8x8/; + +add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_16x16/; + +add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_16x16/; + +add_proto qw/void 
vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_16x16/; + +add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_16x16/; + +add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_16x16/; + +add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc"; + +add_proto 
qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_32x32/; + +add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_32x32/; + +add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_32x32/; + +add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64"; + +add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_32x32/; + +add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_32x32/; + +add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_32x32/; + +# +# Loopfilter +# +add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/; + +add_proto qw/void 
vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const 
uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/; + +# +# post proc +# +if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { +add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit"; +specialize qw/vp9_mbpost_proc_down mmx sse2/; +$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm; + +add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit"; +specialize qw/vp9_mbpost_proc_across_ip sse2/; +$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm; + +add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; +specialize qw/vp9_post_proc_down_and_across mmx sse2/; +$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm; + +add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; +specialize qw/vp9_plane_add_noise mmx sse2/; +$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; +} + +add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_mb_inner/; + +add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_mb_outer/; + +add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_b/; + +# +# Sub Pixel Filters +# +add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_convolve_avg/, "const 
uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/; + +# +# dct +# +add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int 
dest_stride"; +specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/; +$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon; + +add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; +specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; +specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/; + +add_proto qw/void 
vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type"; +specialize qw/vp9_iht16x16_256_add sse2 dspr2/; + +# dct and add + +add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_iwht4x4_1_add/; + +add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_iwht4x4_16_add/; + +# +# Encoder functions below this point. +# +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { + + +# variance +add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize 
qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_16x16 sse2/; +$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2; + +add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; + +add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_8x8 sse2/; +$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2; + +add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x4/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance4x8/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize 
qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t 
*second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int 
*sse"; +specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form +add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; +#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad64x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad64x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x16/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x4/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad4x8/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc"; + 
# Remaining averaging-SAD prototypes (8x8_avg down to 4x4_avg), the half-pel
# variance helpers (16x16 with "$sse2_x86inc" variants; the 32x32/64x64 forms
# specialize to nothing, i.e. C only), and the 3-candidate SAD set
# (vp9_sad*x3, some with sse3/ssse3 variants).
+add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_h/; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize 
qw/vp9_variance_halfpixvar64x64_v/; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_hv/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_h/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_v/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_hv/; + +add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x64x3/; + +add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x32x3/; + +add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x16x3 sse3 ssse3/; + +add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x8x3 sse3 ssse3/; + +add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x16x3 sse3/; + +add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x8x3 sse3/; 
# Final sadx3 entry (4x4), the 8-candidate SAD set (vp9_sad*x8, taking a
# uint32_t sad_array and several sse4 variants), and the 4-candidate SAD set
# (vp9_sad*x4d, taking an array of reference pointers, mostly sse2).
# NOTE(review): the "TODO(jingning) ... convert these 4x8/8x4 functions into
# sse2 form" comment below looks stale — vp9_sad8x4x4d already specializes
# sse2 — but it is preserved verbatim; confirm against upstream before
# removing.  The last add_proto (vp9_sad4x8x4d) is truncated by the end of
# this fragment.
+ +add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x4x3 sse3/; + +add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad64x64x8/; + +add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad32x32x8/; + +add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad16x16x8 sse4/; + +add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad16x8x8 sse4/; + +add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x16x8 sse4/; + +add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x8x8 sse4/; + +add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x4x8/; + +add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad4x8x8/; + +add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad4x4x8 sse4/; + +add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x64x4d sse2 avx2/; + +add_proto qw/void vp9_sad32x64x4d/, "const uint8_t 
*src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x64x4d sse2/; + +add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x32x4d sse2/; + +add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x16x4d sse2/; + +add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x32x4d sse2/; + +add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x32x4d sse2 avx2/; + +add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x16x4d sse2/; + +add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x8x4d sse2/; + +add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x16x4d sse2/; + +add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x8x4d sse2/; + +# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form +add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x4x4d sse2/; + +add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int 
src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x8x4d sse/; + +add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x4x4d sse/; + +#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"; +#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/; + +add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse8x16/; + +add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse16x8/; + +add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse8x8/; + +add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_mse64x64/; + +add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_mse32x32/; + +add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; +specialize qw/vp9_get_mb_ss mmx sse2/; +# ENCODEMB INVOKE + +add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t 
*ssz"; +specialize qw/vp9_block_error/, "$sse2_x86inc"; + +add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; +specialize qw/vp9_subtract_block/, "$sse2_x86inc"; + +add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_b/, "$ssse3_x86_64"; + +add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; + +# +# Structured Similarity (SSIM) +# +if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { + add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64"; + + add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64"; +} + +# fdct functions +add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht4x4 sse2 avx2/; + +add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int 
tx_type"; +specialize qw/vp9_fht8x8 sse2 avx2/; + +add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht16x16 sse2 avx2/; + +add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fwht4x4/; + +add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct4x4 sse2 avx2/; + +add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct8x8 sse2 avx2/; + +add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct16x16 sse2 avx2/; + +add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct32x32 sse2 avx2/; + +add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct32x32_rd sse2 avx2/; + +# +# Motion search +# +add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"; +specialize qw/vp9_full_search_sad sse3 sse4_1/; +$vp9_full_search_sad_sse3=vp9_full_search_sadx3; +$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; + +add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_refining_search_sad sse3/; +$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4; + +add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_diamond_search_sad sse3/; 
+$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4; + +add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_full_range_search/; + +add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; +specialize qw/vp9_temporal_filter_apply sse2/; + +} +# end encoder functions +1; diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.sh b/source/libvpx/vp9/common/vp9_rtcd_defs.sh deleted file mode 100755 index 5b44970..0000000 --- a/source/libvpx/vp9/common/vp9_rtcd_defs.sh +++ /dev/null @@ -1,760 +0,0 @@ -vp9_common_forward_decls() { -cat <<EOF -/* - * VP9 - */ - -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_enums.h" - -struct macroblockd; - -/* Encoder forward decls */ -struct macroblock; -struct vp9_variance_vtable; - -#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] -struct mv; -union int_mv; -struct yv12_buffer_config; -EOF -} -forward_decls vp9_common_forward_decls - -# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. -[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse && - sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2 - -# this variable is for functions that are 64 bit only. 
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && - ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2 - -# -# RECON -# -prototype void vp9_d207_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_4x4 $ssse3_x86inc - -prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_4x4 $ssse3_x86inc - -prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_4x4 $ssse3_x86inc - -prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_4x4 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_4x4 - -prototype void vp9_d135_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_4x4 - -prototype void vp9_d153_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_4x4 $ssse3_x86inc - -prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_4x4 $sse_x86inc neon - -prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2 - -prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2 - -prototype void vp9_dc_top_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_4x4 - -prototype void vp9_dc_left_predictor_4x4 
"uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_4x4 - -prototype void vp9_dc_128_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_4x4 - -prototype void vp9_d207_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_8x8 $ssse3_x86inc - -prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_8x8 $ssse3_x86inc - -prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_8x8 $ssse3_x86inc - -prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_8x8 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_8x8 - -prototype void vp9_d135_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_8x8 - -prototype void vp9_d153_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_8x8 $ssse3_x86inc - -prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_8x8 $sse_x86inc neon - -prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2 - -prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2 - -prototype void vp9_dc_top_predictor_8x8 "uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_8x8 - -prototype void vp9_dc_left_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_8x8 - -prototype void vp9_dc_128_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_8x8 - -prototype void vp9_d207_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_16x16 $ssse3_x86inc - -prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_16x16 $ssse3_x86inc - -prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_16x16 $ssse3_x86inc - -prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_16x16 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_16x16 - -prototype void vp9_d135_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_16x16 - -prototype void vp9_d153_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_16x16 $ssse3_x86inc - -prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_16x16 $sse2_x86inc neon - -prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_16x16 $sse2_x86inc neon - -prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2 - -prototype void vp9_dc_top_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_16x16 - -prototype void vp9_dc_left_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_16x16 - -prototype void vp9_dc_128_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_16x16 - -prototype void vp9_d207_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_32x32 $ssse3_x86inc - -prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_32x32 $ssse3_x86inc - -prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_32x32 $ssse3_x86inc - -prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_32x32 $ssse3_x86inc neon - -prototype void vp9_d117_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_32x32 - -prototype void vp9_d135_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_32x32 - -prototype void vp9_d153_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_32x32 - -prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_32x32 $sse2_x86inc neon - -prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon - -prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_32x32 $sse2_x86inc - -prototype void vp9_dc_top_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_32x32 - -prototype void vp9_dc_left_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_32x32 - -prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_32x32 - -# -# Loopfilter -# -prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_lpf_vertical_16 sse2 neon dspr2 - -prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_lpf_vertical_16_dual sse2 neon dspr2 - -prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_vertical_8 sse2 neon dspr2 - -prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_vertical_8_dual sse2 neon dspr2 - -prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_vertical_4 mmx neon dspr2 - -prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize 
vp9_lpf_vertical_4_dual sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2 - -prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_8 sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_4 mmx neon dspr2 - -prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2 - -# -# post proc -# -if [ "$CONFIG_VP9_POSTPROC" = "yes" ]; then -prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit" -specialize vp9_mbpost_proc_down mmx sse2 -vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm - -prototype void vp9_mbpost_proc_across_ip "uint8_t *src, int pitch, int rows, int cols, int flimit" -specialize vp9_mbpost_proc_across_ip sse2 -vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm - -prototype void vp9_post_proc_down_and_across "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit" -specialize vp9_post_proc_down_and_across mmx sse2 -vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm - -prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char 
whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch" -specialize vp9_plane_add_noise mmx sse2 -vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt -fi - -prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_mb_inner - -prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_mb_outer - -prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_b - -# -# Sub Pixel Filters -# -prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_copy $sse2_x86inc neon dspr2 - -prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_avg $sse2_x86inc neon dspr2 - -prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2 - -prototype void 
vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg sse2 ssse3 neon dspr2 - -prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 - -prototype void vp9_convolve8_avg_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 - -# -# dct -# -prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct4x4_1_add sse2 neon dspr2 - -prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct4x4_16_add sse2 neon dspr2 - -prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_1_add sse2 neon dspr2 - -prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_64_add sse2 neon dspr2 - -prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_10_add sse2 neon dspr2 - -prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_1_add sse2 neon dspr2 - -prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_256_add sse2 neon dspr2 - -prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_10_add sse2 neon dspr2 - -prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, 
int dest_stride" -specialize vp9_idct32x32_1024_add sse2 neon dspr2 - -prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_34_add sse2 neon dspr2 -vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon - -prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_1_add sse2 neon dspr2 - -prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_iht4x4_16_add sse2 neon dspr2 - -prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_iht8x8_64_add sse2 neon dspr2 - -prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type" -specialize vp9_iht16x16_256_add sse2 dspr2 - -# dct and add - -prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_iwht4x4_1_add - -prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_iwht4x4_16_add - -# -# Encoder functions below this point. 
-# -if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then - - -# variance -prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x16 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x32 $sse2_x86inc - -prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance64x32 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x64 $sse2_x86inc - -prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x32 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance64x64 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x16 mmx $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x8 mmx $sse2_x86inc - -prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance8x16 mmx $sse2_x86inc - -prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" 
-specialize vp9_variance8x8 mmx $sse2_x86inc - -prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum" -specialize vp9_get_sse_sum_8x8 sse2 -vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2 - -prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance8x4 $sse2_x86inc - -prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance4x8 $sse2_x86inc - -prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance4x4 mmx $sse2_x86inc - -prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x64 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t 
*src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t 
*second_pred" -specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const 
uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x8 $sse2_x86inc $ssse3_x86inc - -# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form -prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x4 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x4 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance4x8 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance4x8 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance4x4 $sse_x86inc $ssse3_x86inc -#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt - -prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance4x4 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad64x64 $sse2_x86inc - -prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x64 $sse2_x86inc - -prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad64x32 $sse2_x86inc - -prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x16 $sse2_x86inc - -prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x32 $sse2_x86inc - -prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x32 $sse2_x86inc - -prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x16 mmx $sse2_x86inc - -prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x8 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x16 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x8 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x4 $sse2_x86inc - -prototype unsigned int vp9_sad4x8 "const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad4x8 $sse_x86inc - -prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad4x4 mmx $sse_x86inc - -prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad64x64_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x64_avg $sse2_x86inc - -prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad64x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad16x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad16x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred, unsigned int max_sad" -specialize vp9_sad16x8_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x8_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x4_avg $sse2_x86inc - -prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad4x8_avg $sse_x86inc - -prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad4x4_avg $sse_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_v $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_hv $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize 
vp9_variance_halfpixvar64x64_h - -prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar64x64_v - -prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar64x64_hv - -prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_h - -prototype unsigned int vp9_variance_halfpixvar32x32_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_v - -prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_hv - -prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x64x3 - -prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x32x3 - -prototype void vp9_sad16x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x16x3 sse3 ssse3 - -prototype void vp9_sad16x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x8x3 sse3 ssse3 - -prototype void vp9_sad8x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x16x3 sse3 - -prototype void vp9_sad8x8x3 "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x8x3 sse3 - -prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x4x3 sse3 - -prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad64x64x8 - -prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad32x32x8 - -prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad16x16x8 sse4 - -prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad16x8x8 sse4 - -prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x16x8 sse4 - -prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x8x8 sse4 - -prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x4x8 - -prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad4x8x8 - -prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad4x4x8 sse4 - -prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x64x4d sse2 - -prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const 
ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x64x4d sse2 - -prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x32x4d sse2 - -prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x16x4d sse2 - -prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x32x4d sse2 - -prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x32x4d sse2 - -prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x16x4d sse2 - -prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x8x4d sse2 - -prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x16x4d sse2 - -prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x8x4d sse2 - -# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form -prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x4x4d sse2 - -prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x8x4d sse - -prototype void vp9_sad4x4x4d "const uint8_t 
*src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x4x4d sse - -#prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" -#specialize vp9_sub_pixel_mse16x16 sse2 mmx - -prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse16x16 mmx $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse8x16 - -prototype unsigned int vp9_mse16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse16x8 - -prototype unsigned int vp9_mse8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse8x8 - -prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_mse64x64 - -prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_mse32x32 - -prototype unsigned int vp9_get_mb_ss "const int16_t *" -specialize vp9_get_mb_ss mmx sse2 -# ENCODEMB INVOKE - -prototype int64_t vp9_block_error "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz" -specialize vp9_block_error $sse2_x86inc - -prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride" -specialize vp9_subtract_block 
$sse2_x86inc - -prototype void vp9_quantize_b "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b $ssse3_x86_64 - -prototype void vp9_quantize_b_32x32 "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b_32x32 $ssse3_x86_64 - -# -# Structured Similarity (SSIM) -# -if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_8x8 $sse2_x86_64 - - prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_16x16 $sse2_x86_64 -fi - -# fdct functions -prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht4x4 sse2 avx2 - -prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht8x8 sse2 avx2 - -prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht16x16 sse2 avx2 - -prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fwht4x4 - -prototype void vp9_fdct4x4 "const int16_t *input, int16_t *output, int stride" -specialize 
vp9_fdct4x4 sse2 avx2 - -prototype void vp9_fdct8x8 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct8x8 sse2 avx2 - -prototype void vp9_fdct16x16 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct16x16 sse2 avx2 - -prototype void vp9_fdct32x32 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct32x32 sse2 avx2 - -prototype void vp9_fdct32x32_rd "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct32x32_rd sse2 avx2 - -# -# Motion search -# -prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv" -specialize vp9_full_search_sad sse3 sse4_1 -vp9_full_search_sad_sse3=vp9_full_search_sadx3 -vp9_full_search_sad_sse4_1=vp9_full_search_sadx8 - -prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_refining_search_sad sse3 -vp9_refining_search_sad_sse3=vp9_refining_search_sadx4 - -prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_diamond_search_sad sse3 -vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4 - -prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_full_range_search - -prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count" 
-specialize vp9_temporal_filter_apply sse2 - -fi -# end encoder functions diff --git a/source/libvpx/vp9/common/vp9_scale.c b/source/libvpx/vp9/common/vp9_scale.c index e0f1e34..d3405fc 100644 --- a/source/libvpx/vp9/common/vp9_scale.c +++ b/source/libvpx/vp9/common/vp9_scale.c @@ -13,11 +13,11 @@ #include "vp9/common/vp9_scale.h" static INLINE int scaled_x(int val, const struct scale_factors *sf) { - return val * sf->x_scale_fp >> REF_SCALE_SHIFT; + return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); } static INLINE int scaled_y(int val, const struct scale_factors *sf) { - return val * sf->y_scale_fp >> REF_SCALE_SHIFT; + return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); } static int unscaled_value(int val, const struct scale_factors *sf) { diff --git a/source/libvpx/vp9/common/vp9_systemdependent.h b/source/libvpx/vp9/common/vp9_systemdependent.h index 72edbca..e971158 100644 --- a/source/libvpx/vp9/common/vp9_systemdependent.h +++ b/source/libvpx/vp9/common/vp9_systemdependent.h @@ -12,11 +12,11 @@ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #ifdef _MSC_VER +# include <math.h> // the ceil() definition must precede intrin.h # if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) # include <intrin.h> # define USE_MSC_INTRIN # endif -# include <math.h> # define snprintf _snprintf #endif diff --git a/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm b/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm index dd522c6..b263837 100644 --- a/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm +++ b/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm @@ -133,10 +133,14 @@ INIT_MMX sse movh m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq+dst_strideq] - pavgb m2, [dstq+dst_strideq*2] - pavgb m3, [dstq+r6q] + movh m4, [dstq] + movh m5, [dstq+dst_strideq] + movh m6, [dstq+dst_strideq*2] + movh m7, [dstq+r6q] + pavgb m0, m4 + pavgb m1, m5 + pavgb m2, m6 + pavgb m3, m7 %endif movh [dstq ], m0 movh [dstq+dst_strideq 
], m1 diff --git a/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index efa960c..b84db97 100644 --- a/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -32,6 +32,27 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 }; +#if defined(__clang__) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ + (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0) +# define MM256_BROADCASTSI128_SI256(x) \ + _mm_broadcastsi128_si256((__m128i const *)&(x)) +# else // clang > 3.3, and not 5.0 on macosx. +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +# endif // clang <= 3.3 +#elif defined(__GNUC__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) +# define MM256_BROADCASTSI128_SI256(x) \ + _mm_broadcastsi128_si256((__m128i const *)&(x)) +# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 +# define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) +# else // gcc > 4.7 +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +# endif // gcc <= 4.6 +#else // !(gcc || clang) +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +#endif // __clang__ + void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, @@ -53,18 +74,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); // have the same data in both lanes of a 256 bit register -#if defined (__GNUC__) -#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \ -(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0)))) - filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg); -#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0)) - filtersReg32 = _mm_broadcastsi128_si256(filtersReg); -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif + filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); // duplicate only the first 16 bits (first and second byte) // across 256 bit register @@ -309,18 +319,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // same data in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); // have the same data in both lanes of a 256 bit register -#if defined (__GNUC__) -#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \ -(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0)))) - filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg); -#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0)) - filtersReg32 = _mm_broadcastsi128_si256(filtersReg); -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif + filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); // duplicate only the first 16 bits (first and second byte) // across 256 bit register diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.c b/source/libvpx/vp9/decoder/vp9_decodeframe.c index 8bebca5..022a429 100644 --- a/source/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/source/libvpx/vp9/decoder/vp9_decodeframe.c @@ -33,9 +33,9 @@ #include "vp9/decoder/vp9_decodeframe.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" +#include 
"vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_dsubexp.h" #include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_reader.h" #include "vp9/decoder/vp9_thread.h" @@ -146,7 +146,7 @@ static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) { static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) { int i; for (i = 0; i < n; ++i) - if (vp9_read(r, NMV_UPDATE_PROB)) + if (vp9_read(r, MV_UPDATE_PROB)) p[i] = (vp9_read_literal(r, 7) << 1) | 1; } @@ -187,54 +187,13 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { xd->plane[i].dequant = cm->uv_dequant[q_index]; } -// Allocate storage for each tile column. -// TODO(jzern): when max_threads <= 1 the same storage could be used for each -// tile. -static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) { - VP9_COMMON *const cm = &pbi->common; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - int i, tile_row, tile_col; - - CHECK_MEM_ERROR(cm, pbi->mi_streams, - vpx_realloc(pbi->mi_streams, tile_rows * tile_cols * - sizeof(*pbi->mi_streams))); - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - TileInfo tile; - vp9_tile_init(&tile, cm, tile_row, tile_col); - pbi->mi_streams[tile_row * tile_cols + tile_col] = - &cm->mi[tile.mi_row_start * cm->mode_info_stride - + tile.mi_col_start]; - } - } - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - CHECK_MEM_ERROR(cm, pbi->above_context[0], - vpx_realloc(pbi->above_context[0], - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols)); - for (i = 1; i < MAX_MB_PLANE; ++i) { - pbi->above_context[i] = pbi->above_context[0] + - i * sizeof(*pbi->above_context[0]) * - 2 * aligned_mi_cols; - } - - // This is sized based on the entire frame. 
Each tile operates within its - // column bounds. - CHECK_MEM_ERROR(cm, pbi->above_seg_context, - vpx_realloc(pbi->above_seg_context, - sizeof(*pbi->above_seg_context) * - aligned_mi_cols)); -} - static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, TX_SIZE tx_size, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; if (eob > 0) { TX_TYPE tx_type; - const int plane_type = pd->plane_type; + const PLANE_TYPE plane_type = pd->plane_type; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); switch (tx_size) { case TX_4X4: @@ -245,11 +204,11 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type); break; case TX_8X8: - tx_type = get_tx_type_8x8(plane_type, xd); + tx_type = get_tx_type(plane_type, xd); vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: - tx_type = get_tx_type_16x16(plane_type, xd); + tx_type = get_tx_type(plane_type, xd); vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: @@ -282,11 +241,11 @@ struct intra_args { static void predict_and_reconstruct_intra_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct intra_args *const args = arg; + struct intra_args *const args = (struct intra_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const MB_PREDICTION_MODE mode = (plane == 0) ? 
get_y_mode(mi, block) : mi->mbmi.uv_mode; int x, y; @@ -318,7 +277,7 @@ struct inter_args { static void reconstruct_inter_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct inter_args *args = arg; + struct inter_args *args = (struct inter_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -332,67 +291,57 @@ static void reconstruct_inter_block(int plane, int block, *args->eobtotal += eob; } -static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - BLOCK_SIZE bsize, int mi_row, int mi_col) { +static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int x_mis = MIN(bw, cm->mi_cols - mi_col); const int y_mis = MIN(bh, cm->mi_rows - mi_row); - const int offset = mi_row * cm->mode_info_stride + mi_col; - const int tile_offset = tile->mi_row_start * cm->mode_info_stride + - tile->mi_col_start; + const int offset = mi_row * cm->mi_stride + mi_col; int x, y; - xd->mi_8x8 = cm->mi_grid_visible + offset; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; - - xd->last_mi = cm->coding_use_prev_mi && cm->prev_mi ? - xd->prev_mi_8x8[0] : NULL; - - xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset; - xd->mi_8x8[0]->mbmi.sb_type = bsize; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + xd->mi[0]->mbmi.sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) - xd->mi_8x8[y * cm->mode_info_stride + x] = xd->mi_8x8[0]; + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; - set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); + set_skip_context(xd, mi_row, mi_col); // Distance of Mb to the various image edges. 
These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + return &xd->mi[0]->mbmi; } static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, int idx, int mi_row, int mi_col) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; xd->block_refs[idx] = ref_buffer; if (!vp9_is_valid_scale(&ref_buffer->sf)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid scale factors"); - setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, &ref_buffer->sf); + vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, + &ref_buffer->sf); xd->corrupted |= ref_buffer->buf->corrupted; } -static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - int mi_row, int mi_col, - vp9_reader *r, BLOCK_SIZE bsize) { +static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; - MB_MODE_INFO *mbmi; - - set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; - // Has to be called after set_offsets - mbmi = &xd->mi_8x8[0]->mbmi; - if (mbmi->skip) { reset_skip_context(xd, bsize); } else { @@ -411,8 +360,6 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (has_second_ref(mbmi)) set_ref(cm, xd, 1, mi_row, mi_col); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - // Prediction vp9_dec_build_inter_predictors_sb(xd, 
mi_row, mi_col, bsize); @@ -432,16 +379,14 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, int mi_row, int mi_col, BLOCK_SIZE bsize, vp9_reader *r) { - const int ctx = partition_plane_context(xd->above_seg_context, - xd->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; PARTITION_TYPE p; if (has_rows && has_cols) - p = vp9_read_tree(r, vp9_partition_tree, probs); + p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs); else if (!has_rows && has_cols) p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; else if (has_rows && !has_cols) @@ -455,10 +400,10 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, return p; } -static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - int mi_row, int mi_col, - vp9_reader* r, BLOCK_SIZE bsize) { +static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + vp9_reader* r, BLOCK_SIZE bsize) { const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; BLOCK_SIZE subsize; @@ -469,27 +414,27 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - 
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); if (mi_row + hbs < cm->mi_rows) - decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); if (mi_col + hbs < cm->mi_cols) - decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize); - decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); - decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); - decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); + decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(0 && "Invalid partition type"); @@ -499,8 +444,7 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(xd->above_seg_context, xd->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void setup_token_decoder(const uint8_t *data, @@ -668,9 +612,7 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { read_frame_size(rb, &cm->display_width, &cm->display_height); } -static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { - VP9_COMMON *cm = &pbi->common; - +static void apply_frame_size(VP9_COMMON *cm, int width, int height) { if (cm->width != 
width || cm->height != height) { // Change in frame size. // TODO(agrange) Don't test width/height, check overall size. @@ -697,18 +639,15 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { } } -static void setup_frame_size(VP9D_COMP *pbi, - struct vp9_read_bit_buffer *rb) { +static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { int width, height; read_frame_size(rb, &width, &height); - apply_frame_size(pbi, width, height); - setup_display_size(&pbi->common, rb); + apply_frame_size(cm, width, height); + setup_display_size(cm, rb); } -static void setup_frame_size_with_refs(VP9D_COMP *pbi, +static void setup_frame_size_with_refs(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - int width, height; int found = 0, i; for (i = 0; i < REFS_PER_FRAME; ++i) { @@ -728,24 +667,11 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - apply_frame_size(pbi, width, height); + apply_frame_size(cm, width, height); setup_display_size(cm, rb); } -static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, - int tile_row, int tile_col) { - int i; - const int tile_cols = 1 << pbi->common.log2_tile_cols; - xd->mi_stream = pbi->mi_streams[tile_row * tile_cols + tile_col]; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - xd->above_context[i] = pbi->above_context[i]; - } - // see note in alloc_tile_storage(). 
- xd->above_seg_context = pbi->above_seg_context; -} - -static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, +static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; @@ -769,7 +695,7 @@ static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); + decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); } if (pbi->do_loopfilter_inline) { @@ -850,15 +776,15 @@ typedef struct TileBuffer { int col; // only used with multi-threaded decoding } TileBuffer; -static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { +static const uint8_t *decode_tiles(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const uint8_t *end = NULL; vp9_reader r; @@ -867,11 +793,11 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); // Load tile data into tile_buffers for (tile_row = 0; tile_row < tile_rows; ++tile_row) { @@ -898,7 +824,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { vp9_tile_init(&tile, cm, tile_row, col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r); - setup_tile_context(pbi, xd, tile_row, col); decode_tile(pbi, &tile, &r); if (last_tile) @@ -909,17 +834,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return end; } -static void setup_tile_macroblockd(TileWorkerData *const tile_data) { - MACROBLOCKD *xd = &tile_data->xd; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - pd[i].dqcoeff = tile_data->dqcoeff[i]; - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); - } -} - static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *const tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; @@ -931,8 +845,8 @@ static int tile_worker_hook(void *arg1, void *arg2) { vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(tile_data->cm, &tile_data->xd, tile, - mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); + decode_partition(tile_data->cm, &tile_data->xd, tile, + mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } return !tile_data->xd.corrupted; @@ -951,10 +865,11 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { 
+static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; const uint8_t *bit_reader_end = NULL; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -967,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { assert(tile_rows == 1); (void)tile_rows; - if (num_workers > pbi->num_tile_workers) { + // TODO(jzern): See if we can remove the restriction of passing in max + // threads to the decoder. + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->oxcf.max_threads & ~1; int i; + // TODO(jzern): Allocate one less worker, as in the current code we only + // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_realloc(pbi->tile_workers, - num_workers * sizeof(*pbi->tile_workers))); - for (i = pbi->num_tile_workers; i < num_workers; ++i) { + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (i = 0; i < num_threads; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -980,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { CHECK_MEM_ERROR(cm, worker->data1, vpx_memalign(32, sizeof(TileWorkerData))); CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + if (i < num_threads - 1 && !vp9_worker_reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -988,17 +907,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { } // Reset tile decoding hook - for (n = 0; n < pbi->num_tile_workers; ++n) { + for (n = 0; n < num_workers; ++n) { pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; } // Note: this memset assumes 
above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_mi_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); // Load tile data into tile_buffers for (n = 0; n < tile_cols; ++n) { @@ -1043,11 +961,10 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; vp9_tile_init(tile, tile_data->cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader); - setup_tile_context(pbi, &tile_data->xd, 0, buf->col); - setup_tile_macroblockd(tile_data); + init_macroblockd(cm, &tile_data->xd); + vp9_zero(tile_data->xd.dqcoeff); worker->had_error = 0; if (i == num_workers - 1 || n == tile_cols - 1) { @@ -1092,12 +1009,13 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -#define RESERVED \ - if (vp9_rb_read_bit(rb)) \ - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ - "Reserved bit must be unset") +static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + return (BITSTREAM_PROFILE) profile; +} -static size_t read_uncompressed_header(VP9D_COMP *pbi, +static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; size_t sz; @@ -1109,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->version = vp9_rb_read_bit(rb); - RESERVED; + cm->profile = read_profile(rb); + 
if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); cm->show_existing_frame = vp9_rb_read_bit(rb); if (cm->show_existing_frame) { @@ -1135,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, if (cm->frame_type == KEY_FRAME) { check_sync_code(cm, rb); - - cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace + if (cm->profile > PROFILE_1) + cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10; + cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1147,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { @@ -1163,7 +1084,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->frame_refs[i].buf = get_frame_new_buffer(cm); } - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { cm->intra_only = cm->show_frame ? 
0 : vp9_rb_read_bit(rb); @@ -1174,7 +1095,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, check_sync_code(cm, rb); pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); @@ -1186,7 +1107,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); } - setup_frame_size_with_refs(pbi, rb); + setup_frame_size_with_refs(cm, rb); cm->allow_high_precision_mv = vp9_rb_read_bit(rb); cm->interp_filter = read_interp_filter(rb); @@ -1234,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, return sz; } -static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; @@ -1334,14 +1255,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { - int i; +int vp9_decode_frame(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const uint8_t *data = pbi->source; - const uint8_t *const data_end = pbi->source + pbi->source_sz; - struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler }; const size_t first_partition_size = read_uncompressed_header(pbi, &rb); const int keyframe = cm->frame_type == KEY_FRAME; @@ -1367,7 +1286,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { pbi->do_loopfilter_inline = (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData))); + CHECK_MEM_ERROR(cm, 
pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, @@ -1375,9 +1295,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { } } - alloc_tile_storage(pbi, tile_rows, tile_cols); + init_macroblockd(cm, &pbi->mb); - xd->mode_info_stride = cm->mode_info_stride; if (cm->coding_use_prev_mi) set_prev_mi(cm); else @@ -1388,8 +1307,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); - for (i = 0; i < MAX_MB_PLANE; ++i) - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); + vp9_zero(xd->dqcoeff); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); @@ -1398,9 +1316,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { // single-frame tile decoding. 
if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && cm->frame_parallel_decoding_mode) { - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size); + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size); + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } new_fb->corrupted |= xd->corrupted; diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.h b/source/libvpx/vp9/decoder/vp9_decodeframe.h index 4537bc8..8a19daf 100644 --- a/source/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/source/libvpx/vp9/decoder/vp9_decodeframe.h @@ -17,10 +17,13 @@ extern "C" { #endif struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; void vp9_init_dequantizer(struct VP9Common *cm); -int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end); + +int vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/decoder/vp9_decodemv.c b/source/libvpx/vp9/decoder/vp9_decodemv.c index 0fb7a15..3618f12 100644 --- a/source/libvpx/vp9/decoder/vp9_decodemv.c +++ b/source/libvpx/vp9/decoder/vp9_decodemv.c @@ -21,7 +21,6 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_reader.h" static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { @@ -64,7 +63,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vp9_reader *r) { const int ctx = vp9_get_tx_size_context(xd); const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs); - TX_SIZE tx_size = vp9_read(r, tx_probs[0]); + int tx_size = vp9_read(r, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { tx_size += vp9_read(r, tx_probs[1]); if (tx_size != TX_8X8 && max_tx_size >= 
TX_32X32) @@ -73,7 +72,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, if (!cm->frame_parallel_decoding_mode) ++get_tx_counts(max_tx_size, ctx, &cm->counts.tx)[tx_size]; - return tx_size; + return (TX_SIZE)tx_size; } static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_MODE tx_mode, @@ -105,7 +104,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int segment_id; if (!seg->enabled) @@ -122,7 +121,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; int predicted_segment_id, segment_id; @@ -162,11 +161,12 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - const MODE_INFO *above_mi = xd->mi_8x8[-cm->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + const MODE_INFO *above_mi = xd->mi[-cm->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; const BLOCK_SIZE bsize = mbmi->sb_type; + int i; mbmi->segment_id = read_intra_segment_id(cm, xd, mi_row, mi_col, r); mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); @@ -174,32 +174,28 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm, mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; - if (bsize >= BLOCK_8X8) { - const MB_PREDICTION_MODE A = vp9_above_block_mode(mi, above_mi, 0); - const MB_PREDICTION_MODE L = vp9_left_block_mode(mi, left_mi, 0); - mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); - } else { - // Only 4x4, 4x8, 8x4 blocks - const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 - const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 - int idx, idy; - - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int ib = idy * 2 + idx; - const MB_PREDICTION_MODE A = vp9_above_block_mode(mi, above_mi, ib); - const MB_PREDICTION_MODE L = vp9_left_block_mode(mi, left_mi, ib); - const MB_PREDICTION_MODE b_mode = read_intra_mode(r, - vp9_kf_y_mode_prob[A][L]); - mi->bmi[ib].as_mode = b_mode; - if (num_4x4_h == 2) - mi->bmi[ib + 2].as_mode = b_mode; - if (num_4x4_w == 2) - mi->bmi[ib + 1].as_mode = b_mode; - } - } - - mbmi->mode = mi->bmi[3].as_mode; + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); + mbmi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 
2)); + break; + default: + mbmi->mode = read_intra_mode(r, + get_y_mode_probs(mi, above_mi, left_mi, 0)); } mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]); @@ -241,14 +237,15 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE j = vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); + const MV_JOINT_TYPE joint_type = + (MV_JOINT_TYPE)vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - if (mv_joint_vertical(j)) + if (mv_joint_vertical(joint_type)) diff.row = read_mv_component(r, &ctx->comps[0], use_hp); - if (mv_joint_horizontal(j)) + if (mv_joint_horizontal(joint_type)) diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -262,7 +259,8 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, vp9_reader *r) { if (cm->reference_mode == REFERENCE_MODE_SELECT) { const int ctx = vp9_get_reference_mode_context(cm, xd); - const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]); + const REFERENCE_MODE mode = + (REFERENCE_MODE)vp9_read(r, cm->fc.comp_inter_prob[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.comp_inter[ctx][mode]; return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE @@ -279,7 +277,8 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, FRAME_COUNTS *const counts = &cm->counts; if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); + ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); @@ -318,8 +317,9 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, static INLINE INTERP_FILTER 
read_switchable_interp_filter( VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - const int type = vp9_read_tree(r, vp9_switchable_interp_tree, - cm->fc.switchable_interp_prob[ctx]); + const INTERP_FILTER type = + (INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree, + cm->fc.switchable_interp_prob[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.switchable_interp[ctx][type]; return type; @@ -329,30 +329,29 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi, vp9_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; + int i; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; - if (bsize >= BLOCK_8X8) { - mbmi->mode = read_intra_mode_y(cm, r, size_group_lookup[bsize]); - } else { - // Only 4x4, 4x8, 8x4 blocks - const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 - const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 - int idx, idy; - - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int ib = idy * 2 + idx; - const int b_mode = read_intra_mode_y(cm, r, 0); - mi->bmi[ib].as_mode = b_mode; - if (num_4x4_h == 2) - mi->bmi[ib + 2].as_mode = b_mode; - if (num_4x4_w == 2) - mi->bmi[ib + 1].as_mode = b_mode; - } - } - mbmi->mode = mi->bmi[3].as_mode; + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = read_intra_mode_y(cm, r, 0); + mbmi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, r, 0); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode_y(cm, r, 0); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, r, 0); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode_y(cm, r, 0); + break; + default: + mbmi->mode = read_intra_mode_y(cm, r, 
size_group_lookup[bsize]); } mbmi->uv_mode = read_intra_mode_uv(cm, r, mbmi->mode); @@ -437,7 +436,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, for (ref = 0; ref < 1 + is_compound; ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; - vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, frame, mbmi->ref_mvs[frame], + vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame], mi_row, mi_col); } @@ -470,7 +469,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; - int b_mode; + MB_PREDICTION_MODE b_mode; int_mv nearest_sub8x8[2], near_sub8x8[2]; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { @@ -516,7 +515,7 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; diff --git a/source/libvpx/vp9/decoder/vp9_decodemv.h b/source/libvpx/vp9/decoder/vp9_decodemv.h index 539c984..7394b62 100644 --- a/source/libvpx/vp9/decoder/vp9_decodemv.h +++ b/source/libvpx/vp9/decoder/vp9_decodemv.h @@ -11,7 +11,6 @@ #ifndef VP9_DECODER_VP9_DECODEMV_H_ #define VP9_DECODER_VP9_DECODEMV_H_ -#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_reader.h" #ifdef __cplusplus diff --git a/source/libvpx/vp9/decoder/vp9_onyxd_if.c b/source/libvpx/vp9/decoder/vp9_decoder.c index 24248a4..fd74478 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd_if.c +++ b/source/libvpx/vp9/decoder/vp9_decoder.c @@ -12,23 +12,25 @@ #include <limits.h> #include <stdio.h> +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_timer.h" +#include "vpx_scale/vpx_scale.h" + +#include "vp9/common/vp9_alloccommon.h" +#include 
"vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif -#include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_quant_common.h" -#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_systemdependent.h" -#include "vpx_ports/vpx_timer.h" + #include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_dthread.h" -#include "./vpx_scale_rtcd.h" #define WRITE_RECON_BUFFER 0 #if WRITE_RECON_BUFFER == 1 @@ -102,23 +104,14 @@ void vp9_initialize_dec() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); vp9_init_quant_tables(); init_done = 1; } } -static void init_macroblockd(VP9D_COMP *const pbi) { - MACROBLOCKD *xd = &pbi->mb; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) - pd[i].dqcoeff = pbi->dqcoeff[i]; -} - -VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { - VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); +VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) { + VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; if (!cm) @@ -126,12 +119,9 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_zero(*pbi); - // Initialize the references to not point to any frame buffers. - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - vp9_remove_decompressor(pbi); + vp9_decoder_remove(pbi); return NULL; } @@ -140,9 +130,13 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_rtcd(); + // Initialize the references to not point to any frame buffers. 
+ vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + + cm->current_video_frame = 0; pbi->oxcf = *oxcf; pbi->ready_for_new_data = 1; - cm->current_video_frame = 0; + pbi->decoded_key_frame = 0; // vp9_init_dequantizer() is first called here. Add check in // frame_init_dequantizer() to avoid unnecessary calling of @@ -152,22 +146,17 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_loop_filter_init(cm); cm->error.setjmp = 0; - pbi->decoded_key_frame = 0; - - init_macroblockd(pbi); vp9_worker_init(&pbi->lf_worker); return pbi; } -void vp9_remove_decompressor(VP9D_COMP *pbi) { +void vp9_decoder_remove(VP9Decoder *pbi) { + VP9_COMMON *const cm = &pbi->common; int i; - if (!pbi) - return; - - vp9_remove_common(&pbi->common); + vp9_remove_common(cm); vp9_worker_end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); for (i = 0; i < pbi->num_tile_workers; ++i) { @@ -179,17 +168,11 @@ void vp9_remove_decompressor(VP9D_COMP *pbi) { vpx_free(pbi->tile_workers); if (pbi->num_tile_workers) { - VP9_COMMON *const cm = &pbi->common; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - VP9LfSync *const lf_sync = &pbi->lf_row_sync; - - vp9_loop_filter_dealloc(lf_sync, sb_rows); + vp9_loop_filter_dealloc(&pbi->lf_row_sync, sb_rows); } - vpx_free(pbi->mi_streams); - vpx_free(pbi->above_context[0]); - vpx_free(pbi->above_seg_context); vpx_free(pbi); } @@ -199,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a, a->uv_height == b->uv_height && a->uv_width == b->uv_width; } -vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; @@ -226,17 +209,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, } -vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMMON 
*cm = &pbi->common; RefBuffer *ref_buf = NULL; - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - * encoder is using the frame buffers for. This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. - */ + // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + // encoder is using the frame buffers for. This is just a stub to keep the + // vpxenc --test-decode functionality working, and will be replaced in a + // later commit that adds VP9-specific controls for this functionality. if (ref_frame_flag == VP9_LAST_FLAG) { ref_buf = &cm->frame_refs[0]; } else if (ref_frame_flag == VP9_GOLD_FLAG) { @@ -244,13 +225,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_buf = &cm->frame_refs[2]; } else { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); - return pbi->common.error.error_code; + return cm->error.error_code; } if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else { int *ref_fb_ptr = &ref_buf->idx; @@ -267,11 +248,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, vp8_yv12_copy_frame(sd, ref_buf->buf); } - return pbi->common.error.error_code; + return cm->error.error_code; } -int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { +int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; if (index < 0 || index >= REF_FRAMES) @@ -282,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { } /* If any buffer updating is signaled it should be done here. 
*/ -static void swap_frame_buffers(VP9D_COMP *pbi) { +static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; @@ -306,35 +287,24 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { cm->frame_refs[ref_index].idx = INT_MAX; } -int vp9_receive_compressed_data(VP9D_COMP *pbi, +int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource, int64_t time_stamp) { - VP9_COMMON *cm = NULL; + VP9_COMMON *const cm = &pbi->common; const uint8_t *source = *psource; int retcode = 0; - /*if(pbi->ready_for_new_data == 0) - return -1;*/ - - if (!pbi) - return -1; - - cm = &pbi->common; cm->error.error_code = VPX_CODEC_OK; - pbi->source = source; - pbi->source_sz = size; - - if (pbi->source_sz == 0) { - /* This is used to signal that we are missing frames. - * We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + if (size == 0) { + // This is used to signal that we are missing frames. + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; } @@ -348,14 +318,13 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. 
- * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; @@ -367,7 +336,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, cm->error.setjmp = 1; - retcode = vp9_decode_frame(pbi, psource); + retcode = vp9_decode_frame(pbi, source, source + size, psource); if (retcode < 0) { cm->error.error_code = VPX_CODEC_ERROR; @@ -421,37 +390,20 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame; if (cm->show_frame) { - if (!cm->show_existing_frame) { - // current mip will be the prev_mip for the next frame - MODE_INFO *temp = cm->prev_mip; - MODE_INFO **temp2 = cm->prev_mi_grid_base; - cm->prev_mip = cm->mip; - cm->mip = temp; - cm->prev_mi_grid_base = cm->mi_grid_base; - cm->mi_grid_base = temp2; - - // update the upper left visible macroblock ptrs - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + - cm->mode_info_stride + 1; - - pbi->mb.mi_8x8 = cm->mi_grid_visible; - pbi->mb.mi_8x8[0] = cm->mi; - } + if (!cm->show_existing_frame) + vp9_swap_mi_and_prev_mi(cm); + cm->current_video_frame++; } pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; - pbi->source_sz = 0; cm->error.setjmp = 0; return retcode; } -int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, +int 
vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; @@ -470,19 +422,12 @@ int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(&pbi->common, sd, flags); #else - - if (pbi->common.frame_to_show) { *sd = *pbi->common.frame_to_show; sd->y_width = pbi->common.width; sd->y_height = pbi->common.height; sd->uv_width = sd->y_width >> pbi->common.subsampling_x; sd->uv_height = sd->y_height >> pbi->common.subsampling_y; - ret = 0; - } else { - ret = -1; - } - #endif /*!CONFIG_POSTPROC*/ vp9_clear_system_state(); return ret; diff --git a/source/libvpx/vp9/decoder/vp9_onyxd.h b/source/libvpx/vp9/decoder/vp9_decoder.h index 203e9fa..c9dc251 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd.h +++ b/source/libvpx/vp9/decoder/vp9_decoder.h @@ -8,64 +8,88 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_DECODER_VP9_ONYXD_H_ -#define VP9_DECODER_VP9_ONYXD_H_ +#ifndef VP9_DECODER_VP9_DECODER_H_ +#define VP9_DECODER_VP9_DECODER_H_ +#include "./vpx_config.h" + +#include "vpx/vpx_codec.h" #include "vpx_scale/yv12config.h" + +#include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_ppflags.h" -#include "vpx/vpx_codec.h" + +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/decoder/vp9_dthread.h" +#include "vp9/decoder/vp9_thread.h" #ifdef __cplusplus extern "C" { #endif -struct VP9Decompressor; - typedef struct { int width; int height; int version; - int postprocess; int max_threads; int inv_tile_order; - int input_partition; } VP9D_CONFIG; -typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 -} VP9_REFFRAME; +typedef struct VP9Decoder { + DECLARE_ALIGNED(16, MACROBLOCKD, mb); + + DECLARE_ALIGNED(16, VP9_COMMON, common); + + VP9D_CONFIG oxcf; + + int64_t last_time_stamp; + int ready_for_new_data; + + int refresh_frame_flags; + + int decoded_key_frame; + + int initial_width; + 
int initial_height; + + int do_loopfilter_inline; // apply loopfilter to available rows immediately + VP9Worker lf_worker; + + VP9Worker *tile_workers; + int num_tile_workers; + + VP9LfSync lf_row_sync; +} VP9Decoder; void vp9_initialize_dec(); -int vp9_receive_compressed_data(struct VP9Decompressor *pbi, +int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **dest, int64_t time_stamp); -int vp9_get_raw_frame(struct VP9Decompressor *pbi, +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -vpx_codec_err_t vp9_set_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp9_get_reference_dec(struct VP9Decompressor *pbi, +int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decompressor *vp9_create_decompressor(VP9D_CONFIG *oxcf); +struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf); -void vp9_remove_decompressor(struct VP9Decompressor *pbi); +void vp9_decoder_remove(struct VP9Decoder *pbi); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_DECODER_VP9_ONYXD_H_ +#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/source/libvpx/vp9/decoder/vp9_detokenize.c b/source/libvpx/vp9/decoder/vp9_detokenize.c index 52e78cd..860da53 100644 --- a/source/libvpx/vp9/decoder/vp9_detokenize.c +++ b/source/libvpx/vp9/decoder/vp9_detokenize.c @@ -86,7 +86,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int 
ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; diff --git a/source/libvpx/vp9/decoder/vp9_detokenize.h b/source/libvpx/vp9/decoder/vp9_detokenize.h index ce3d765..5278e97 100644 --- a/source/libvpx/vp9/decoder/vp9_detokenize.h +++ b/source/libvpx/vp9/decoder/vp9_detokenize.h @@ -12,7 +12,7 @@ #ifndef VP9_DECODER_VP9_DETOKENIZE_H_ #define VP9_DECODER_VP9_DETOKENIZE_H_ -#include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_reader.h" #ifdef __cplusplus diff --git a/source/libvpx/vp9/decoder/vp9_dthread.c b/source/libvpx/vp9/decoder/vp9_dthread.c index 542732a..9b124c9 100644 --- a/source/libvpx/vp9/decoder/vp9_dthread.c +++ b/source/libvpx/vp9/decoder/vp9_dthread.c @@ -9,10 +9,13 @@ */ #include "./vpx_config.h" + +#include "vpx_mem/vpx_mem.h" + #include "vp9/common/vp9_reconinter.h" + #include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_onyxd_int.h" -#include "vpx_mem/vpx_mem.h" +#include "vp9/decoder/vp9_decoder.h" #if CONFIG_MULTITHREAD static INLINE void mutex_lock(pthread_mutex_t *const mutex) { @@ -96,7 +99,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, for (r = start; r < stop; r += num_lf_workers) { const int mi_row = r << MI_BLOCK_SIZE_LOG2; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (c = 0; c < sb_cols; ++c) { const int mi_col = c << MI_BLOCK_SIZE_LOG2; @@ -104,9 +107,8 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, sync_read(lf_sync, r, c); - setup_dst_planes(xd, frame_buffer, mi_row, mi_col); - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride, - &lfm); + vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm); for 
(plane = 0; plane < num_planes; ++plane) { vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); @@ -130,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. -void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, +void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -166,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); // Set up loopfilter thread data. - for (i = 0; i < pbi->num_tile_workers; ++i) { + // The decoder is using num_workers instead of pbi->num_tile_workers + // because it has been observed that using more threads on the + // loopfilter, than there are tile columns in the frame will hurt + // performance on Android. This is because the system will only + // schedule the tile decode workers on cores equal to the number + // of tile columns. Then if the decoder tries to use more threads for the + // loopfilter, it will hurt performance because of contention. If the + // multithreading code changes in the future then the number of workers + // used by the loopfilter should be revisited. 
+ for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; @@ -182,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = &pbi->lf_row_sync; - lf_data->num_lf_workers = pbi->num_tile_workers; + lf_data->num_lf_workers = num_workers; // Start loopfiltering - if (i == pbi->num_tile_workers - 1) { + if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); @@ -193,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, } // Wait till all rows are finished - for (i = 0; i < pbi->num_tile_workers; ++i) { + for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } } diff --git a/source/libvpx/vp9/decoder/vp9_dthread.h b/source/libvpx/vp9/decoder/vp9_dthread.h index 6d4450f..005bd7b 100644 --- a/source/libvpx/vp9/decoder/vp9_dthread.h +++ b/source/libvpx/vp9/decoder/vp9_dthread.h @@ -18,13 +18,12 @@ struct macroblockd; struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; typedef struct TileWorkerData { struct VP9Common *cm; vp9_reader bit_reader; DECLARE_ALIGNED(16, struct macroblockd, xd); - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); // Row-based parallel loopfilter data LFWorkerData lfdata; @@ -51,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. 
-void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi, +void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, diff --git a/source/libvpx/vp9/decoder/vp9_onyxd_int.h b/source/libvpx/vp9/decoder/vp9_onyxd_int.h deleted file mode 100644 index 6c6c239..0000000 --- a/source/libvpx/vp9/decoder/vp9_onyxd_int.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_ONYXD_INT_H_ -#define VP9_DECODER_VP9_ONYXD_INT_H_ - -#include "./vpx_config.h" - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_thread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct VP9Decompressor { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - DECLARE_ALIGNED(16, VP9_COMMON, common); - - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); - - VP9D_CONFIG oxcf; - - const uint8_t *source; - size_t source_sz; - - int64_t last_time_stamp; - int ready_for_new_data; - - int refresh_frame_flags; - - int decoded_key_frame; - - int initial_width; - int initial_height; - - int do_loopfilter_inline; // apply loopfilter to available rows immediately - VP9Worker lf_worker; - - VP9Worker *tile_workers; - int num_tile_workers; - - VP9LfSync lf_row_sync; - - /* Each tile column has its own MODE_INFO stream. This array indexes them by - tile column index. 
*/ - MODE_INFO **mi_streams; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - PARTITION_CONTEXT *above_seg_context; -} VP9D_COMP; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.c b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.c new file mode 100644 index 0000000..778a635 --- /dev/null +++ b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "vp9/decoder/vp9_read_bit_buffer.h" + +size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { + return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0); +} + +int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { + const size_t off = rb->bit_offset; + const size_t p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - (int)off % CHAR_BIT; + if (rb->bit_buffer + p >= rb->bit_buffer_end) { + rb->error_handler(rb->error_handler_data); + return 0; + } else { + const int bit = (rb->bit_buffer[p] & (1 << q)) >> q; + rb->bit_offset = off + 1; + return bit; + } +} + +int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) { + int value = 0, bit; + for (bit = bits - 1; bit >= 0; bit--) + value |= vp9_rb_read_bit(rb) << bit; + return value; +} + +int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, + int bits) { + const int value = vp9_rb_read_literal(rb, bits); + return vp9_rb_read_bit(rb) ? 
-value : value; +} diff --git a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h index 8cb4247..fc88bd7 100644 --- a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h +++ b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h @@ -30,36 +30,13 @@ struct vp9_read_bit_buffer { vp9_rb_error_handler error_handler; }; -static size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { - return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0); -} +size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb); -static int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { - const size_t off = rb->bit_offset; - const size_t p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - (int)off % CHAR_BIT; - if (rb->bit_buffer + p >= rb->bit_buffer_end) { - rb->error_handler(rb->error_handler_data); - return 0; - } else { - const int bit = (rb->bit_buffer[p] & (1 << q)) >> q; - rb->bit_offset = off + 1; - return bit; - } -} +int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb); -static int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) { - int value = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - value |= vp9_rb_read_bit(rb) << bit; - return value; -} +int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits); -static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, - int bits) { - const int value = vp9_rb_read_literal(rb, bits); - return vp9_rb_read_bit(rb) ? -value : value; -} +int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, int bits); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/encoder/vp9_aq_complexity.c b/source/libvpx/vp9/encoder/vp9_aq_complexity.c new file mode 100644 index 0000000..47ad8d8 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_segmentation.h" + +static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = + {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + +void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + struct segmentation *const seg = &cm->seg; + + // Make SURE use of floating point in this function is safe. + vp9_clear_system_state(); + + if (cm->frame_type == KEY_FRAME || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { + int segment; + + // Clear down the segment map. + vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + + // Clear down the complexity map used for rd. + vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + + // Use some of the segments for in frame Q adjustment. 
+ for (segment = 1; segment < 2; segment++) { + const int qindex_delta = + vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, + in_frame_q_adj_ratio[segment]); + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } + } +} + +// Select a segment for the current SB64 +void vp9_select_in_frame_q_segment(VP9_COMP *cpi, + int mi_row, int mi_col, + int output_enabled, int projected_rate) { + VP9_COMMON *const cm = &cpi->common; + + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + int complexity_metric = 64; + int x, y; + + unsigned char segment; + + if (!output_enabled) { + segment = 0; + } else { + // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). + // It is converted to bits * 256 units. + const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / + (bw * bh); + + if (projected_rate < (target_rate / 4)) { + segment = 1; + } else { + segment = 0; + } + + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } + } + + // Fill in the entires in the segment map corresponding to this SB64. + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; + cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = + (unsigned char)complexity_metric; + } + } +} diff --git a/source/libvpx/vp9/encoder/vp9_aq_complexity.h b/source/libvpx/vp9/encoder/vp9_aq_complexity.h new file mode 100644 index 0000000..af031a4 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +// Select a segment for the current SB64. +void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col, + int output_enabled, int projected_rate); + + +// This function sets up a set of segments with delta Q values around +// the baseline frame quantizer. +void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c new file mode 100644 index 0000000..7879091 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_segmentation.h" + +struct CYCLIC_REFRESH { + // Percentage of super-blocks per frame that are targeted as candidates + // for cyclic refresh. 
+ int max_sbs_perframe; + // Maximum q-delta as percentage of base q. + int max_qdelta_perc; + // Block size below which we don't apply cyclic refresh. + BLOCK_SIZE min_block_size; + // Superblock starting index for cycling through the frame. + int sb_index; + // Controls how long a block will need to wait to be refreshed again. + int time_for_refresh; + // Actual number of (8x8) blocks that were applied delta-q (segment 1). + int num_seg_blocks; + // Actual encoding bits for segment 1. + int actual_seg_bits; + // RD mult. parameters for segment 1. + int rdmult; + // Cyclic refresh map. + signed char *map; + // Projected rate and distortion for the current superblock. + int64_t projected_rate_sb; + int64_t projected_dist_sb; + // Thresholds applied to projected rate/distortion of the superblock. + int64_t thresh_rate_sb; + int64_t thresh_dist_sb; +}; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { + CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); + if (cr == NULL) + return NULL; + + cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); + if (cr->map == NULL) { + vpx_free(cr); + return NULL; + } + + return cr; +} + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { + vpx_free(cr->map); + vpx_free(cr); +} + +// Check if we should turn off cyclic refresh based on bitrate condition. +static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, + const RATE_CONTROL *rc) { + // Turn off cyclic refresh if bits available per frame is not sufficiently + // larger than bit cost of segmentation. Segment map bit cost should scale + // with number of seg blocks, so compare available bits to number of blocks. + // Average bits available per frame = av_per_frame_bandwidth + // Number of (8x8) blocks in frame = mi_rows * mi_cols; + const float factor = 0.5; + const int number_blocks = cm->mi_rows * cm->mi_cols; + // The condition below corresponds to turning off at target bitrates: + // ~24kbps for CIF, 72kbps for VGA (at 30fps). 
+ // Also turn off at very small frame sizes, to avoid too large fraction of + // superblocks to be refreshed per frame. Threshold below is less than QCIF. + if (rc->av_per_frame_bandwidth < factor * number_blocks || + number_blocks / 64 < 5) + return 0; + else + return 1; +} + +// Check if this coding block, of size bsize, should be considered for refresh +// (lower-qp coding). Decision can be based on various factors, such as +// size of the coding block (i.e., below min_block size rejected), coding +// mode, and rate/distortion. +static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, + const MB_MODE_INFO *mbmi, + BLOCK_SIZE bsize, int use_rd) { + if (use_rd) { + // If projected rate is below the thresh_rate (well below target, + // so undershoot expected), accept it for lower-qp coding. + if (cr->projected_rate_sb < cr->thresh_rate_sb) + return 1; + // Otherwise, reject the block for lower-qp coding if any of the following: + // 1) prediction block size is below min_block_size + // 2) mode is non-zero mv and projected distortion is above thresh_dist + // 3) mode is an intra-mode (we may want to allow some of this under + // another thresh_dist) + else if (bsize < cr->min_block_size || + (mbmi->mv[0].as_int != 0 && + cr->projected_dist_sb > cr->thresh_dist_sb) || + !is_inter_block(mbmi)) + return 0; + else + return 1; + } else { + // Rate/distortion not used for update. + if (bsize < cr->min_block_size || + mbmi->mv[0].as_int != 0 || + !is_inter_block(mbmi)) + return 0; + else + return 1; + } +} + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. 
+void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); + // Default is to not update the refresh map. + int new_map_value = cr->map[block_index]; + int x = 0; int y = 0; + + // Check if we should reset the segment_id for this block. + if (mbmi->segment_id > 0 && !refresh_this_block) + mbmi->segment_id = 0; + + // Update the cyclic refresh map, to be used for setting segmentation map + // for the next frame. If the block will be refreshed this frame, mark it + // as clean. The magnitude of the -ve influences how long before we consider + // it for refresh again. + if (mbmi->segment_id == 1) { + new_map_value = -cr->time_for_refresh; + } else if (refresh_this_block) { + // Else if it is accepted as candidate for refresh, and has not already + // been refreshed (marked as 1) then mark it as a candidate for cleanup + // for future time (marked as 0), otherwise don't update it. + if (cr->map[block_index] == 1) + new_map_value = 0; + } else { + // Leave it marked as block that is not candidate for refresh. + new_map_value = 1; + } + // Update entries in the cyclic refresh map with new_map_value, and + // copy mbmi->segment_id into global segmentation map. 
+ for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + cr->map[block_index + y * cm->mi_cols + x] = new_map_value; + cpi->segmentation_map[block_index + y * cm->mi_cols + x] = + mbmi->segment_id; + } + // Keep track of actual number (in units of 8x8) of blocks in segment 1 used + // for encoding this frame. + if (mbmi->segment_id) + cr->num_seg_blocks += xmis * ymis; +} + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + const RATE_CONTROL *const rc = &cpi->rc; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + struct segmentation *const seg = &cm->seg; + unsigned char *const seg_map = cpi->segmentation_map; + const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); + // Don't apply refresh on key frame or enhancement layer frames. + if (!apply_cyclic_refresh || + (cm->frame_type == KEY_FRAME) || + (cpi->svc.temporal_layer_id > 0)) { + // Set segmentation map to 0 and disable. + vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + vp9_disable_segmentation(&cm->seg); + if (cm->frame_type == KEY_FRAME) + cr->sb_index = 0; + return; + } else { + int qindex_delta = 0; + int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; + int xmis, ymis, x, y, qindex2; + + // Rate target ratio to set q delta. + const float rate_ratio_qdelta = 2.0; + vp9_clear_system_state(); + // Some of these parameters may be set via codec-control function later. + cr->max_sbs_perframe = 10; + cr->max_qdelta_perc = 50; + cr->min_block_size = BLOCK_8X8; + cr->time_for_refresh = 1; + // Set rate threshold to some fraction of target (and scaled by 256). + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; + // Distortion threshold, quadratic in Q, scale factor to be adjusted. 
+ cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * + vp9_convert_qindex_to_q(cm->base_qindex)); + if (cpi->sf.use_nonrd_pick_mode) { + // May want to be more conservative with thresholds in non-rd mode for now + // as rate/distortion are derived from model based on prediction residual. + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; + cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * + vp9_convert_qindex_to_q(cm->base_qindex)); + } + + cr->num_seg_blocks = 0; + // Set up segmentation. + // Clear down the segment map. + vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + vp9_enable_segmentation(&cm->seg); + vp9_clearall_segfeatures(seg); + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Note: setting temporal_update has no effect, as the seg-map coding method + // (temporal or spatial) is determined in vp9_choose_segmap_coding_method(), + // based on the coding cost of each method. For error_resilient mode on the + // last_frame_seg_map is set to 0, so if temporal coding is used, it is + // relative to 0 previous map. + // seg->temporal_update = 0; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + // Use segment 1 for in-frame Q adjustment. + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); + + // Set the q delta for segment 1. + qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, + cm->base_qindex, + rate_ratio_qdelta); + // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from + // previous encoded frame. + if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100) + qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100; + + // Compute rd-mult for segment 1. 
+ qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); + cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); + + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta); + + sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sbs_in_frame = sb_cols * sb_rows; + // Number of target superblocks to get the q delta (segment 1). + block_count = cr->max_sbs_perframe * sbs_in_frame / 100; + // Set the segmentation map: cycle through the superblocks, starting at + // cr->mb_index, and stopping when either block_count blocks have been found + // to be refreshed, or we have passed through whole frame. + assert(cr->sb_index < sbs_in_frame); + i = cr->sb_index; + do { + int sum_map = 0; + // Get the mi_row/mi_col corresponding to superblock index i. + int sb_row_index = (i / sb_cols); + int sb_col_index = i - sb_row_index * sb_cols; + int mi_row = sb_row_index * MI_BLOCK_SIZE; + int mi_col = sb_col_index * MI_BLOCK_SIZE; + assert(mi_row >= 0 && mi_row < cm->mi_rows); + assert(mi_col >= 0 && mi_col < cm->mi_cols); + bl_index = mi_row * cm->mi_cols + mi_col; + // Loop through all 8x8 blocks in superblock and update map. + xmis = MIN(cm->mi_cols - mi_col, + num_8x8_blocks_wide_lookup[BLOCK_64X64]); + ymis = MIN(cm->mi_rows - mi_row, + num_8x8_blocks_high_lookup[BLOCK_64X64]); + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + const int bl_index2 = bl_index + y * cm->mi_cols + x; + // If the block is as a candidate for clean up then mark it + // for possible boost/refresh (segment 1). The segment id may get + // reset to 0 later if block gets coded anything other than ZEROMV. + if (cr->map[bl_index2] == 0) { + seg_map[bl_index2] = 1; + sum_map++; + } else if (cr->map[bl_index2] < 0) { + cr->map[bl_index2]++; + } + } + } + // Enforce constant segment over superblock. + // If segment is partial over superblock, reset to either all 1 or 0. 
+ if (sum_map > 0 && sum_map < xmis * ymis) { + const int new_value = (sum_map >= xmis * ymis / 2); + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) + seg_map[bl_index + y * cm->mi_cols + x] = new_value; + } + i++; + if (i == sbs_in_frame) { + i = 0; + } + if (sum_map >= xmis * ymis /2) + block_count--; + } while (block_count && i != cr->sb_index); + cr->sb_index = i; + } +} + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb) { + cr->projected_rate_sb = rate_sb; + cr->projected_dist_sb = dist_sb; +} + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { + return cr->rdmult; +} diff --git a/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h new file mode 100644 index 0000000..f556d65 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ + +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +struct CYCLIC_REFRESH; +typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. 
+void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd); + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb); + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/source/libvpx/vp9/encoder/vp9_vaq.c b/source/libvpx/vp9/encoder/vp9_aq_variance.c index c71c171..ae2a163 100644 --- a/source/libvpx/vp9/encoder/vp9_vaq.c +++ b/source/libvpx/vp9/encoder/vp9_aq_variance.c @@ -10,7 +10,7 @@ #include <math.h> -#include "vp9/encoder/vp9_vaq.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" @@ -99,7 +99,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { continue; } - qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i)); + qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i)); vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); diff --git a/source/libvpx/vp9/encoder/vp9_vaq.h b/source/libvpx/vp9/encoder/vp9_aq_variance.h index c73114a..381fe50 100644 --- a/source/libvpx/vp9/encoder/vp9_vaq.h +++ b/source/libvpx/vp9/encoder/vp9_aq_variance.h @@ -9,8 +9,8 @@ */ -#ifndef VP9_ENCODER_VP9_VAQ_H_ -#define VP9_ENCODER_VP9_VAQ_H_ +#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_onyx_int.h" @@ -31,4 +31,4 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_VAQ_H_ +#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/source/libvpx/vp9/encoder/vp9_bitstream.c b/source/libvpx/vp9/encoder/vp9_bitstream.c index 0f1692d..8d2afb9 100644 --- 
a/source/libvpx/vp9/encoder/vp9_bitstream.c +++ b/source/libvpx/vp9/encoder/vp9_bitstream.c @@ -26,6 +26,7 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_bitstream.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_mcomp.h" @@ -34,10 +35,6 @@ #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_write_bit_buffer.h" -#ifdef ENTROPY_STATS -extern unsigned int active_section; -#endif - static struct vp9_token intra_mode_encodings[INTRA_MODES]; static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS]; static struct vp9_token partition_encodings[PARTITION_TYPES]; @@ -97,13 +94,13 @@ static void write_selected_tx_size(const VP9_COMP *cpi, } } -static int write_skip(const VP9_COMP *cpi, int segment_id, MODE_INFO *m, +static int write_skip(const VP9_COMP *cpi, int segment_id, const MODE_INFO *mi, vp9_writer *w) { const MACROBLOCKD *const xd = &cpi->mb.e_mbd; if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { - const int skip = m->mbmi.skip; + const int skip = mi->mbmi.skip; vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd)); return skip; } @@ -195,7 +192,7 @@ static void write_segment_id(vp9_writer *w, const struct segmentation *seg, static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { const VP9_COMMON *const cm = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); const int segment_id = mbmi->segment_id; @@ -228,169 +225,147 @@ static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { } } -static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { +static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, + vp9_writer *w) { VP9_COMMON *const cm = &cpi->common; const 
nmv_context *nmvc = &cm->fc.nmvc; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; + const MACROBLOCK *const x = &cpi->mb; + const MACROBLOCKD *const xd = &x->e_mbd; const struct segmentation *const seg = &cm->seg; - const MB_MODE_INFO *const mi = &m->mbmi; - const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; - const MV_REFERENCE_FRAME ref1 = mi->ref_frame[1]; - const MB_PREDICTION_MODE mode = mi->mode; - const int segment_id = mi->segment_id; - const BLOCK_SIZE bsize = mi->sb_type; + const MB_MODE_INFO *const mbmi = &mi->mbmi; + const MB_PREDICTION_MODE mode = mbmi->mode; + const int segment_id = mbmi->segment_id; + const BLOCK_SIZE bsize = mbmi->sb_type; const int allow_hp = cm->allow_high_precision_mv; - int skip; - -#ifdef ENTROPY_STATS - active_section = 9; -#endif + const int is_inter = is_inter_block(mbmi); + const int is_compound = has_second_ref(mbmi); + int skip, ref; if (seg->update_map) { if (seg->temporal_update) { - const int pred_flag = mi->seg_id_predicted; + const int pred_flag = mbmi->seg_id_predicted; vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); - vp9_write(bc, pred_flag, pred_prob); + vp9_write(w, pred_flag, pred_prob); if (!pred_flag) - write_segment_id(bc, seg, segment_id); + write_segment_id(w, seg, segment_id); } else { - write_segment_id(bc, seg, segment_id); + write_segment_id(w, seg, segment_id); } } - skip = write_skip(cpi, segment_id, m, bc); + skip = write_skip(cpi, segment_id, mi, w); if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - vp9_write(bc, ref0 != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd)); + vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && - !(ref0 != INTRA_FRAME && + !(is_inter && (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) { - write_selected_tx_size(cpi, mi->tx_size, bsize, bc); + write_selected_tx_size(cpi, mbmi->tx_size, bsize, w); } - if (ref0 == INTRA_FRAME) { -#ifdef ENTROPY_STATS 
- active_section = 6; -#endif - + if (!is_inter) { if (bsize >= BLOCK_8X8) { - write_intra_mode(bc, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]); + write_intra_mode(w, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]); } else { int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode; - write_intra_mode(bc, bm, cm->fc.y_mode_prob[0]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const MB_PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode; + write_intra_mode(w, b_mode, cm->fc.y_mode_prob[0]); } } } - write_intra_mode(bc, mi->uv_mode, cm->fc.uv_mode_prob[mode]); + write_intra_mode(w, mbmi->uv_mode, cm->fc.uv_mode_prob[mode]); } else { - vp9_prob *mv_ref_p; - write_ref_frames(cpi, bc); - mv_ref_p = cm->fc.inter_mode_probs[mi->mode_context[ref0]]; - -#ifdef ENTROPY_STATS - active_section = 3; -#endif + const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]]; + const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx]; + write_ref_frames(cpi, w); // If segment skip is not enabled code the mode. 
if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_8X8) { - write_inter_mode(bc, mode, mv_ref_p); - ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(mode)]; + write_inter_mode(w, mode, inter_probs); + ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(mode)]; } } if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - vp9_write_token(bc, vp9_switchable_interp_tree, + vp9_write_token(w, vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[ctx], - &switchable_interp_encodings[mi->interp_filter]); + &switchable_interp_encodings[mbmi->interp_filter]); } else { - assert(mi->interp_filter == cm->interp_filter); + assert(mbmi->interp_filter == cm->interp_filter); } if (bsize < BLOCK_8X8) { - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; - const MB_PREDICTION_MODE b_mode = m->bmi[j].as_mode; - write_inter_mode(bc, b_mode, mv_ref_p); - ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(b_mode)]; + const MB_PREDICTION_MODE b_mode = mi->bmi[j].as_mode; + write_inter_mode(w, b_mode, inter_probs); + ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; if (b_mode == NEWMV) { -#ifdef ENTROPY_STATS - active_section = 11; -#endif - vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv, - &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp); - - if (has_second_ref(mi)) - vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv, - &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp); + for (ref = 0; ref < 1 + is_compound; ++ref) + vp9_encode_mv(cpi, w, 
&mi->bmi[j].as_mv[ref].as_mv, + &mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, + nmvc, allow_hp); } } } - } else if (mode == NEWMV) { -#ifdef ENTROPY_STATS - active_section = 5; -#endif - vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, - &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp); - - if (has_second_ref(mi)) - vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, - &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp); + } else { + if (mode == NEWMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) + vp9_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, + &mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc, + allow_hp); + } } } } static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8, - vp9_writer *bc) { + vp9_writer *w) { const VP9_COMMON *const cm = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const struct segmentation *const seg = &cm->seg; - MODE_INFO *m = mi_8x8[0]; - const int ym = m->mbmi.mode; - const int segment_id = m->mbmi.segment_id; - MODE_INFO *above_mi = mi_8x8[-xd->mode_info_stride]; - MODE_INFO *left_mi = xd->left_available ? mi_8x8[-1] : NULL; + const MODE_INFO *const mi = mi_8x8[0]; + const MODE_INFO *const above_mi = mi_8x8[-xd->mi_stride]; + const MODE_INFO *const left_mi = xd->left_available ? 
mi_8x8[-1] : NULL; + const MB_MODE_INFO *const mbmi = &mi->mbmi; + const BLOCK_SIZE bsize = mbmi->sb_type; if (seg->update_map) - write_segment_id(bc, seg, m->mbmi.segment_id); + write_segment_id(w, seg, mbmi->segment_id); - write_skip(cpi, segment_id, m, bc); + write_skip(cpi, mbmi->segment_id, mi, w); - if (m->mbmi.sb_type >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT) - write_selected_tx_size(cpi, m->mbmi.tx_size, m->mbmi.sb_type, bc); + if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT) + write_selected_tx_size(cpi, mbmi->tx_size, bsize, w); - if (m->mbmi.sb_type >= BLOCK_8X8) { - const MB_PREDICTION_MODE A = vp9_above_block_mode(m, above_mi, 0); - const MB_PREDICTION_MODE L = vp9_left_block_mode(m, left_mi, 0); - write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]); + if (bsize >= BLOCK_8X8) { + write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0)); } else { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - int i = idy * 2 + idx; - const MB_PREDICTION_MODE A = vp9_above_block_mode(m, above_mi, i); - const MB_PREDICTION_MODE L = vp9_left_block_mode(m, left_mi, i); - const int bm = m->bmi[i].as_mode; - write_intra_mode(bc, bm, vp9_kf_y_mode_prob[A][L]); + + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const int block = idy * 2 + idx; + write_intra_mode(w, mi->bmi[block].as_mode, + get_y_mode_probs(mi, above_mi, left_mi, block)); } } } - write_intra_mode(bc, m->mbmi.uv_mode, vp9_kf_uv_mode_prob[ym]); + write_intra_mode(w, mbmi->uv_mode, vp9_kf_uv_mode_prob[mbmi->mode]); } static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, @@ 
-400,35 +375,27 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; MODE_INFO *m; - xd->mi_8x8 = cm->mi_grid_visible + (mi_row * cm->mode_info_stride + mi_col); - m = xd->mi_8x8[0]; + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + m = xd->mi[0]; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { - write_mb_modes_kf(cpi, xd->mi_8x8, w); -#ifdef ENTROPY_STATS - active_section = 8; -#endif + write_mb_modes_kf(cpi, xd->mi, w); } else { pack_inter_mode_mvs(cpi, m, w); -#ifdef ENTROPY_STATS - active_section = 1; -#endif } assert(*tok < tok_end); pack_mb_tokens(w, tok, tok_end); } -static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, +static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, + int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) { - VP9_COMMON *const cm = &cpi->common; - const int ctx = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; @@ -446,21 +413,24 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, } } -static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes_sb(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int bsl = b_width_log2(bsize); const int bs = (1 << bsl) / 4; PARTITION_TYPE partition; BLOCK_SIZE subsize; - MODE_INFO *m = cm->mi_grid_visible[mi_row * 
cm->mode_info_stride + mi_col]; + MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; partition = partition_lookup[bsl][m->mbmi.sb_type]; - write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w); + write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); @@ -496,29 +466,30 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { int mi_row, mi_col; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { - vp9_zero(cpi->left_seg_context); + vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + BLOCK_64X64); } } -static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { +static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, + vp9_coeff_stats *coef_branch_ct) { vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; - vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[tx_size]; int i, j, k, l, m; for (i = 0; i < 
PLANE_TYPES; ++i) { @@ -541,16 +512,16 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { } static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, - TX_SIZE tx_size) { + TX_SIZE tx_size, + vp9_coeff_stats *frame_branch_ct) { vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_probs_model *old_frame_coef_probs = cpi->common.fc.coef_probs[tx_size]; - vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size]; const vp9_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; switch (cpi->sf.use_fast_coef_updates) { - case 0: { + case TWO_LOOP: { /* dry run to see if there is any udpate at all needed */ int savings = 0; int update[2] = {0, 0}; @@ -625,14 +596,14 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, return; } - case 1: - case 2: { + case ONE_LOOP: + case ONE_LOOP_REDUCED: { const int prev_coef_contexts_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEFF_CONTEXTS >> 1 - : COEFF_CONTEXTS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? + COEFF_CONTEXTS >> 1 : COEFF_CONTEXTS; const int coef_band_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEF_BANDS >> 1 - : COEF_BANDS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? 
+ COEF_BANDS >> 1 : COEF_BANDS; int updates = 0; int noupdates_before_first = 0; for (i = 0; i < PLANE_TYPES; ++i) { @@ -698,13 +669,15 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; + vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; + vp9_clear_system_state(); for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size) - build_tree_distribution(cpi, tx_size); + build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size]); for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - update_coef_probs_common(w, cpi, tx_size); + update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size]); } static void encode_loopfilter(struct loopfilter *lf, @@ -961,7 +934,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) * + vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); tok[0][0] = cpi->tok; @@ -1058,19 +1031,22 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) { vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8); } +static void write_profile(BITSTREAM_PROFILE profile, + struct vp9_write_bit_buffer *wb) { + assert(profile < MAX_PROFILES); + vp9_wb_write_bit(wb, profile & 1); + vp9_wb_write_bit(wb, profile >> 1); +} + static void write_uncompressed_header(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2); - // bitstream version. - // 00 - profile 0. 4:2:0 only - // 10 - profile 1. 
adds 4:4:4, 4:2:2, alpha - vp9_wb_write_bit(wb, cm->version); - vp9_wb_write_bit(wb, 0); + write_profile(cm->profile, wb); - vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 0); // show_existing_frame vp9_wb_write_bit(wb, cm->frame_type); vp9_wb_write_bit(wb, cm->show_frame); vp9_wb_write_bit(wb, cm->error_resilient_mode); @@ -1078,16 +1054,20 @@ static void write_uncompressed_header(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { const COLOR_SPACE cs = UNKNOWN; write_sync_code(wb); + if (cm->profile > PROFILE_1) { + assert(cm->bit_depth > BITS_8); + vp9_wb_write_bit(wb, cm->bit_depth - BITS_10); + } vp9_wb_write_literal(wb, cs, 3); if (cs != SRGB) { vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { vp9_wb_write_bit(wb, cm->subsampling_x); vp9_wb_write_bit(wb, cm->subsampling_y); vp9_wb_write_bit(wb, 0); // has extra plane } } else { - assert(cm->version == 1); + assert(cm->profile == PROFILE_1); vp9_wb_write_bit(wb, 0); // has extra plane } @@ -1150,18 +1130,10 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { encode_txfm_probs(cm, &header_bc); update_coef_probs(cpi, &header_bc); - -#ifdef ENTROPY_STATS - active_section = 2; -#endif - update_skip_probs(cm, &header_bc); if (!frame_is_intra_only(cm)) { int i; -#ifdef ENTROPY_STATS - active_section = 1; -#endif for (i = 0; i < INTER_MODE_CONTEXTS; ++i) prob_diff_update(vp9_inter_mode_tree, cm->fc.inter_mode_probs[i], @@ -1223,7 +1195,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { uint8_t *data = dest; - size_t first_part_size; + size_t first_part_size, uncompressed_hdr_size; struct vp9_write_bit_buffer wb = {data, 0}; struct vp9_write_bit_buffer saved_wb; @@ -1231,17 +1203,11 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { saved_wb = wb; vp9_wb_write_literal(&wb, 0, 16); // don't know in advance 
first part. size - data += vp9_rb_bytes_written(&wb); + uncompressed_hdr_size = vp9_rb_bytes_written(&wb); + data += uncompressed_hdr_size; vp9_compute_update_table(); -#ifdef ENTROPY_STATS - if (cm->frame_type == INTER_FRAME) - active_section = 0; - else - active_section = 7; -#endif - vp9_clear_system_state(); first_part_size = write_compressed_header(cpi, data); diff --git a/source/libvpx/vp9/encoder/vp9_block.h b/source/libvpx/vp9/encoder/vp9_block.h index 85f6c97..7729d84 100644 --- a/source/libvpx/vp9/encoder/vp9_block.h +++ b/source/libvpx/vp9/encoder/vp9_block.h @@ -11,7 +11,6 @@ #ifndef VP9_ENCODER_VP9_BLOCK_H_ #define VP9_ENCODER_VP9_BLOCK_H_ -#include "vp9/common/vp9_onyx.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" #include "vpx_ports/mem.h" @@ -116,7 +115,6 @@ struct macroblock { unsigned int source_variance; unsigned int pred_sse[MAX_REF_FRAMES]; int pred_mv_sad[MAX_REF_FRAMES]; - int mode_sad[MAX_REF_FRAMES][INTER_MODES + 1]; int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -132,9 +130,9 @@ struct macroblock { int *nmvsadcost_hp[2]; int **mvsadcost; - int mbmode_cost[MB_MODE_COUNT]; + int mbmode_cost[INTRA_MODES]; unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; - int intra_uv_mode_cost[2][MB_MODE_COUNT]; + int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; @@ -155,11 +153,10 @@ struct macroblock { int encode_breakout; - unsigned char *active_ptr; + int in_active_map; // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; - DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); int optimize; @@ -199,7 +196,8 @@ struct macroblock { // TODO(jingning): the variables used here are little complicated. need further // refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. 
-static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) { +static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, + BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_64X64: return &x->sb64_context; diff --git a/source/libvpx/vp9/encoder/vp9_cost.c b/source/libvpx/vp9/encoder/vp9_cost.c new file mode 100644 index 0000000..1c3c3d2 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_cost.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/encoder/vp9_cost.h" + +const unsigned int vp9_prob_cost[256] = { + 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, + 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, + 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733, + 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, + 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, + 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472, + 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415, + 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, + 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, + 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281, + 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246, + 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, + 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, + 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, + 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, + 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, + 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, + 82, 
81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63, + 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43, + 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, + 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, + 4, 3, 1, 1}; + +static void cost(int *costs, vp9_tree tree, const vp9_prob *probs, + int i, int c) { + const vp9_prob prob = probs[i / 2]; + int b; + + for (b = 0; b <= 1; ++b) { + const int cc = c + vp9_cost_bit(prob, b); + const vp9_tree_index ii = tree[i + b]; + + if (ii <= 0) + costs[-ii] = cc; + else + cost(costs, tree, probs, ii, cc); + } +} + +void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) { + cost(costs, tree, probs, 0, 0); +} + +void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { + assert(tree[0] <= 0 && tree[1] > 0); + + costs[-tree[0]] = vp9_cost_bit(probs[0], 0); + cost(costs, tree, probs, 2, 0); +} diff --git a/source/libvpx/vp9/encoder/vp9_cost.h b/source/libvpx/vp9/encoder/vp9_cost.h new file mode 100644 index 0000000..6d2b940 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_cost.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_COST_H_ +#define VP9_ENCODER_VP9_COST_H_ + +#include "vp9/common/vp9_prob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const unsigned int vp9_prob_cost[256]; + +#define vp9_cost_zero(prob) (vp9_prob_cost[prob]) + +#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob)) + +#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 
vp9_complement(prob) \ + : (prob)) + +static INLINE unsigned int cost_branch256(const unsigned int ct[2], + vp9_prob p) { + return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); +} + +static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs, + int bits, int len) { + int cost = 0; + vp9_tree_index i = 0; + + do { + const int bit = (bits >> --len) & 1; + cost += vp9_cost_bit(probs[i >> 1], bit); + i = tree[i + bit]; + } while (len); + + return cost; +} + +void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree); +void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_COST_H_ diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c index b8dc72a..c52e4f3 100644 --- a/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -29,16 +29,24 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" + +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_extend.h" -#include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_vaq.h" + +#define GF_ZEROMV_ZBIN_BOOST 0 +#define LF_ZEROMV_ZBIN_BOOST 0 +#define MV_ZBIN_BOOST 0 +#define SPLIT_MV_ZBIN_BOOST 0 +#define INTRA_ZBIN_BOOST 0 static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) { switch (subsize) { @@ -74,10 +82,10 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); // (infinite lambda), which confounds analysis. 
// This also avoids the need for divide by zero checks in // vp9_activity_masking(). -#define ACTIVITY_AVG_MIN (64) +#define ACTIVITY_AVG_MIN 64 // Motion vector component magnitude threshold for defining fast motion. -#define FAST_MOTION_MV_THRESH (24) +#define FAST_MOTION_MV_THRESH 24 // This is used as a reference when computing the source variance for the // purposes of activity masking. @@ -149,24 +157,446 @@ static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi, return BLOCK_16X16; } +// Lighter version of set_offsets that only sets the mode info +// pointers. +static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + int mi_row, + int mi_col) { + const int idx_str = xd->mi_stride * mi_row + mi_col; + xd->mi = cm->mi_grid_visible + idx_str; + xd->mi[0] = cm->mi + idx_str; +} + +static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + const int mb_rows = cm->mb_rows; + const int mb_cols = cm->mb_cols; + const int mb_row = mi_row >> 1; + const int mb_col = mi_col >> 1; + const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1; + const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1; + int r, c; + if (bsize <= BLOCK_16X16) { + return cpi->active_map[mb_row * mb_cols + mb_col]; + } + for (r = 0; r < mb_height; ++r) { + for (c = 0; c < mb_width; ++c) { + int row = mb_row + r; + int col = mb_col + c; + if (row >= mb_rows || col >= mb_cols) + continue; + if (cpi->active_map[row * mb_cols + col]) + return 1; + } + } + return 0; +} + +static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->active_map_enabled && !x->e_mbd.lossless) { + return is_block_in_mb_map(cpi, mi_row, mi_col, bsize); + } else { + return 1; + } +} + +static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + MACROBLOCK *const x = 
&cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mb_row = mi_row >> 1; + const int mb_col = mi_col >> 1; + const int idx_map = mb_row * cm->mb_cols + mb_col; + const struct segmentation *const seg = &cm->seg; + + set_skip_context(xd, mi_row, mi_col); + + // Activity map pointer + x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + + mbmi = &xd->mi[0]->mbmi; + + // Set up destination pointers. + vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + + // Set up limit values for MV components. + // Mv beyond the range do not produce new/different prediction block. + x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; + x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; + + // Set up distance of MB to edge of frame in 1/8th pel units. + assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, + cm->mi_rows, cm->mi_cols); + + // Set up source buffers. + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + + // R/D setup. + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + + // Setup segment ID. + if (seg->enabled) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + vp9_init_plane_quantizers(cpi, x); + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + } else { + mbmi->segment_id = 0; + x->encode_breakout = cpi->encode_breakout; + } +} + +static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, + MACROBLOCKD *const xd, + int mi_row, + int mi_col, + BLOCK_SIZE bsize) { + const int block_width = num_8x8_blocks_wide_lookup[bsize]; + const int block_height = num_8x8_blocks_high_lookup[bsize]; + int i, j; + for (j = 0; j < block_height; ++j) + for (i = 0; i < block_width; ++i) { + if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols) + xd->mi[j * xd->mi_stride + i] = xd->mi[0]; + } +} + +static void set_block_size(VP9_COMP * const cpi, + const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col); + xd->mi[0]->mbmi.sb_type = bsize; + duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); + } +} + +typedef struct { + int64_t sum_square_error; + int64_t sum_error; + int count; + int variance; +} var; + +typedef struct { + var none; + var horz[2]; + var vert[2]; +} partition_variance; + +typedef struct { + partition_variance part_variances; + var split[4]; +} v8x8; + +typedef struct { + partition_variance part_variances; + v8x8 split[4]; +} v16x16; + +typedef struct { + partition_variance part_variances; + v16x16 split[4]; +} v32x32; + +typedef struct { + partition_variance part_variances; + v32x32 split[4]; +} v64x64; + +typedef struct { + partition_variance *part_variances; + var *split[4]; +} variance_node; + +typedef enum { + V16X16, + V32X32, + V64X64, +} TREE_LEVEL; + +static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { + int i; + switch (bsize) { + 
case BLOCK_64X64: { + v64x64 *vt = (v64x64 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_32X32: { + v32x32 *vt = (v32x32 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_16X16: { + v16x16 *vt = (v16x16 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_8X8: { + v8x8 *vt = (v8x8 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i]; + break; + } + default: { + assert(0); + } + } +} + +// Set variance values given sum square error, sum error, count. +static void fill_variance(int64_t s2, int64_t s, int c, var *v) { + v->sum_square_error = s2; + v->sum_error = s; + v->count = c; + if (c > 0) + v->variance = (int)(256 * + (v->sum_square_error - v->sum_error * v->sum_error / + v->count) / v->count); + else + v->variance = 0; +} + +void sum_2_variances(const var *a, const var *b, var *r) { + fill_variance(a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->count + b->count, r); +} + +static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { + variance_node node; + tree_to_node(data, bsize, &node); + sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); + sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); + sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); + sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); + sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], + &node.part_variances->none); +} + +static int set_vt_partitioning(VP9_COMP *cpi, + void *data, + const TileInfo *const tile, + BLOCK_SIZE bsize, + int mi_row, + 
int mi_col, + int mi_size) { + VP9_COMMON * const cm = &cpi->common; + variance_node vt; + const int block_width = num_8x8_blocks_wide_lookup[bsize]; + const int block_height = num_8x8_blocks_high_lookup[bsize]; + // TODO(debargha): Choose this more intelligently. + const int64_t threshold_multiplier = 25; + int64_t threshold = threshold_multiplier * cpi->common.base_qindex; + assert(block_height == block_width); + + tree_to_node(data, bsize, &vt); + + // Split none is available only if we have more than half a block size + // in width and height inside the visible image. + if (mi_col + block_width / 2 < cm->mi_cols && + mi_row + block_height / 2 < cm->mi_rows && + vt.part_variances->none.variance < threshold) { + set_block_size(cpi, tile, mi_row, mi_col, bsize); + return 1; + } + + // Vertical split is available on all but the bottom border. + if (mi_row + block_height / 2 < cm->mi_rows && + vt.part_variances->vert[0].variance < threshold && + vt.part_variances->vert[1].variance < threshold) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); + set_block_size(cpi, tile, mi_row, mi_col, subsize); + set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize); + return 1; + } + + // Horizontal split is available on all but the right border. + if (mi_col + block_width / 2 < cm->mi_cols && + vt.part_variances->horz[0].variance < threshold && + vt.part_variances->horz[1].variance < threshold) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); + set_block_size(cpi, tile, mi_row, mi_col, subsize); + set_block_size(cpi, tile, mi_row + block_height / 2, mi_col, subsize); + return 1; + } + return 0; +} + +// TODO(debargha): Fix this function and make it work as expected. 
+static void choose_partitioning(VP9_COMP *cpi, + const TileInfo *const tile, + int mi_row, int mi_col) { + VP9_COMMON * const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + MACROBLOCKD *xd = &cpi->mb.e_mbd; + + int i, j, k; + v64x64 vt; + uint8_t *s; + const uint8_t *d; + int sp; + int dp; + int pixels_wide = 64, pixels_high = 64; + int_mv nearest_mv, near_mv; + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); + const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; + + vp9_zero(vt); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + + if (xd->mb_to_right_edge < 0) + pixels_wide += (xd->mb_to_right_edge >> 3); + if (xd->mb_to_bottom_edge < 0) + pixels_high += (xd->mb_to_bottom_edge >> 3); + + s = x->plane[0].src.buf; + sp = x->plane[0].src.stride; + + if (cm->frame_type != KEY_FRAME) { + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf); + + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.sb_type = BLOCK_64X64; + vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, + xd->mi[0]->mbmi.ref_mvs[LAST_FRAME], + &nearest_mv, &near_mv); + + xd->mi[0]->mbmi.mv[0] = nearest_mv; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64); + + d = xd->plane[0].dst.buf; + dp = xd->plane[0].dst.stride; + } else { + d = VP9_VAR_OFFS; + dp = 0; + } + + // Fill in the entire tree of 8x8 variances for splits. 
+ for (i = 0; i < 4; i++) { + const int x32_idx = ((i & 1) << 5); + const int y32_idx = ((i >> 1) << 5); + for (j = 0; j < 4; j++) { + const int x16_idx = x32_idx + ((j & 1) << 4); + const int y16_idx = y32_idx + ((j >> 1) << 4); + v16x16 *vst = &vt.split[i].split[j]; + for (k = 0; k < 4; k++) { + int x_idx = x16_idx + ((k & 1) << 3); + int y_idx = y16_idx + ((k >> 1) << 3); + unsigned int sse = 0; + int sum = 0; + if (x_idx < pixels_wide && y_idx < pixels_high) + vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); + fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); + } + } + } + // Fill the rest of the variance tree by summing split partition values. + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); + } + fill_variance_tree(&vt.split[i], BLOCK_32X32); + } + fill_variance_tree(&vt, BLOCK_64X64); + + // Now go through the entire structure, splitting every block size until + // we get to one that's got a variance lower than our threshold, or we + // hit 8x8. 
+ if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64, + mi_row, mi_col, 8)) { + for (i = 0; i < 4; ++i) { + const int x32_idx = ((i & 1) << 2); + const int y32_idx = ((i >> 1) << 2); + if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32, + (mi_row + y32_idx), (mi_col + x32_idx), 4)) { + for (j = 0; j < 4; ++j) { + const int x16_idx = ((j & 1) << 1); + const int y16_idx = ((j >> 1) << 1); + // NOTE: This is a temporary hack to disable 8x8 partitions, + // since it works really bad - possibly due to a bug +#define DISABLE_8X8_VAR_BASED_PARTITION +#ifdef DISABLE_8X8_VAR_BASED_PARTITION + if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows && + mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) { + set_block_size(cpi, tile, + (mi_row + y32_idx + y16_idx), + (mi_col + x32_idx + x16_idx), + BLOCK_16X16); + } else { + for (k = 0; k < 4; ++k) { + const int x8_idx = (k & 1); + const int y8_idx = (k >> 1); + set_block_size(cpi, tile, + (mi_row + y32_idx + y16_idx + y8_idx), + (mi_col + x32_idx + x16_idx + x8_idx), + BLOCK_8X8); + } + } +#else + if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile, + BLOCK_16X16, + (mi_row + y32_idx + y16_idx), + (mi_col + x32_idx + x16_idx), 2)) { + for (k = 0; k < 4; ++k) { + const int x8_idx = (k & 1); + const int y8_idx = (k >> 1); + set_block_size(cpi, tile, + (mi_row + y32_idx + y16_idx + y8_idx), + (mi_col + x32_idx + x16_idx + x8_idx), + BLOCK_8X8); + } + } +#endif + } + } + } + } +} + // Original activity measure from Tim T's code. static unsigned int tt_activity_measure(MACROBLOCK *x) { unsigned int sse; - /* TODO: This could also be done over smaller areas (8x8), but that would - * require extensive changes elsewhere, as lambda is assumed to be fixed - * over an entire MB in most of the code. - * Another option is to compute four 8x8 variances, and pick a single - * lambda using a non-linear combination (e.g., the smallest, or second - * smallest, etc.). 
- */ - unsigned int act = vp9_variance16x16(x->plane[0].src.buf, - x->plane[0].src.stride, - VP9_VAR_OFFS, 0, &sse) << 4; + // TODO: This could also be done over smaller areas (8x8), but that would + // require extensive changes elsewhere, as lambda is assumed to be fixed + // over an entire MB in most of the code. + // Another option is to compute four 8x8 variances, and pick a single + // lambda using a non-linear combination (e.g., the smallest, or second + // smallest, etc.). + const unsigned int act = vp9_variance16x16(x->plane[0].src.buf, + x->plane[0].src.stride, + VP9_VAR_OFFS, 0, &sse) << 4; // If the region is flat, lower the activity some more. - if (act < (8 << 12)) - act = MIN(act, 5 << 12); - - return act; + return act < (8 << 12) ? MIN(act, 5 << 12) : act; } // Stub for alternative experimental activity measures. @@ -387,54 +817,9 @@ static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { adjust_act_zbin(cpi, x); } -// Select a segment for the current SB64 -static void select_in_frame_q_segment(VP9_COMP *cpi, - int mi_row, int mi_col, - int output_enabled, int projected_rate) { - VP9_COMMON *const cm = &cpi->common; - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - int complexity_metric = 64; - int x, y; - - unsigned char segment; - - if (!output_enabled) { - segment = 0; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). 
- // It is converted to bits * 256 units - const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); - - if (projected_rate < (target_rate / 4)) { - segment = 1; - } else { - segment = 0; - } - - if (target_rate > 0) { - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); - } - } - - // Fill in the entires in the segment map corresponding to this SB64 - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = - (unsigned char)complexity_metric; - } - } -} - static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, - BLOCK_SIZE bsize, int output_enabled) { + int mi_row, int mi_col, BLOCK_SIZE bsize, + int output_enabled) { int i, x_idx, y; VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; @@ -442,26 +827,37 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - MODE_INFO *mi_addr = xd->mi_8x8[0]; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; - assert(mi->mbmi.mode < MB_MODE_COUNT); - assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES); - assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES); assert(mi->mbmi.sb_type == bsize); - // For in frame adaptive Q copy over the chosen segment id into the - // mode innfo context for the chosen mode / partition. 
- if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && output_enabled) - mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id; - *mi_addr = *mi; + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); + } + } + max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; @@ -483,13 +879,11 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, for (x_idx = 0; x_idx < mi_width; x_idx++) if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { - xd->mi_8x8[x_idx + y * mis] = mi_addr; + xd->mi[x_idx + y * mis] = mi_addr; } - if ((cpi->oxcf.aq_mode == VARIANCE_AQ) || - (cpi->oxcf.aq_mode == COMPLEXITY_AQ)) { + if (cpi->oxcf.aq_mode) vp9_init_plane_quantizers(cpi, x); - } // FIXME(rbultje) I'm pretty sure this should go to the end of this block // (i.e. 
after the output_enabled) @@ -538,12 +932,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, #endif if (!frame_is_intra_only(cm)) { if (is_inter_block(mbmi)) { - if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { - MV best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; - vp9_update_mv_count(cm, xd, best_mv); - } + vp9_update_mv_count(cm, xd); if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); @@ -577,89 +966,6 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, x->e_mbd.plane[i].subsampling_y); } -static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, int mi_col, BLOCK_SIZE bsize) { - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; - const int idx_str = xd->mode_info_stride * mi_row + mi_col; - const int mi_width = num_8x8_blocks_wide_lookup[bsize]; - const int mi_height = num_8x8_blocks_high_lookup[bsize]; - const int mb_row = mi_row >> 1; - const int mb_col = mi_col >> 1; - const int idx_map = mb_row * cm->mb_cols + mb_col; - const struct segmentation *const seg = &cm->seg; - - set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col); - - // Activity map pointer - x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; - x->active_ptr = cpi->active_map + idx_map; - - xd->mi_8x8 = cm->mi_grid_visible + idx_str; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - - xd->last_mi = cm->prev_mi ? 
xd->prev_mi_8x8[0] : NULL; - - xd->mi_8x8[0] = cm->mi + idx_str; - - mbmi = &xd->mi_8x8[0]->mbmi; - - // Set up destination pointers - setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); - - // Set up limit values for MV components - // mv beyond the range do not produce new/different prediction block - x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); - x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); - x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; - x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; - - // Set up distance of MB to edge of frame in 1/8th pel units - assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); - set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, - cm->mi_rows, cm->mi_cols); - - /* set up source buffers */ - vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); - - /* R/D setup */ - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - - /* segment ID */ - if (seg->enabled) { - if (cpi->oxcf.aq_mode != VARIANCE_AQ) { - const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map - : cm->last_frame_seg_map; - mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); - } - vp9_init_plane_quantizers(cpi, x); - - if (seg->enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - const int y = mb_row & ~3; - const int x = mb_col & ~3; - const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); - const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); - const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1; - const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1; - - cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) - << 16) / cm->MBs; - } - - x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; - } else { - mbmi->segment_id = 0; - x->encode_breakout = cpi->encode_breakout; - } -} - static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, int *totalrate, int64_t *totaldist, @@ -668,10 +974,11 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; - int i; - int orig_rdmult = x->rdmult; + const AQ_MODE aq_mode = cpi->oxcf.aq_mode; + int i, orig_rdmult; double rdmult_ratio; vp9_clear_system_state(); @@ -691,7 +998,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } set_offsets(cpi, tile, mi_row, mi_col, bsize); - xd->mi_8x8[0]->mbmi.sb_type = bsize; + mbmi = &xd->mi[0]->mbmi; + mbmi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][0]; @@ -703,44 +1011,49 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, x->skip_recode = 0; // Set to zero to 
make sure we do not use the previous encoded frame stats - xd->mi_8x8[0]->mbmi.skip = 0; + mbmi->skip = 0; x->source_variance = get_sby_perpixel_variance(cpi, x, bsize); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + if (aq_mode == VARIANCE_AQ) { const int energy = bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize); if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy); + mbmi->segment_id = vp9_vaq_segment_id(energy); } else { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - xd->mi_8x8[0]->mbmi.segment_id = - vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); } rdmult_ratio = vp9_vaq_rdmult_ratio(energy); vp9_init_plane_quantizers(cpi, x); } + // Save rdmult before it might be changed, so it can be restored later. + orig_rdmult = x->rdmult; if (cpi->oxcf.tuning == VP8_TUNE_SSIM) activity_masking(cpi, x); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + if (aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); x->rdmult = (int)round(x->rdmult * rdmult_ratio); - } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + } else if (aq_mode == COMPLEXITY_AQ) { const int mi_offset = mi_row * cm->mi_cols + mi_col; unsigned char complexity = cpi->complexity_map[mi_offset]; const int is_edge = (mi_row <= 1) || (mi_row >= (cm->mi_rows - 2)) || (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2)); - - if (!is_edge && (complexity > 128)) { - x->rdmult = x->rdmult + ((x->rdmult * (complexity - 128)) / 256); - } + if (!is_edge && (complexity > 128)) + x->rdmult += ((x->rdmult * (complexity - 128)) / 256); + } else if (aq_mode == CYCLIC_REFRESH_AQ) { + const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + // If segment 1, use rdmult for that segment. 
+ if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } // Find best coding mode & reconstruct the MB so it is available @@ -757,14 +1070,13 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, totaldist, bsize, ctx, best_rd); } - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + if (aq_mode == VARIANCE_AQ) { x->rdmult = orig_rdmult; if (*totalrate != INT_MAX) { vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); } - } - else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) { x->rdmult = orig_rdmult; } } @@ -773,7 +1085,7 @@ static void update_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const MACROBLOCK *const x = &cpi->mb; const MACROBLOCKD *const xd = &x->e_mbd; - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (!frame_is_intra_only(cm)) { @@ -840,21 +1152,21 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy( - cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), + xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(cpi->above_seg_context + mi_col, sa, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl, - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(xd->above_seg_context + mi_col, sa, + sizeof(*xd->above_seg_context) * mi_width); + 
vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], @@ -873,20 +1185,20 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, for (p = 0; p < MAX_MB_PLANE; ++p) { vpx_memcpy( a + num_4x4_blocks_wide * p, - cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), + xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( l + num_4x4_blocks_high * p, - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(sa, cpi->above_seg_context + mi_col, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK), - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(sa, xd->above_seg_context + mi_col, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), + sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -901,7 +1213,8 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, return; } set_offsets(cpi, tile, mi_row, mi_col, bsize); - update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); + update_state(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize, + output_enabled); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); if (output_enabled) { @@ -917,6 +1230,8 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; 
PARTITION_TYPE partition; @@ -926,8 +1241,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, return; if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = *get_sb_partitioning(x, bsize); } else { ctx = 0; @@ -982,8 +1296,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } // Check to see if the given partition size is allowed for a specified number @@ -1011,11 +1324,11 @@ static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, // However, at the bottom and right borders of the image the requested size // may not be allowed in which case this code attempts to choose the largest // allowable partition. 
-static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile, - MODE_INFO **mi_8x8, int mi_row, int mi_col, - BLOCK_SIZE bsize) { +static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, + MODE_INFO **mi_8x8, int mi_row, int mi_col, + BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int row8x8_remaining = tile->mi_row_end - mi_row; int col8x8_remaining = tile->mi_col_end - mi_col; int block_row, block_col; @@ -1051,15 +1364,79 @@ static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } +static void constrain_copy_partitioning(VP9_COMP *const cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + MODE_INFO **prev_mi_8x8, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mi_stride; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; + MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + int block_row, block_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // If the SB64 if it is all "in image". + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { + for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { + const int index = block_row * mis + block_col; + MODE_INFO *prev_mi = prev_mi_8x8[index]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + // Use previous partition if block size is not larger than bsize. 
+ if (prev_mi && sb_type <= bsize) { + int block_row2, block_col2; + for (block_row2 = 0; block_row2 < bh; ++block_row2) { + for (block_col2 = 0; block_col2 < bw; ++block_col2) { + const int index2 = (block_row + block_row2) * mis + + block_col + block_col2; + prev_mi = prev_mi_8x8[index2]; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[index2] = cm->mi + offset; + mi_8x8[index2]->mbmi.sb_type = prev_mi->mbmi.sb_type; + } + } + } + } else { + // Otherwise, use fixed partition of size bsize. + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } + } else { + // Else this is a partial SB64, copy previous partition. + for (block_row = 0; block_row < 8; ++block_row) { + for (block_col = 0; block_col < 8; ++block_col) { + MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[block_row * mis + block_col] = cm->mi + offset; + mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; + } + } + } + } +} + static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; for (block_row = 0; block_row < 8; ++block_row) { for (block_col = 0; block_col < 8; ++block_col) { MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; const BLOCK_SIZE sb_type = prev_mi ? 
prev_mi->mbmi.sb_type : 0; + if (prev_mi) { const ptrdiff_t offset = prev_mi - cm->prev_mi; mi_8x8[block_row * mis + block_col] = cm->mi + offset; @@ -1069,8 +1446,127 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, } } +const struct { + int row; + int col; +} coord_lookup[16] = { + // 32x32 index = 0 + {0, 0}, {0, 2}, {2, 0}, {2, 2}, + // 32x32 index = 1 + {0, 4}, {0, 6}, {2, 4}, {2, 6}, + // 32x32 index = 2 + {4, 0}, {4, 2}, {6, 0}, {6, 2}, + // 32x32 index = 3 + {4, 4}, {4, 6}, {6, 4}, {6, 6}, +}; + +static void set_source_var_based_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + const int mis = cm->mi_stride; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int r, c; + MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // In-image SB64 + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + const int src_stride = x->plane[0].src.stride; + const int pre_stride = cpi->Last_Source->y_stride; + const uint8_t *src = x->plane[0].src.buf; + const int pre_offset = (mi_row * MI_SIZE) * pre_stride + + (mi_col * MI_SIZE); + const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset; + const int thr_32x32 = cpi->sf.source_var_thresh; + const int thr_64x64 = thr_32x32 << 1; + int i, j; + int index; + diff d32[4]; + int use16x16 = 0; + + for (i = 0; i < 4; i++) { + diff d16[4]; + + for (j = 0; j < 4; j++) { + int b_mi_row = coord_lookup[i * 4 + j].row; + int b_mi_col = coord_lookup[i * 4 + j].col; + int b_offset = b_mi_row * MI_SIZE * src_stride + + b_mi_col * MI_SIZE; + + vp9_get_sse_sum_16x16(src + b_offset, + src_stride, + pre_src + b_offset, + pre_stride, &d16[j].sse, &d16[j].sum); + + d16[j].var = d16[j].sse - + (((uint32_t)d16[j].sum * d16[j].sum) >> 8); + + 
index = b_mi_row * mis + b_mi_col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; + + // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition + // size to further improve quality. + } + + if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 && + d16[2].var < thr_32x32 && d16[3].var < thr_32x32) { + d32[i].sse = d16[0].sse; + d32[i].sum = d16[0].sum; + + for (j = 1; j < 4; j++) { + d32[i].sse += d16[j].sse; + d32[i].sum += d16[j].sum; + } + + d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10); + + index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; + + if (!((cm->current_video_frame - 1) % + cpi->sf.search_type_check_frequency)) + cpi->use_large_partition_rate += 1; + } else { + use16x16 = 1; + } + } + + if (!use16x16) { + if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 && + d32[2].var < thr_64x64 && d32[3].var < thr_64x64) { + mi_8x8[0] = mi_upper_left; + mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + } + } + } else { // partial in-image SB64 + BLOCK_SIZE bsize = BLOCK_16X16; + int bh = num_8x8_blocks_high_lookup[bsize]; + int bw = num_8x8_blocks_wide_lookup[bsize]; + + for (r = 0; r < MI_BLOCK_SIZE; r += bh) { + for (c = 0; c < MI_BLOCK_SIZE; c += bw) { + int index = r * mis + c; + // Find a partition size that fits + bsize = find_partition_size(bsize, + (row8x8_remaining - r), + (col8x8_remaining - c), &bh, &bw); + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } +} + static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; if (cm->prev_mi) { @@ -1088,55 +1584,39 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { return 0; } -static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx) { - int i; +static void 
update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + int mi_row, int mi_col, int bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const struct segmentation *const seg = &cm->seg; - x->skip = ctx->skip; + *(xd->mi[0]) = ctx->mic; -#if CONFIG_INTERNAL_STATS - if (frame_is_intra_only(cm)) { - static const int kf_mode_index[] = { - THR_DC /*DC_PRED*/, - THR_V_PRED /*V_PRED*/, - THR_H_PRED /*H_PRED*/, - THR_D45_PRED /*D45_PRED*/, - THR_D135_PRED /*D135_PRED*/, - THR_D117_PRED /*D117_PRED*/, - THR_D153_PRED /*D153_PRED*/, - THR_D207_PRED /*D207_PRED*/, - THR_D63_PRED /*D63_PRED*/, - THR_TM /*TM_PRED*/, - }; - ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]; - } else { - // Note how often each mode chosen as best - ++cpi->mode_chosen_counts[ctx->best_mode_index]; + // For in frame adaptive Q, check for reseting the segment_id and updating + // the cyclic refresh map. 
+ if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); } -#endif - if (!frame_is_intra_only(cm)) { - if (is_inter_block(mbmi)) { - if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { - MV best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; - vp9_update_mv_count(cm, xd, best_mv); - } - if (cm->interp_filter == SWITCHABLE) { - const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); - ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; - } + if (is_inter_block(mbmi)) { + vp9_update_mv_count(cm, xd); + + if (cm->interp_filter == SWITCHABLE) { + const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; } } + + x->skip = ctx->skip; } static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - int output_enabled, BLOCK_SIZE bsize) { + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { @@ -1145,8 +1625,9 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, if (x->ab_index > 0) return; } + set_offsets(cpi, tile, mi_row, mi_col, bsize); - update_state_rt(cpi, get_block_context(x, bsize)); + update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); update_stats(cpi); @@ -1160,6 +1641,8 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1170,10 +1653,9 @@ static void 
encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int idx_str = xd->mode_info_stride * mi_row + mi_col; + const int idx_str = xd->mi_stride * mi_row + mi_col; MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = mi_8x8[0]->mbmi.sb_type; } else { ctx = 0; @@ -1232,8 +1714,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1244,12 +1725,10 @@ static void rd_use_partition(VP9_COMP *cpi, int do_recon) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int mis = cm->mode_info_stride; + MACROBLOCKD *const xd = &x->e_mbd; + const int mis = cm->mi_stride; const int bsl = b_width_log2(bsize); - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - const int ms = num_4x4_blocks_wide / 2; - const int mh = num_4x4_blocks_high / 2; + const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; const int bss = (1 << bsl) / 4; int i, pl; PARTITION_TYPE partition = PARTITION_NONE; @@ -1268,10 +1747,14 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + int do_partition_search = 1; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + assert(num_4x4_blocks_wide_lookup[bsize] == + num_4x4_blocks_high_lookup[bsize]); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1291,9 +1774,22 
@@ static void rd_use_partition(VP9_COMP *cpi, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } - if (cpi->sf.partition_search_type == SEARCH_PARTITION && + if (!x->in_active_map) { + do_partition_search = 0; + if (mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { + *(get_sb_partitioning(x, bsize)) = bsize; + bs_type = mi_8x8[0]->mbmi.sb_type = bsize; + subsize = bsize; + partition = PARTITION_NONE; + } + } + if (do_partition_search && + cpi->sf.partition_search_type == SEARCH_PARTITION && cpi->sf.adjust_partitioning_from_last_frame) { // Check if any of the sub blocks are further split. if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { @@ -1311,15 +1807,13 @@ static void rd_use_partition(VP9_COMP *cpi, // If partition is not none try none unless each of the 4 splits are split // even further.. 
if (partition != PARTITION_NONE && !splits_below && - mi_row + (ms >> 1) < cm->mi_rows && - mi_col + (ms >> 1) < cm->mi_cols) { + mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { *(get_sb_partitioning(x, bsize)) = bsize; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, get_block_context(x, bsize), INT64_MAX); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1344,13 +1838,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) { + bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; int64_t dt = 0; - update_state(cpi, get_block_context(x, subsize), subsize, 0); + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1368,13 +1863,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; int64_t dt = 0; - update_state(cpi, get_block_context(x, subsize), subsize, 0); + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - 
rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1390,8 +1886,8 @@ static void rd_use_partition(VP9_COMP *cpi, last_part_rate = 0; last_part_dist = 0; for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; int64_t dt; @@ -1417,18 +1913,20 @@ static void rd_use_partition(VP9_COMP *cpi, assert(0); } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) { last_part_rate += x->partition_cost[pl][partition]; last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } - if (cpi->sf.adjust_partitioning_from_last_frame + if (do_partition_search + && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 - && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows) - && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) { + && (mi_row + mi_step < cm->mi_rows || + mi_row + (mi_step >> 1) == cm->mi_rows) + && (mi_col + mi_step < cm->mi_cols || + mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; chosen_dist = 0; @@ -1436,8 +1934,8 @@ static void rd_use_partition(VP9_COMP *cpi, // Split partition. 
for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2); - int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -1471,14 +1969,11 @@ static void rd_use_partition(VP9_COMP *cpi, encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row + y_idx, mi_col + x_idx, + pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += x->partition_cost[pl][PARTITION_NONE]; } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rate < INT_MAX) { chosen_rate += x->partition_cost[pl][PARTITION_SPLIT]; chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); @@ -1516,10 +2011,14 @@ static void rd_use_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. 
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, chosen_rate); + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, + output_enabled, chosen_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + chosen_rate, chosen_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } @@ -1567,7 +2066,7 @@ static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8, *min_block_size = MIN(*min_block_size, sb_type); *max_block_size = MAX(*max_block_size, sb_type); } - index += xd->mode_info_stride; + index += xd->mi_stride; } } @@ -1583,77 +2082,71 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { // Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, - int row, int col, + int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { - VP9_COMMON * const cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MODE_INFO ** mi_8x8 = xd->mi_8x8; - MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8; - + MODE_INFO **mi_8x8 = xd->mi; const int left_in_image = xd->left_available && mi_8x8[-1]; const int above_in_image = xd->up_available && - mi_8x8[-xd->mode_info_stride]; - MODE_INFO ** above_sb64_mi_8x8; - MODE_INFO ** left_sb64_mi_8x8; + mi_8x8[-xd->mi_stride]; + MODE_INFO **above_sb64_mi_8x8; + MODE_INFO **left_sb64_mi_8x8; - int row8x8_remaining = tile->mi_row_end - row; - int col8x8_remaining = tile->mi_col_end - col; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; - + BLOCK_SIZE min_size = BLOCK_4X4; + BLOCK_SIZE max_size = BLOCK_64X64; // Trap case where we do not have a prediction. 
- if (!left_in_image && !above_in_image && - ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) { - *min_block_size = BLOCK_4X4; - *max_block_size = BLOCK_64X64; - } else { + if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - *min_block_size = BLOCK_64X64; - *max_block_size = BLOCK_4X4; + min_size = BLOCK_64X64; + max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous // passed in values for min and max as a starting point. - // // Find the min and max partition used in previous frame at this location - if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) { - get_sb_partition_size_range(cpi, prev_mi_8x8, - min_block_size, max_block_size); + if (cm->frame_type != KEY_FRAME) { + MODE_INFO **const prev_mi = + &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; + get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the left SB64 if (left_in_image) { left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, left_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); } - // Find the min and max partition sizes used in the above SB64. 
if (above_in_image) { - above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE]; + above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, above_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); + } + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + min_size = min_partition_size[min_size]; + max_size = max_partition_size[max_size]; } } - // adjust observed min and max - if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; - } - - // Check border cases where max and min from neighbours may not be legal. - *max_block_size = find_partition_size(*max_block_size, - row8x8_remaining, col8x8_remaining, - &bh, &bw); - *min_block_size = MIN(*min_block_size, *max_block_size); + // Check border cases where max and min from neighbors may not be legal. + max_size = find_partition_size(max_size, + row8x8_remaining, col8x8_remaining, + &bh, &bw); + min_size = MIN(min_size, max_size); // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. 
if (cpi->sf.use_square_partition_only && - (*max_block_size - *min_block_size) < 2) { - *min_block_size = next_square_size[*min_block_size]; + next_square_size[max_size] < min_size) { + min_size = next_square_size[max_size]; } + *min_block_size = min_size; + *max_block_size = max_size; } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { @@ -1673,10 +2166,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); int i, pl; BLOCK_SIZE subsize; int this_rate, sum_rate = 0, best_rate = INT_MAX; @@ -1685,8 +2180,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int do_split = bsize >= BLOCK_8X8; int do_rect = 1; // Override skipping rectangular partition operations for edge blocks - const int force_horz_split = (mi_row + ms >= cm->mi_rows); - const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); + const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; @@ -1712,6 +2207,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } // Determine partition types in search according to the speed features. 
@@ -1745,15 +2242,15 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, - get_block_context(x, bsize), best_rd); + ctx, best_rd); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += x->partition_cost[pl][PARTITION_NONE]; } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); @@ -1780,12 +2277,16 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } // store estimated motion vector if (cpi->sf.adaptive_motion_search) - store_pred_mv(x, get_block_context(x, bsize)); + store_pred_mv(x, ctx); // PARTITION_SPLIT sum_rd = 0; @@ -1794,19 +2295,19 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (do_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - const int x_idx = (i & 1) * ms; - const int y_idx = (i >> 1) * ms; + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; *get_sb_index(x, subsize) = i; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; + ctx->mic.mbmi.interp_filter; rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, 
&this_dist, i != 3, best_rd - sum_rd); @@ -1819,9 +2320,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd && i == 4) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -1844,27 +2343,28 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, subsize = get_subsize(bsize, PARTITION_HORZ); *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; + ctx->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); + if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, + ctx->mic.mbmi.interp_filter; + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, 
get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -1876,9 +2376,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_HORZ]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -1897,26 +2395,27 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; + ctx->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); + if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, + ctx->mic.mbmi.interp_filter; + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, get_block_context(x, 
subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -1928,9 +2427,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_VERT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -1958,8 +2455,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate); + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } if (bsize == BLOCK_64X64) { @@ -1974,11 +2477,13 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + SPEED_FEATURES *const sf = &cpi->sf; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; @@ -1989,7 +2494,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE i; MACROBLOCK *x = &cpi->mb; - if 
(cpi->sf.adaptive_pred_interp_filter) { + if (sf->adaptive_pred_interp_filter) { for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) { const int num_4x4_w = num_4x4_blocks_wide_lookup[i]; const int num_4x4_h = num_4x4_blocks_high_lookup[i]; @@ -2003,64 +2508,69 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if ((cpi->sf.partition_search_type == SEARCH_PARTITION && - cpi->sf.use_lastframe_partitioning) || - cpi->sf.partition_search_type == FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { - const int idx_str = cm->mode_info_stride * mi_row + mi_col; + if ((sf->partition_search_type == SEARCH_PARTITION && + sf->use_lastframe_partitioning) || + sf->partition_search_type == FIXED_PARTITION || + sf->partition_search_type == VAR_BASED_PARTITION || + sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { + const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - cpi->mb.source_variance = UINT_MAX; - if (cpi->sf.partition_search_type == FIXED_PARTITION) { + if (sf->partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); - set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - cpi->sf.always_this_block_size); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, + sf->always_this_block_size); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case. - // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION - // map to the same thing. 
+ } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); - set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1); + } else if (sf->partition_search_type == VAR_BASED_PARTITION) { + choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } else { if ((cm->current_video_frame - % cpi->sf.last_partitioning_redo_frequency) == 0 + % sf->last_partitioning_redo_frequency) == 0 || cm->prev_mi == 0 || cm->show_frame == 0 || cm->frame_type == KEY_FRAME || cpi->rc.is_src_frame_alt_ref - || ((cpi->sf.use_lastframe_partitioning == + || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && sb_has_motion(cm, prev_mi_8x8))) { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - copy_partitioning(cm, mi_8x8, prev_mi_8x8); + if (sf->constrain_copy_partition && + sb_has_motion(cm, prev_mi_8x8)) + constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8, + mi_row, mi_col, BLOCK_16X16); + else + copy_partitioning(cm, mi_8x8, prev_mi_8x8); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } } } else { // If required set upper and lower partition size limits - if 
(cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); @@ -2075,40 +2585,27 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); x->act_zbin_adj = 0; - cpi->seg0_idx = 0; - - xd->mode_info_stride = cm->mode_info_stride; // Copy data over into macro block data structures. vp9_setup_src_planes(x, cpi->Source, 0, 0); // TODO(jkoleszar): are these initializations required? - setup_pre_planes(xd, 0, get_ref_frame_buffer(cpi, LAST_FRAME), 0, 0, NULL); - setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0); + vp9_setup_pre_planes(xd, 0, get_ref_frame_buffer(cpi, LAST_FRAME), 0, 0, + NULL); + vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0); vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); - xd->mi_8x8[0]->mbmi.mode = DC_PRED; - xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED; - - vp9_zero(cm->counts.y_mode); - vp9_zero(cm->counts.uv_mode); - vp9_zero(cm->counts.inter_mode); - vp9_zero(cm->counts.partition); - vp9_zero(cm->counts.intra_inter); - vp9_zero(cm->counts.comp_inter); - vp9_zero(cm->counts.single_ref); - vp9_zero(cm->counts.comp_ref); - vp9_zero(cm->counts.tx); - vp9_zero(cm->counts.skip); + xd->mi[0]->mbmi.mode = DC_PRED; + xd->mi[0]->mbmi.uv_mode = DC_PRED; // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(cpi->above_context[0], 0, - sizeof(*cpi->above_context[0]) * + vpx_memset(xd->above_context[0], 0, + sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); - vpx_memset(cpi->above_seg_context, 0, - sizeof(*cpi->above_seg_context) * aligned_mi_cols); + vpx_memset(xd->above_seg_context, 0, + sizeof(*xd->above_seg_context) * aligned_mi_cols); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { @@ -2127,13 +2624,6 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { } } -static void switch_tx_mode(VP9_COMP *cpi) { - if (cpi->sf.tx_size_search_method == USE_LARGESTALL && - cpi->common.tx_mode >= ALLOW_32X32) - cpi->common.tx_mode = ALLOW_32X32; -} - - static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; @@ -2145,105 +2635,20 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { } } -static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - if (!mi_8x8[y * mis + x]->mbmi.skip) - return 0; - } - } - - return 1; -} - -static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs, - TX_SIZE tx_size) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) - mi_8x8[y * mis + x]->mbmi.tx_size = tx_size; - } -} - -static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis, - TX_SIZE max_tx_size, int bw, int bh, - int mi_row, int mi_col, - MODE_INFO **mi_8x8) { - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { - return; - } else { - MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi; - if (mbmi->tx_size > max_tx_size) { - const int ymbs = MIN(bh, cm->mi_rows - mi_row); - const int xmbs = MIN(bw, cm->mi_cols - mi_col); - - assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || - get_skip_flag(mi_8x8, mis, ymbs, xmbs)); - set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); - } - } -} - -static void reset_skip_txfm_size_sb(VP9_COMMON *cm, 
MODE_INFO **mi_8x8, - TX_SIZE max_tx_size, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const int mis = cm->mode_info_stride; - int bw, bh; - const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; - - if (bw == bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col, - mi_8x8); - } else if (bw == bs && bh < bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs, - mi_col, mi_8x8 + hbs * mis); - } else if (bw < bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, - mi_col + hbs, mi_8x8 + hbs); - } else { - const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; - int n; - - assert(bw < bs && bh < bs); - - for (n = 0; n < 4; n++) { - const int mi_dc = hbs * (n & 1); - const int mi_dr = hbs * (n >> 1); - - reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size, - mi_row + mi_dr, mi_col + mi_dc, subsize); - } - } -} - static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { int mi_row, mi_col; - const int mis = cm->mode_info_stride; - MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible; + const int mis = cm->mi_stride; + MODE_INFO **mi_ptr = cm->mi_grid_visible; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) { - mi_8x8 = mi_ptr; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) { - reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col, - BLOCK_64X64); + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max) + 
mi_ptr[mi_col]->mbmi.tx_size = txfm_max; } } } -static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) { +static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME; else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) @@ -2254,30 +2659,31 @@ static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) { return GOLDEN_FRAME; } -static void select_tx_mode(VP9_COMP *cpi) { +static TX_MODE select_tx_mode(const VP9_COMP *cpi) { if (cpi->oxcf.lossless) { - cpi->common.tx_mode = ONLY_4X4; + return ONLY_4X4; } else if (cpi->common.current_video_frame == 0) { - cpi->common.tx_mode = TX_MODE_SELECT; + return TX_MODE_SELECT; } else { if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { - cpi->common.tx_mode = ALLOW_32X32; + return ALLOW_32X32; } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) { - int frame_type = get_frame_type(cpi); - cpi->common.tx_mode = - cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] - > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? - ALLOW_32X32 : TX_MODE_SELECT; + const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); + return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] > + cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? + ALLOW_32X32 : TX_MODE_SELECT; } else { unsigned int total = 0; int i; for (i = 0; i < TX_SIZES; ++i) total += cpi->tx_stepdown_count[i]; + if (total) { - double fraction = (double)cpi->tx_stepdown_count[0] / total; - cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT; - // printf("fraction = %f\n", fraction); - } // else keep unchanged + const double fraction = (double)cpi->tx_stepdown_count[0] / total; + return fraction > 0.90 ? 
ALLOW_32X32 : TX_MODE_SELECT; + } else { + return cpi->common.tx_mode; + } } } } @@ -2296,171 +2702,573 @@ typedef enum { static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, MB_PREDICTION_MODE mode) { - mbmi->interp_filter = EIGHTTAP; mbmi->mode = mode; + mbmi->uv_mode = mode; mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; - if (mode < NEARESTMV) { - mbmi->ref_frame[0] = INTRA_FRAME; - } else { - mbmi->ref_frame[0] = LAST_FRAME; - } - - mbmi->ref_frame[1] = INTRA_FRAME; + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE; mbmi->tx_size = max_txsize_lookup[bsize]; - mbmi->uv_mode = mode; mbmi->skip = 0; mbmi->sb_type = bsize; mbmi->segment_id = 0; } -static INLINE int get_block_row(int b32i, int b16i, int b8i) { - return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1); +static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int *rate, int64_t *dist, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + set_offsets(cpi, tile, mi_row, mi_col, bsize); + xd->mi[0]->mbmi.sb_type = bsize; + + if (!frame_is_intra_only(cm)) { + vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, + rate, dist, bsize); + } else { + MB_PREDICTION_MODE intramode = DC_PRED; + set_mode_info(&xd->mi[0]->mbmi, bsize, intramode); + } + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); } -static INLINE int get_block_col(int b32i, int b16i, int b8i) { - return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1); +static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE subsize) { + MACROBLOCKD *xd = &x->e_mbd; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = partition_lookup[bsl][subsize]; + + assert(bsize >= BLOCK_8X8); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + 
set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize); + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize); + } + break; + case PARTITION_SPLIT: + *get_sb_index(x, subsize) = 0; + fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 1; + fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 2; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 3; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + break; + default: + break; + } } -static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist) { +static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, + int mi_col, BLOCK_SIZE bsize, 
int *rate, + int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - int mis = cm->mode_info_stride; - int br, bc; - int i, j; - MB_PREDICTION_MODE mode = DC_PRED; - int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row); - int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col); + MACROBLOCKD *const xd = &x->e_mbd; + const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); + int i; + BLOCK_SIZE subsize; + int this_rate, sum_rate = 0, best_rate = INT_MAX; + int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; + int64_t sum_rd = 0; + int do_split = bsize >= BLOCK_8X8; + int do_rect = 1; + // Override skipping rectangular partition operations for edge blocks + const int force_horz_split = (mi_row + ms >= cm->mi_rows); + const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int xss = x->e_mbd.plane[1].subsampling_x; + const int yss = x->e_mbd.plane[1].subsampling_y; - int bw = num_8x8_blocks_wide_lookup[bsize]; - int bh = num_8x8_blocks_high_lookup[bsize]; + int partition_none_allowed = !force_horz_split && !force_vert_split; + int partition_horz_allowed = !force_vert_split && yss <= xss && + bsize >= BLOCK_8X8; + int partition_vert_allowed = !force_horz_split && xss <= yss && + bsize >= BLOCK_8X8; + (void) *tp_orig; - int brate = 0; - int64_t bdist = 0; - *rate = 0; - *dist = 0; + if (bsize < BLOCK_8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. + if (x->ab_index != 0) { + *rate = 0; + *dist = 0; + return; + } + } + + assert(num_8x8_blocks_wide_lookup[bsize] == + num_8x8_blocks_high_lookup[bsize]); + + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + // Determine partition types in search according to the speed features. 
+ // The threshold set here has to be of square block size. + if (cpi->sf.auto_min_max_partition_size) { + partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size); + partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_horz_split); + partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_vert_split); + do_split &= bsize > cpi->sf.min_partition_size; + } + if (cpi->sf.use_square_partition_only) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; + } - // find prediction mode for each 8x8 block - for (br = 0; br < rows; br += bh) { - for (bc = 0; bc < cols; bc += bw) { - int row = mi_row + br; - int col = mi_col + bc; + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; - BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc, - &bh, &bw); + // PARTITION_NONE + if (partition_none_allowed) { + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, bsize); + ctx->mic.mbmi = xd->mi[0]->mbmi; - set_offsets(cpi, tile, row, col, bs); + if (this_rate != INT_MAX) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_NONE]; + sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); + if (sum_rd < best_rd) { + int64_t stop_thresh = 4096; + int64_t stop_thresh_rd; - if (cm->frame_type != KEY_FRAME) - vp9_pick_inter_mode(cpi, x, tile, row, col, - &brate, &bdist, bs); - else - set_mode_info(&xd->mi_8x8[0]->mbmi, bs, mode); + best_rate = this_rate; + best_dist = this_dist; + best_rd = sum_rd; + if (bsize >= BLOCK_8X8) + *(get_sb_partitioning(x, bsize)) = bsize; + + // Adjust threshold according to partition size. 
+ stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); + // If obtained distortion is very small, choose current partition + // and stop splitting. + if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + do_split = 0; + do_rect = 0; + } + } + } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } + } + + // store estimated motion vector + store_pred_mv(x, ctx); + + // PARTITION_SPLIT + sum_rd = 0; + if (do_split) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + subsize = get_subsize(bsize, PARTITION_SPLIT); + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + const int x_idx = (i & 1) * ms; + const int y_idx = (i >> 1) * ms; + + if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) + continue; + + *get_sb_index(x, subsize) = i; + load_pred_mv(x, ctx); + + nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, &this_dist, 0, + best_rd - sum_rd); + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } else { + // skip rectangular partition test when larger block size + // gives better rd cost + if (cpi->sf.less_rectangular_check) + do_rect &= !partition_none_allowed; + } + } + + // PARTITION_HORZ + if (partition_horz_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_HORZ); + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, 
sum_dist); - *rate += brate; - *dist += bdist; + if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); - for (j = 0; j < bh; ++j) - for (i = 0; i < bw; ++i) - xd->mi_8x8[j * mis + i] = xd->mi_8x8[0]; + nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_HORZ]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rd = sum_rd; + best_rate = sum_rate; + best_dist = sum_dist; + *(get_sb_partitioning(x, bsize)) = subsize; } } + + // PARTITION_VERT + if (partition_vert_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_VERT); + + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_VERT]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + *rate 
= best_rate; + *dist = best_dist; + + if (best_rate == INT_MAX) + return; + + // update mode info array + fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, + *(get_sb_partitioning(x, bsize))); + + if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { + int output_enabled = (bsize == BLOCK_64X64); + + // Check the projected output rate for this SB against it's target + // and and if necessary apply a Q delta using segmentation to get + // closer to the target. + if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); + } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); + } + + if (bsize == BLOCK_64X64) { + assert(tp_orig < *tp); + assert(best_rate < INT_MAX); + assert(best_dist < INT64_MAX); + } else { + assert(tp_orig == *tp); + } +} + +static void nonrd_use_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + TOKENEXTRA **tp, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int output_enabled, + int *totrate, int64_t *totdist) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + const int mis = cm->mi_stride; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + int rate = INT_MAX; + int64_t dist = INT64_MAX; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + subsize = (bsize >= BLOCK_8X8) ? 
mi_8x8[0]->mbmi.sb_type : BLOCK_4X4; + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, + &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, + &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = mi_8x8[0]->mbmi; + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + } + break; + case PARTITION_SPLIT: + subsize = get_subsize(bsize, PARTITION_SPLIT); + *get_sb_index(x, subsize) = 0; + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + subsize, output_enabled, totrate, totdist); + *get_sb_index(x, subsize) = 1; + nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp, + mi_row, mi_col + hbs, subsize, output_enabled, + &rate, &dist); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + *get_sb_index(x, subsize) = 2; + nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp, + mi_row + 
hbs, mi_col, subsize, output_enabled, + &rate, &dist); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + *get_sb_index(x, subsize) = 3; + nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp, + mi_row + hbs, mi_col + hbs, subsize, output_enabled, + &rate, &dist); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + break; + default: + assert("Invalid partition type."); + } + + if (bsize == BLOCK_64X64 && output_enabled) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + *totrate, *totdist); + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize); + } } static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; + int dummy_rate = 0; + int64_t dummy_dist = 0; + const int idx_str = cm->mi_stride * mi_row + mi_col; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; + MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; + BLOCK_SIZE bsize; cpi->mb.source_variance = UINT_MAX; + vp9_zero(cpi->mb.pred_mv); - if (cpi->sf.partition_search_type == FIXED_PARTITION) { - nonrd_use_partition(cpi, tile, tp, mi_row, mi_col, - cpi->sf.always_this_block_size, - &dummy_rate, &dummy_dist); - encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, 
BLOCK_64X64); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case. - // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION - // map to the same thing. - BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi, - mi_row, - mi_col); - nonrd_use_partition(cpi, tile, tp, mi_row, mi_col, - bsize, &dummy_rate, &dummy_dist); - encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64); - } else { - assert(0); + // Set the partition type of the 64X64 block + switch (cpi->sf.partition_search_type) { + case VAR_BASED_PARTITION: + choose_partitioning(cpi, tile, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case SOURCE_VAR_BASED_PARTITION: + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case VAR_BASED_FIXED_PARTITION: + case FIXED_PARTITION: + bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
+ cpi->sf.always_this_block_size : + get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case REFERENCE_PARTITION: + if (cpi->sf.partition_check || sb_has_motion(cm, prev_mi_8x8)) { + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, INT64_MAX); + } else { + copy_partitioning(cm, mi_8x8, prev_mi_8x8); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rate, &dummy_dist); + } + break; + default: + assert(0); } } } // end RTC play code static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; + SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; - vp9_zero(cm->counts.switchable_interp); + vp9_zero(cm->counts); + vp9_zero(cpi->coef_counts); vp9_zero(cpi->tx_stepdown_count); + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); - xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; - - xd->last_mi = cm->prev_mi; - - vp9_zero(cm->counts.mv); - vp9_zero(cpi->coef_counts); - vp9_zero(cm->counts.eob_branch); + cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); 
vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); - switch_tx_mode(cpi); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. init_encode_frame_mb_context(cpi); - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + build_activity_map(cpi); set_prev_mi(cm); - if (cpi->sf.use_nonrd_pick_mode) { + if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. int i; @@ -2474,6 +3282,30 @@ static void encode_frame_internal(VP9_COMP *cpi) { pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; p[i].eobs = ctx->eobs_pbuf[i][0]; } + vp9_zero(x->zcoeff_blk); + + if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION && + cm->current_video_frame > 0) { + int check_freq = sf->search_type_check_frequency; + + if ((cm->current_video_frame - 1) % check_freq == 0) { + cpi->use_large_partition_rate = 0; + } + + if ((cm->current_video_frame - 1) % check_freq == 1) { + const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] - + b_width_log2_lookup[BLOCK_16X16]) + + (b_height_log2_lookup[BLOCK_32X32] - + b_height_log2_lookup[BLOCK_16X16])); + cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 * + mbs_in_b32x32 / cm->MBs; + } + + if ((cm->current_video_frame - 1) % check_freq >= 1) { + if (cpi->use_large_partition_rate < 15) + sf->partition_search_type = FIXED_PARTITION; + } + } } { @@ -2491,12 +3323,13 @@ static void encode_frame_internal(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileInfo tile; TOKENEXTRA *tp_old = tp; + int mi_row; // For each row of SBs in the 
frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode) + if (sf->use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else encode_rd_sb_row(cpi, &tile, mi_row, &tp); @@ -2511,18 +3344,18 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - if (cpi->sf.skip_encode_sb) { + if (sf->skip_encode_sb) { int j; unsigned int intra_count = 0, inter_count = 0; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { intra_count += cm->counts.intra_inter[j][0]; inter_count += cm->counts.intra_inter[j][1]; } - cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); - cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); - cpi->sf.skip_encode_frame &= cm->show_frame; + sf->skip_encode_frame = (intra_count << 2) < inter_count && + cm->frame_type != KEY_FRAME && + cm->show_frame; } else { - cpi->sf.skip_encode_frame = 0; + sf->skip_encode_frame = 0; } #if 0 @@ -2556,33 +3389,31 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.frame_parameter_update) { int i; - REFERENCE_MODE reference_mode; - /* - * This code does a single RD pass over the whole frame assuming - * either compound, single or hybrid prediction as per whatever has - * worked best for that type of frame in the past. - * It also predicts whether another coding mode would have worked - * better that this coding mode. If that is the case, it remembers - * that for subsequent frames. - * It does the same analysis for transform size selection also. - */ + + // This code does a single RD pass over the whole frame assuming + // either compound, single or hybrid prediction as per whatever has + // worked best for that type of frame in the past. + // It also predicts whether another coding mode would have worked + // better that this coding mode. 
If that is the case, it remembers + // that for subsequent frames. + // It does the same analysis for transform size selection also. const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3 || !cm->allow_comp_inter_inter) - reference_mode = SINGLE_REFERENCE; + if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) + cm->reference_mode = SINGLE_REFERENCE; else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] && mode_thresh[COMPOUND_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - reference_mode = COMPOUND_REFERENCE; + cm->reference_mode = COMPOUND_REFERENCE; else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT]) - reference_mode = SINGLE_REFERENCE; + cm->reference_mode = SINGLE_REFERENCE; else - reference_mode = REFERENCE_MODE_SELECT; + cm->reference_mode = REFERENCE_MODE_SELECT; if (cm->interp_filter == SWITCHABLE) { if (frame_type != ALTREF_FRAME && @@ -2598,12 +3429,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; - - /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */ - select_tx_mode(cpi); - cm->reference_mode = reference_mode; - encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { @@ -2682,8 +3507,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - // Force the usage of the BILINEAR interp_filter. 
- cm->interp_filter = BILINEAR; + cm->reference_mode = SINGLE_REFERENCE; + cm->interp_filter = SWITCHABLE; encode_frame_internal(cpi); } } @@ -2693,18 +3518,18 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { const MB_PREDICTION_MODE uv_mode = mi->mbmi.uv_mode; const BLOCK_SIZE bsize = mi->mbmi.sb_type; - ++counts->uv_mode[y_mode][uv_mode]; - if (bsize < BLOCK_8X8) { int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + for (idy = 0; idy < 2; idy += num_4x4_h) + for (idx = 0; idx < 2; idx += num_4x4_w) ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; } else { ++counts->y_mode[size_group_lookup[bsize]][y_mode]; } + + ++counts->uv_mode[y_mode][uv_mode]; } // Experimental stub function to create a per MB zbin adjustment based on @@ -2713,13 +3538,10 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->act_zbin_adj = *(x->mb_activity_ptr); #else - int64_t a; - int64_t b; - int64_t act = *(x->mb_activity_ptr); - // Apply the masking to the RD multiplier. 
- a = act + 4 * cpi->activity_avg; - b = 4 * act + cpi->activity_avg; + const int64_t act = *(x->mb_activity_ptr); + const int64_t a = act + 4 * cpi->activity_avg; + const int64_t b = 4 * act + cpi->activity_avg; if (act > cpi->activity_avg) x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1; @@ -2751,23 +3573,26 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO **mi_8x8 = xd->mi_8x8; + MODE_INFO **mi_8x8 = xd->mi; MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); unsigned int segment_id = mbmi->segment_id; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ) && - !cpi->sf.use_nonrd_pick_mode; + cpi->oxcf.aq_mode != COMPLEXITY_AQ && + cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && + cpi->sf.allow_skip_recode; + x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH); + if (x->skip_encode) return; @@ -2778,7 +3603,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, } } else { set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. 
@@ -2806,7 +3630,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]); - setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); + vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, + &xd->block_refs[ref]->sf); } vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.h b/source/libvpx/vp9/encoder/vp9_encodeframe.h index f7d17c3..131e932 100644 --- a/source/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/source/libvpx/vp9/encoder/vp9_encodeframe.h @@ -18,11 +18,20 @@ extern "C" { struct macroblock; struct yv12_buffer_config; +struct VP9_COMP; + +typedef struct { + unsigned int sse; + int sum; + unsigned int var; +} diff; void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); +void vp9_encode_frame(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.c b/source/libvpx/vp9/encoder/vp9_encodemb.c index 513730e..5e98e4e 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemb.c +++ b/source/libvpx/vp9/encoder/vp9_encodemb.c @@ -107,11 +107,11 @@ static int trellis_get_coeff_context(const int16_t *scan, static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, MACROBLOCK *mb, - struct optimize_ctx *ctx) { + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *p = &mb->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); @@ -133,18 +133,13 @@ static void optimize_b(int plane, int block, BLOCK_SIZE 
plane_bsize, const scan_order *so = get_scan(xd, tx_size, type, block); const int16_t *scan = so->scan; const int16_t *nb = so->neighbors; - ENTROPY_CONTEXT *a, *l; - int tx_x, tx_y; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &tx_x, &tx_y); - a = &ctx->ta[plane][tx_x]; - l = &ctx->tl[plane][tx_y]; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; - if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi)) + if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi)) rdmult = (rdmult * 9) >> 4; rddiv = mb->rddiv; /* Initialize the sentinel node of the trellis. */ @@ -380,15 +375,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); int i, j; uint8_t *dst; + ENTROPY_CONTEXT *a, *l; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; + a = &ctx->ta[plane][i]; + l = &ctx->tl[plane][j]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. 
if (x->zcoeff_blk[tx_size][block] && plane == 0) { p->eobs[block] = 0; - ctx->ta[plane][i] = 0; - ctx->tl[plane][j] = 0; + *a = *l = 0; return; } @@ -396,10 +393,9 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { - optimize_b(plane, block, plane_bsize, tx_size, x, ctx); + optimize_b(plane, block, plane_bsize, tx_size, x, a, l); } else { - ctx->ta[plane][i] = p->eobs[block] > 0; - ctx->tl[plane][j] = p->eobs[block] > 0; + *a = *l = p->eobs[block] > 0; } if (p->eobs[block]) @@ -428,6 +424,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, assert(0 && "Invalid transform size"); } } + static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { MACROBLOCK *const x = (MACROBLOCK *)arg; @@ -455,7 +452,7 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct encode_b_args arg = {x, &ctx, &mbmi->skip}; int plane; @@ -480,7 +477,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t *coeff = BLOCK_OFFSET(p->coeff, block); @@ -502,9 +499,6 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, src = &p->src.buf[4 * (j * src_stride + i)]; src_diff = &p->src_diff[4 * (j * diff_stride + i)]; - // if (x->optimize) - // optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); - switch (tx_size) { 
case TX_32X32: scan_order = &vp9_default_scan_orders[TX_32X32]; @@ -526,7 +520,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); break; case TX_16X16: - tx_type = get_tx_type_16x16(pd->plane_type, xd); + tx_type = get_tx_type(pd->plane_type, xd); scan_order = &vp9_scan_orders[TX_16X16][tx_type]; mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, @@ -546,7 +540,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; case TX_8X8: - tx_type = get_tx_type_8x8(pd->plane_type, xd); + tx_type = get_tx_type(pd->plane_type, xd); scan_order = &vp9_scan_orders[TX_8X8][tx_type]; mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, @@ -568,7 +562,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi_8x8[0], block) : mbmi->uv_mode; + mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? 
src_stride : dst_stride, @@ -614,14 +608,14 @@ void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block, void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { const MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip}; + struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra, &arg); } int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) { - MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi; x->skip_encode = 0; mbmi->mode = DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; diff --git a/source/libvpx/vp9/encoder/vp9_encodemv.c b/source/libvpx/vp9/encoder/vp9_encodemv.c index 5079699..9d44865 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemv.c +++ b/source/libvpx/vp9/encoder/vp9_encodemv.c @@ -13,11 +13,9 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_encodemv.h" -#ifdef ENTROPY_STATS -extern unsigned int active_section; -#endif +#include "vp9/encoder/vp9_cost.h" +#include "vp9/encoder/vp9_encodemv.h" static struct vp9_token mv_joint_encodings[MV_JOINTS]; static struct vp9_token mv_class_encodings[MV_CLASSES]; @@ -160,7 +158,7 @@ static void write_mv_update(const vp9_tree_index *tree, vp9_tree_probs_from_distribution(tree, branch_ct, counts); for (i = 0; i < n - 1; ++i) - update_mv(w, branch_ct[i], &probs[i], NMV_UPDATE_PROB); + update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB); } void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { @@ -174,13 +172,13 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { nmv_component *comp = &mvc->comps[i]; nmv_component_counts *comp_counts = &counts->comps[i]; - update_mv(w, comp_counts->sign, &comp->sign, NMV_UPDATE_PROB); + update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB); 
write_mv_update(vp9_mv_class_tree, comp->classes, comp_counts->classes, MV_CLASSES, w); write_mv_update(vp9_mv_class0_tree, comp->class0, comp_counts->class0, CLASS0_SIZE, w); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(w, comp_counts->bits[j], &comp->bits[j], NMV_UPDATE_PROB); + update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { @@ -195,8 +193,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { if (usehp) { for (i = 0; i < 2; ++i) { update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp, - NMV_UPDATE_PROB); - update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, NMV_UPDATE_PROB); + MV_UPDATE_PROB); + update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB); } } } @@ -231,22 +229,21 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); } -static void inc_mvs(const int_mv mv[2], const MV ref[2], int is_compound, +static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], nmv_context_counts *counts) { int i; - for (i = 0; i < 1 + is_compound; ++i) { - const MV diff = { mv[i].as_mv.row - ref[i].row, - mv[i].as_mv.col - ref[i].col }; + + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { + const MV *ref = &mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; + const MV diff = {mvs[i].as_mv.row - ref->row, + mvs[i].as_mv.col - ref->col}; vp9_inc_mv(&diff, counts); } } -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, - const MV best_ref_mv[2]) { - const MODE_INFO *mi = xd->mi_8x8[0]; +void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { + const MODE_INFO *mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; - const int is_compound = has_second_ref(mbmi); - nmv_context_counts *counts = &cm->counts.mv; if (mbmi->sb_type < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type]; @@ -257,11 +254,12 @@ void vp9_update_mv_count(VP9_COMMON *cm, const 
MACROBLOCKD *xd, for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; if (mi->bmi[i].as_mode == NEWMV) - inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, counts); + inc_mvs(mbmi, mi->bmi[i].as_mv, &cm->counts.mv); } } - } else if (mbmi->mode == NEWMV) { - inc_mvs(mbmi->mv, best_ref_mv, is_compound, counts); + } else { + if (mbmi->mode == NEWMV) + inc_mvs(mbmi, mbmi->mv, &cm->counts.mv); } } diff --git a/source/libvpx/vp9/encoder/vp9_encodemv.h b/source/libvpx/vp9/encoder/vp9_encodemv.h index f16b2c1..50cb961 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemv.h +++ b/source/libvpx/vp9/encoder/vp9_encodemv.h @@ -28,8 +28,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context* mvctx, int usehp); -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, - const MV best_ref_mv[2]); +void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c index 8e454e6..db32ef8 100644 --- a/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -20,9 +20,10 @@ #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconinter.h" // setup_dst_planes() +#include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes() #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" @@ -34,7 +35,6 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_variance.h" #define OUTPUT_FPF 0 @@ -54,7 +54,14 @@ #define MIN_KF_BOOST 300 -#define DISABLE_RC_LONG_TERM_MEM 0 +#if 
CONFIG_MULTIPLE_ARF +// Set MIN_GF_INTERVAL to 1 for the full decomposition. +#define MIN_GF_INTERVAL 2 +#else +#define MIN_GF_INTERVAL 4 +#endif + +#define DISABLE_RC_LONG_TERM_MEM static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; @@ -62,22 +69,6 @@ static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { *b = temp; } -static int select_cq_level(int qindex) { - int ret_val = QINDEX_RANGE - 1; - int i; - - double target_q = (vp9_convert_qindex_to_q(qindex) * 0.5847) + 1.0; - - for (i = 0; i < QINDEX_RANGE; ++i) { - if (target_q <= vp9_convert_qindex_to_q(i)) { - ret_val = i; - break; - } - } - - return ret_val; -} - static int gfboost_qadjust(int qindex) { const double q = vp9_convert_qindex_to_q(qindex); return (int)((0.00000828 * q * q * q) + @@ -85,17 +76,10 @@ static int gfboost_qadjust(int qindex) { (1.32 * q) + 79.3); } -static int kfboost_qadjust(int qindex) { - const double q = vp9_convert_qindex_to_q(qindex); - return (int)((0.00000973 * q * q * q) + - (-0.00613 * q * q) + - (1.316 * q) + 121.2); -} - // Resets the first pass file to the given position using a relative seek from // the current position. 
static void reset_fpf_position(struct twopass_rc *p, - FIRSTPASS_STATS *position) { + const FIRSTPASS_STATS *position) { p->stats_in = position; } @@ -197,10 +181,13 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->new_mv_count = 0.0; section->count = 0.0; section->duration = 1.0; + section->spatial_layer_id = 0; } -static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { +static void accumulate_stats(FIRSTPASS_STATS *section, + const FIRSTPASS_STATS *frame) { section->frame += frame->frame; + section->spatial_layer_id = frame->spatial_layer_id; section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; @@ -221,7 +208,8 @@ static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { section->duration += frame->duration; } -static void subtract_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { +static void subtract_stats(FIRSTPASS_STATS *section, + const FIRSTPASS_STATS *frame) { section->frame -= frame->frame; section->intra_error -= frame->intra_error; section->coded_error -= frame->coded_error; @@ -269,12 +257,22 @@ static void avg_stats(FIRSTPASS_STATS *section) { // harder frames. 
static double calculate_modified_err(const VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame) { - const struct twopass_rc *const twopass = &cpi->twopass; - const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_err = stats->ssim_weighted_pred_err / stats->count; - double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); + const struct twopass_rc *twopass = &cpi->twopass; + const SVC *const svc = &cpi->svc; + const FIRSTPASS_STATS *stats; + double av_err; + double modified_error; + + if (svc->number_spatial_layers > 1 && + svc->number_temporal_layers == 1) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } + + stats = &twopass->total_stats; + av_err = stats->ssim_weighted_pred_err / stats->count; + modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / + DOUBLE_DIVIDE_CHECK(av_err), + cpi->oxcf.two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); @@ -338,15 +336,13 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) { } // This function returns the maximum target rate per frame. 
-static int frame_max_bits(const VP9_COMP *cpi) { - int64_t max_bits = - ((int64_t)cpi->rc.av_per_frame_bandwidth * - (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; - +static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) { + int64_t max_bits = ((int64_t)rc->av_per_frame_bandwidth * + (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) max_bits = 0; - else if (max_bits > cpi->rc.max_frame_bandwidth) - max_bits = cpi->rc.max_frame_bandwidth; + else if (max_bits > rc->max_frame_bandwidth) + max_bits = rc->max_frame_bandwidth; return (int)max_bits; } @@ -356,7 +352,15 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + int i; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + output_stats(&cpi->svc.layer_context[i].twopass.total_stats, + cpi->output_pkt_list); + } + } else { + output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + } } static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { @@ -379,7 +383,7 @@ static unsigned int zz_motion_search(const MACROBLOCK *x) { const uint8_t *const ref = xd->plane[0].pre[0].buf; const int ref_stride = xd->plane[0].pre[0].stride; unsigned int sse; - vp9_variance_fn_t fn = get_block_variance_fn(xd->mi_8x8[0]->mbmi.sb_type); + vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type); fn(src, src_stride, ref, ref_stride, &sse); return sse; } @@ -393,7 +397,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int num00, tmp_err, n, sr = 0; int step_param = 3; int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; int new_mv_mode_penalty = 256; const int quart_frm = MIN(cpi->common.width, cpi->common.height); @@ 
-415,6 +419,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->sadperbit16, &num00, &v_fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (tmp_err < INT_MAX) + tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; @@ -439,6 +445,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &num00, &v_fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (tmp_err < INT_MAX) + tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; @@ -474,11 +482,11 @@ void vp9_first_pass(VP9_COMP *cpi) { int recon_yoffset, recon_uvoffset; YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - YV12_BUFFER_CONFIG *const gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); - const int recon_y_stride = lst_yv12->y_stride; - const int recon_uv_stride = lst_yv12->uv_stride; - const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); + int recon_y_stride = lst_yv12->y_stride; + int recon_uv_stride = lst_yv12->uv_stride; + int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); int64_t intra_error = 0; int64_t coded_error = 0; int64_t sr_coded_error = 0; @@ -494,17 +502,47 @@ void vp9_first_pass(VP9_COMP *cpi) { int new_mv_count = 0; int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; + const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; vp9_clear_system_state(); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + MV_REFERENCE_FRAME ref_frame = LAST_FRAME; + const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; + twopass = 
&cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + + vp9_scale_references(cpi); + + // Use either last frame or alt frame for motion search. + if (cpi->ref_frame_flags & VP9_LAST_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); + ref_frame = LAST_FRAME; + } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME); + ref_frame = ALTREF_FRAME; + } + + if (scaled_ref_buf != NULL) { + // Update the stride since we are using scaled reference buffer + first_ref_buf = scaled_ref_buf; + recon_y_stride = first_ref_buf->y_stride; + recon_uv_stride = first_ref_buf->uv_stride; + uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height); + } + + // Disable golden frame for svc first pass for now. + gld_yv12 = NULL; + set_ref_ptrs(cm, xd, ref_frame, NONE); + } + vp9_setup_src_planes(x, cpi->Source, 0, 0); - setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL); - setup_dst_planes(xd, new_yv12, 0, 0); + vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); + vp9_setup_dst_planes(xd, new_yv12, 0, 0); - xd->mi_8x8 = cm->mi_grid_visible; - xd->mi_8x8[0] = cm->mi; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -552,8 +590,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - xd->mi_8x8[0]->mbmi.sb_type = bsize; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], @@ -593,7 +631,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int tmp_err, motion_error; int_mv mv, tmp_mv; - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; + xd->plane[0].pre[0].buf = 
first_ref_buf->y_buffer + recon_yoffset; motion_error = zz_motion_search(x); // Assume 0,0 motion with no mv overhead. mv.as_int = tmp_mv.as_int = 0; @@ -625,7 +663,7 @@ void vp9_first_pass(VP9_COMP *cpi) { } // Search in an older reference frame. - if (cm->current_video_frame > 1) { + if (cm->current_video_frame > 1 && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; @@ -643,9 +681,9 @@ void vp9_first_pass(VP9_COMP *cpi) { ++second_ref_count; // Reset to last frame as reference buffer. - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; - xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset; - xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset; + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; + xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; + xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; // In accumulating a score for the older reference frame take the // best of the motion predicted score and the intra coded error @@ -672,10 +710,11 @@ void vp9_first_pass(VP9_COMP *cpi) { mv.as_mv.row *= 8; mv.as_mv.col *= 8; this_error = motion_error; - vp9_set_mbmode_and_mvs(xd, NEWMV, &mv.as_mv); - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; - xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0] = mv; + xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); sum_mvr += mv.as_mv.row; @@ -752,6 +791,7 @@ void vp9_first_pass(VP9_COMP *cpi) { FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame; + fps.spatial_layer_id = cpi->svc.spatial_layer_id; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); fps.sr_coded_error = (double)(sr_coded_error >> 8); @@ -801,20 
+841,28 @@ void vp9_first_pass(VP9_COMP *cpi) { (twopass->this_frame_stats.pcnt_inter > 0.20) && ((twopass->this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); + if (gld_yv12 != NULL) { + vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } twopass->sr_update_lag = 1; } else { ++twopass->sr_update_lag; } - // Swap frame pointers so last frame refers to the frame we just compressed. - swap_yv12(lst_yv12, new_yv12); + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + vp9_update_reference_frames(cpi); + } else { + // Swap frame pointers so last frame refers to the frame we just compressed. + swap_yv12(lst_yv12, new_yv12); + } vp9_extend_frame_borders(lst_yv12); // Special case for the first frame. Copy into the GF buffer as a second // reference. - if (cm->current_video_frame == 0) + if (cm->current_video_frame == 0 && gld_yv12 != NULL) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } // Use this to see what the first pass reconstruction looks like. if (0) { @@ -835,12 +883,6 @@ void vp9_first_pass(VP9_COMP *cpi) { ++cm->current_video_frame; } -// Estimate a cost per mb attributable to overheads such as the coding of modes -// and motion vectors. This currently makes simplistic assumptions for testing. -static double bitcost(double prob) { - return -(log(prob) / log(2.0)); -} - static double calc_correction_factor(double err_per_mb, double err_divisor, double pt_low, @@ -868,21 +910,21 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, const double section_err = fpstats->coded_error / fpstats->count; const double err_per_mb = section_err / num_mbs; + const double speed_term = 1.0 + ((double)cpi->speed * 0.04); if (section_target_bandwitdh <= 0) return rc->worst_quality; // Highest value allowed - target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) - ? 
(512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = + ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs; // Try and pick a max Q that will be high enough to encode the // content at the given rate. for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double err_correction_factor = calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.5, 0.90, q); - const int bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q, - err_correction_factor); + const int bits_per_mb_at_this_q = + vp9_rc_bits_per_mb(INTER_FRAME, q, (err_correction_factor * speed_term)); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } @@ -897,10 +939,18 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; FIRSTPASS_STATS this_frame; - FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; + const FIRSTPASS_STATS *start_pos; + struct twopass_rc *twopass = &cpi->twopass; const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int is_spatial_svc = (svc->number_spatial_layers > 1) && + (svc->number_temporal_layers == 1); + double frame_rate; + + if (is_spatial_svc) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } zero_stats(&twopass->total_stats); zero_stats(&twopass->total_left_stats); @@ -911,30 +961,44 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->total_stats = *twopass->stats_in_end; twopass->total_left_stats = twopass->total_stats; + frame_rate = 10000000.0 * twopass->total_stats.count / + twopass->total_stats.duration; // Each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However, the sum duration is not. 
// It is calculated based on the actual durations of all frames from the // first pass. - vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count / - twopass->total_stats.duration); + + if (is_spatial_svc) { + vp9_update_spatial_layer_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(twopass->total_stats.duration * + svc->layer_context[svc->spatial_layer_id].target_bandwidth / + 10000000.0); + } else { + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = (int64_t)(twopass->total_stats.duration * + oxcf->target_bandwidth / 10000000.0); + } cpi->output_framerate = oxcf->framerate; - twopass->bits_left = (int64_t)(twopass->total_stats.duration * - oxcf->target_bandwidth / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain // are still boosted appropriately for KF/GF/ARF. - twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + if (!is_spatial_svc) { + // We don't know the number of MBs for each layer at this point. + // So we will do it later. + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } // This variable monitors how far behind the second ref update is lagging. twopass->sr_update_lag = 1; - // Scan the first pass file and calculate an average Intra / Inter error score - // ratio for the sequence. + // Scan the first pass file and calculate an average Intra / Inter error + // score ratio for the sequence. { double sum_iiratio = 0.0; start_pos = twopass->stats_in; @@ -993,8 +1057,8 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. 
This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, +static int detect_transition_to_still(struct twopass_rc *twopass, + int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { int trans_to_still = 0; @@ -1006,19 +1070,19 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - FIRSTPASS_STATS *position = cpi->twopass.stats_in; + const FIRSTPASS_STATS *position = twopass->stats_in; FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) + if (EOF == input_stats(twopass, &tmp_next_frame)) break; if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - reset_fpf_position(&cpi->twopass, position); + reset_fpf_position(twopass, position); // Only if it does do we signal a transition to still. if (j == still_interval) @@ -1340,9 +1404,11 @@ void define_fixed_arf_period(VP9_COMP *cpi) { // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - FIRSTPASS_STATS next_frame = { 0 }; - FIRSTPASS_STATS *start_pos; + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; struct twopass_rc *const twopass = &cpi->twopass; + FIRSTPASS_STATS next_frame = { 0 }; + const FIRSTPASS_STATS *start_pos; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1361,16 +1427,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - const int max_bits = frame_max_bits(cpi); // Max bits for a single frame. 
- - unsigned int allow_alt_ref = cpi->oxcf.play_alternate && - cpi->oxcf.lag_in_frames; + // Max bits for a single frame. + const int max_bits = frame_max_bits(rc, oxcf); + unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames; int f_boost = 0; int b_boost = 0; int flash_detected; int active_max_gf_interval; - RATE_CONTROL *const rc = &cpi->rc; twopass->gf_group_bits = 0; @@ -1407,7 +1471,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { active_max_gf_interval = rc->max_gf_interval; i = 0; - while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) { + while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) { ++i; // Accumulate error score of frames in this gf group. @@ -1442,7 +1506,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, + if (detect_transition_to_still(twopass, i, 5, loop_decay_rate, last_loop_decay_rate)) { allow_alt_ref = 0; break; @@ -1581,8 +1645,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the bits to be allocated to the group as a whole. if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) { - twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits * - (gf_group_err / cpi->twopass.kf_group_error_left)); + twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits * + (gf_group_err / twopass->kf_group_error_left)); } else { twopass->gf_group_bits = 0; } @@ -1671,10 +1735,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { { // Adjust KF group bits and error remaining. 
twopass->kf_group_error_left -= (int64_t)gf_group_err; - twopass->kf_group_bits -= twopass->gf_group_bits; - - if (twopass->kf_group_bits < 0) - twopass->kf_group_bits = 0; // If this is an arf update we want to remove the score for the overlay // frame at the end which will usually be very cheap to code. @@ -1691,11 +1751,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->gf_group_error_left = (int64_t)gf_group_err; } - twopass->gf_group_bits -= twopass->gf_bits; - - if (twopass->gf_group_bits < 0) - twopass->gf_group_bits = 0; - // This condition could fail if there are two kfs very close together // despite MIN_GF_INTERVAL and would cause a divide by 0 in the // calculation of alt_extra_bits. @@ -1704,8 +1759,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (boost >= 150) { const int pct_extra = MIN(20, (boost - 100) / 50); - const int alt_extra_bits = (int)((twopass->gf_group_bits * pct_extra) / - 100); + const int alt_extra_bits = (int)(( + MAX(twopass->gf_group_bits - twopass->gf_bits, 0) * + pct_extra) / 100); twopass->gf_group_bits -= alt_extra_bits; } } @@ -1734,40 +1790,36 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + struct twopass_rc *twopass = &cpi->twopass; + // For a single frame. + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + // Calculate modified prediction error used in bit allocation. + const double modified_err = calculate_modified_err(cpi, this_frame); int target_frame_size; - double modified_err; double err_fraction; - const int max_bits = frame_max_bits(cpi); // Max for a single frame. - // Calculate modified prediction error used in bit allocation. 
- modified_err = calculate_modified_err(cpi, this_frame); - - if (cpi->twopass.gf_group_error_left > 0) + if (twopass->gf_group_error_left > 0) // What portion of the remaining GF group error is used by this frame. - err_fraction = modified_err / cpi->twopass.gf_group_error_left; + err_fraction = modified_err / twopass->gf_group_error_left; else err_fraction = 0.0; // How many of those bits available for allocation should we give it? - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); + target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction); // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at // the top end. target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)cpi->twopass.gf_group_bits)); + MIN(max_bits, (int)twopass->gf_group_bits)); // Adjust error and bits remaining. - cpi->twopass.gf_group_error_left -= (int64_t)modified_err; - cpi->twopass.gf_group_bits -= target_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; + twopass->gf_group_error_left -= (int64_t)modified_err; // Per frame bit target for this frame. vp9_rc_set_frame_target(cpi, target_frame_size); } -static int test_candidate_kf(VP9_COMP *cpi, +static int test_candidate_kf(struct twopass_rc *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, const FIRSTPASS_STATS *next_frame) { @@ -1788,19 +1840,12 @@ static int test_candidate_kf(VP9_COMP *cpi, ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { int i; - FIRSTPASS_STATS *start_pos; - - FIRSTPASS_STATS local_next_frame; - + const FIRSTPASS_STATS *start_pos = twopass->stats_in; + FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; double decay_accumulator = 1.0; - local_next_frame = *next_frame; - - // Note the starting file position so we can reset to it. 
- start_pos = cpi->twopass.stats_in; - // Examine how well the key frame predicts subsequent frames. for (i = 0; i < 16; ++i) { double next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / @@ -1832,7 +1877,7 @@ static int test_candidate_kf(VP9_COMP *cpi, old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(&cpi->twopass, &local_next_frame)) + if (EOF == input_stats(twopass, &local_next_frame)) break; } @@ -1842,7 +1887,7 @@ static int test_candidate_kf(VP9_COMP *cpi, is_viable_kf = 1; } else { // Reset the file position - reset_fpf_position(&cpi->twopass, start_pos); + reset_fpf_position(twopass, start_pos); is_viable_kf = 0; } @@ -1853,28 +1898,21 @@ static int test_candidate_kf(VP9_COMP *cpi, static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; - FIRSTPASS_STATS last_frame; - FIRSTPASS_STATS first_frame; + RATE_CONTROL *const rc = &cpi->rc; + struct twopass_rc *const twopass = &cpi->twopass; + const FIRSTPASS_STATS first_frame = *this_frame; + const FIRSTPASS_STATS *start_position = twopass->stats_in; FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS *start_position; - + FIRSTPASS_STATS last_frame; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; - double boost_score = 0; - double loop_decay_rate; - + double boost_score = 0.0; double kf_mod_err = 0.0; double kf_group_err = 0.0; double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - RATE_CONTROL *const rc = &cpi->rc; - struct twopass_rc *const twopass = &cpi->twopass; - vp9_zero(next_frame); - vp9_clear_system_state(); - - start_position = twopass->stats_in; cpi->common.frame_type = KEY_FRAME; // Is this a forced key frame by interval. @@ -1888,9 +1926,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->frames_to_key = 1; - // Take a copy of the initial frame details. 
- first_frame = *this_frame; - twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0; // Group modified error score. @@ -1909,8 +1944,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Provided that we are not at the end of the file... if (cpi->oxcf.auto_key && lookup_next_frame_stats(twopass, &next_frame) != EOF) { + double loop_decay_rate; + // Check for a scene cut. - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) + if (test_candidate_kf(twopass, &last_frame, this_frame, &next_frame)) break; // How fast is the prediction quality decaying? @@ -1926,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special check for transition or high motion followed by a // static scene. - if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i, loop_decay_rate, decay_accumulator)) break; @@ -1949,13 +1986,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // is between 1x and 2x. if (cpi->oxcf.auto_key && rc->frames_to_key > (int)cpi->key_frame_frequency) { - FIRSTPASS_STATS tmp_frame; + FIRSTPASS_STATS tmp_frame = first_frame; rc->frames_to_key /= 2; - // Copy first frame details. - tmp_frame = first_frame; - // Reset to the start of the group. reset_fpf_position(twopass, start_position); @@ -1963,10 +1997,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Rescan to get the correct error data for the forced kf group. for (i = 0; i < rc->frames_to_key; ++i) { - // Accumulate kf group errors. kf_group_err += calculate_modified_err(cpi, &tmp_frame); - - // Load the next frame's stats. 
input_stats(twopass, &tmp_frame); } rc->next_key_frame_forced = 1; @@ -1985,7 +2016,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits that should be assigned to the kf group. if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) { // Maximum number of bits for a single normal frame (not key frame). - int max_bits = frame_max_bits(cpi); + const int max_bits = frame_max_bits(rc, &cpi->oxcf); // Maximum number of bits allocated to the key frame group. int64_t max_grp_bits; @@ -2012,20 +2043,19 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Scan through the kf group collating various stats. for (i = 0; i < rc->frames_to_key; ++i) { - double r; - if (EOF == input_stats(twopass, &next_frame)) break; // Monitor for static sections. if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < - zero_motion_accumulator) { - zero_motion_accumulator = - (next_frame.pcnt_inter - next_frame.pcnt_motion); + zero_motion_accumulator) { + zero_motion_accumulator = (next_frame.pcnt_inter - + next_frame.pcnt_motion); } // For the first few frames collect data to decide kf boost. if (i <= (rc->max_gf_interval * 2)) { + double r; if (next_frame.intra_error > twopass->kf_intra_err_min) r = (IIKFACTOR2 * next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); @@ -2038,10 +2068,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying. if (!detect_flash(twopass, 0)) { - loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); + const double loop_decay_rate = get_prediction_decay_rate(&cpi->common, + &next_frame); decay_accumulator *= loop_decay_rate; - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? 
MIN_DECAY_FACTOR : decay_accumulator; + decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR); } boost_score += (decay_accumulator * r); @@ -2072,7 +2102,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (1) { int kf_boost = (int)boost_score; int allocation_chunks; - int alt_kf_bits; if (kf_boost < (rc->frames_to_key * 3)) kf_boost = (rc->frames_to_key * 3); @@ -2106,14 +2135,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Prevent overflow. if (kf_boost > 1028) { - int divisor = kf_boost >> 10; + const int divisor = kf_boost >> 10; kf_boost /= divisor; allocation_chunks /= divisor; } - twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0 - : twopass->kf_group_bits; - + twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); // Calculate the number of bits to be spent on the key frame. twopass->kf_bits = (int)((double)kf_boost * ((double)twopass->kf_group_bits / allocation_chunks)); @@ -2123,11 +2150,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // then use an alternate calculation based on the kf error score // which should give a smaller key frame. if (kf_mod_err < kf_group_err / rc->frames_to_key) { - double alt_kf_grp_bits = ((double)twopass->bits_left * + double alt_kf_grp_bits = ((double)twopass->bits_left * (kf_mod_err * (double)rc->frames_to_key) / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)); - alt_kf_bits = (int)((double)kf_boost * + const int alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); if (twopass->kf_bits > alt_kf_bits) @@ -2136,12 +2163,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Else if it is much harder than other frames in the group make sure // it at least receives an allocation in keeping with its relative // error score. 
- alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / + const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left))); - if (alt_kf_bits > twopass->kf_bits) { + if (alt_kf_bits > twopass->kf_bits) twopass->kf_bits = alt_kf_bits; - } } twopass->kf_group_bits -= twopass->kf_bits; // Per frame bit target for this frame. @@ -2161,7 +2187,7 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY)) { + (cm->frame_flags & FRAMEFLAGS_KEY))) { cm->frame_type = KEY_FRAME; } else { cm->frame_type = INTER_FRAME; @@ -2174,14 +2200,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - const int frames_left = (int)(twopass->total_stats.count - - cm->current_video_frame); + int frames_left; FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; double this_frame_coded_error; int target; + LAYER_CONTEXT *lc = NULL; + int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); + + if (is_spatial_svc) { + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + frames_left = (int)(twopass->total_stats.count - + lc->current_video_frame_in_layer); + } else { + frames_left = (int)(twopass->total_stats.count - + cm->current_video_frame); + } if (!twopass->stats_in) return; @@ -2194,9 +2230,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); + if (is_spatial_svc && twopass->kf_intra_err_min == 0) { + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } + if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if 
(cm->current_video_frame == 0) { + } else if (cm->current_video_frame == 0 || + (is_spatial_svc && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2219,6 +2261,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Define next KF group and assign bits to it. this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); + // Don't place key frame in any enhancement layers in spatial svc + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id > 0) { + cm->frame_type = INTER_FRAME; + } } else { cm->frame_type = INTER_FRAME; } @@ -2278,23 +2325,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { subtract_stats(&twopass->total_left_stats, &this_frame); } -void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { +void vp9_twopass_postencode_update(VP9_COMP *cpi) { #ifdef DISABLE_RC_LONG_TERM_MEM - cpi->twopass.bits_left -= cpi->rc.this_frame_target; + const uint64_t bits_used = cpi->rc.this_frame_target; #else - cpi->twopass.bits_left -= 8 * bytes_used; + const uint64_t bits_used = cpi->rc.projected_frame_size; +#endif + cpi->twopass.bits_left -= bits_used; + cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0); // Update bits left to the kf and gf groups to account for overshoot or // undershoot on these frames. - if (cm->frame_type == KEY_FRAME) { - cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - - cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); - } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { - cpi->twopass.gf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - + if (cpi->common.frame_type == KEY_FRAME) { + // For key frames kf_group_bits already had the target bits subtracted out. + // So now update to the correct value based on the actual bits used. 
+ cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used; + } else { + cpi->twopass.kf_group_bits -= bits_used; + cpi->twopass.gf_group_bits -= bits_used; cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); } -#endif + cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); } diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.h b/source/libvpx/vp9/encoder/vp9_firstpass.h index 03c0e20..7a16c8f 100644 --- a/source/libvpx/vp9/encoder/vp9_firstpass.h +++ b/source/libvpx/vp9/encoder/vp9_firstpass.h @@ -35,6 +35,7 @@ typedef struct { double new_mv_count; double duration; double count; + int64_t spatial_layer_id; } FIRSTPASS_STATS; struct twopass_rc { @@ -43,7 +44,9 @@ struct twopass_rc { unsigned int this_iiratio; FIRSTPASS_STATS total_stats; FIRSTPASS_STATS this_frame_stats; - FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; + const FIRSTPASS_STATS *stats_in; + const FIRSTPASS_STATS *stats_in_start; + const FIRSTPASS_STATS *stats_in_end; FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; @@ -55,7 +58,6 @@ struct twopass_rc { double modified_error_left; double kf_intra_err_min; double gf_intra_err_min; - int static_scene_max_gf_interval; int kf_bits; // Remaining error from uncoded frames in a gf group. 
Two pass use only int64_t gf_group_error_left; @@ -93,8 +95,7 @@ int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh); // Post encode update of the rate control parameters for 2-pass -void vp9_twopass_postencode_update(struct VP9_COMP *cpi, - uint64_t bytes_used); +void vp9_twopass_postencode_update(struct VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_lookahead.c b/source/libvpx/vp9/encoder/vp9_lookahead.c index a88d5ec..cf03e01 100644 --- a/source/libvpx/vp9/encoder/vp9_lookahead.c +++ b/source/libvpx/vp9/encoder/vp9_lookahead.c @@ -28,8 +28,8 @@ struct lookahead_ctx { /* Return the buffer at the given absolute index and increment the index */ -static struct lookahead_entry * pop(struct lookahead_ctx *ctx, - unsigned int *idx) { +static struct lookahead_entry *pop(struct lookahead_ctx *ctx, + unsigned int *idx) { unsigned int index = *idx; struct lookahead_entry *buf = ctx->buf + index; @@ -55,16 +55,19 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { } -struct lookahead_ctx * vp9_lookahead_init(unsigned int width, - unsigned int height, - unsigned int subsampling_x, - unsigned int subsampling_y, - unsigned int depth) { +struct lookahead_ctx *vp9_lookahead_init(unsigned int width, + unsigned int height, + unsigned int subsampling_x, + unsigned int subsampling_y, + unsigned int depth) { struct lookahead_ctx *ctx = NULL; // Clamp the lookahead queue depth depth = clamp(depth, 1, MAX_LAG_BUFFERS); + // Allocate memory to keep previous source frames available. 
+ depth += MAX_PRE_FRAMES; + // Allocate the lookahead structures ctx = calloc(1, sizeof(*ctx)); if (ctx) { @@ -96,7 +99,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int mb_cols = (src->y_width + 15) >> 4; #endif - if (ctx->sz + 1 > ctx->max_sz) + if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -159,11 +162,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, } -struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, - int drain) { +struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, + int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz)) { + if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } @@ -171,16 +174,28 @@ struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, } -struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx, - int index) { +struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, + int index) { struct lookahead_entry *buf = NULL; - if (index < (int)ctx->sz) { - index += ctx->read_idx; - if (index >= (int)ctx->max_sz) - index -= ctx->max_sz; - buf = ctx->buf + index; + if (index >= 0) { + // Forward peek + if (index < (int)ctx->sz) { + index += ctx->read_idx; + if (index >= (int)ctx->max_sz) + index -= ctx->max_sz; + buf = ctx->buf + index; + } + } else if (index < 0) { + // Backward peek + if (-index <= MAX_PRE_FRAMES) { + index += ctx->read_idx; + if (index < 0) + index += ctx->max_sz; + buf = ctx->buf + index; + } } + return buf; } diff --git a/source/libvpx/vp9/encoder/vp9_lookahead.h b/source/libvpx/vp9/encoder/vp9_lookahead.h index ff63c0d..046c533 100644 --- a/source/libvpx/vp9/encoder/vp9_lookahead.h +++ b/source/libvpx/vp9/encoder/vp9_lookahead.h @@ -20,6 +20,9 @@ extern "C" { #define MAX_LAG_BUFFERS 25 +// The max of past frames we want to 
keep in the queue. +#define MAX_PRE_FRAMES 1 + struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; diff --git a/source/libvpx/vp9/encoder/vp9_mbgraph.c b/source/libvpx/vp9/encoder/vp9_mbgraph.c index d3e19b4..44b171f 100644 --- a/source/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/source/libvpx/vp9/encoder/vp9_mbgraph.c @@ -61,7 +61,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, &sse); } - vp9_set_mbmode_and_mvs(xd, NEWMV, dst_mv); + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv; + vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); /* restore UMV window */ @@ -143,7 +145,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; - xd->mi_8x8[0]->mbmi.mode = mode; + xd->mi[0]->mbmi.mode = mode; vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, @@ -250,7 +252,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; - xd->mi_8x8[0] = &mi_local; + xd->mi[0] = &mi_local; mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE; @@ -368,7 +370,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) { else cpi->static_mb_pct = 0; - cpi->seg0_cnt = ncnt[0]; vp9_enable_segmentation(&cm->seg); } else { cpi->static_mb_pct = 0; diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.c b/source/libvpx/vp9/encoder/vp9_mcomp.c index 7d6fd3b..f7a02a4 100644 --- a/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -23,6 +23,11 @@ // #define NEW_DIAMOND_SEARCH +static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int 
col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -98,42 +103,23 @@ static int mvsad_err_cost(const MV *mv, const MV *ref, } void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { - int len; - int search_site_count = 0; + int len, ss_count = 1; - // Generate offsets for 4 search sites per step. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; + x->ss[0].mv.col = x->ss[0].mv.row = 0; + x->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -len; - x->ss[search_site_count].offset = -len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = len; - x->ss[search_site_count].offset = len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = len; - search_site_count++; + // Generate offsets for 4 search sites per step. 
+ const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; + int i; + for (i = 0; i < 4; ++i) { + search_site *const ss = &x->ss[ss_count++]; + ss->mv = ss_mvs[i]; + ss->offset = ss->mv.row * stride + ss->mv.col; + } } - x->ss_count = search_site_count; + x->ss_count = ss_count; x->searches_per_step = 4; } @@ -389,9 +375,9 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, unsigned int sse; unsigned int whichdir; int thismse; - unsigned int halfiters = iters_per_step; - unsigned int quarteriters = iters_per_step; - unsigned int eighthiters = iters_per_step; + const unsigned int halfiters = iters_per_step; + const unsigned int quarteriters = iters_per_step; + const unsigned int eighthiters = iters_per_step; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; @@ -418,7 +404,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. 
- comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -514,8 +500,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, - int do_init_search, - int do_refine, + int do_init_search, int do_refine, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, @@ -527,20 +512,15 @@ static int vp9_pattern_search(const MACROBLOCK *x, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; int i, j, s, t; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; - MV this_mv; int bestsad = INT_MAX; int thissad; - const uint8_t *base_offset; - const uint8_t *this_offset; int k = -1; - int best_site = -1; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_init_s = search_param_to_steps[search_param]; - const int *mvjsadcost = x->nmvjointsadcost; + const int *const mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; // adjust ref_mv to make sure it is within MV range @@ -549,13 +529,10 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc = ref_mv->col; // Work out the start point for the search - base_offset = xd->plane[0].pre[0].buf; - this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.row = br; - this_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + bestsad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + 0x7fffffff) + 
mvsad_err_cost(ref_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -564,27 +541,25 @@ static int vp9_pattern_search(const MACROBLOCK *x, s = best_init_s; best_init_s = -1; for (t = 0; t <= s; ++t) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -604,31 +579,30 @@ static int vp9_pattern_search(const MACROBLOCK *x, // If the center point is still the best, just skip this and move to // the refinement step. 
if (best_init_s != -1) { + int best_site = -1; s = best_init_s; - best_site = -1; + do { // No need to search all 6 points the 1st time if initial search was used if (!do_init_search || s != best_init_s) { if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -651,24 +625,22 @@ static int vp9_pattern_search(const MACROBLOCK *x, if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; 
i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -685,29 +657,28 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Check 4 1-away neighbors if do_refine is true. // For most well-designed schemes do_refine will not be necessary. if (do_refine) { - static const MV neighbors[4] = { {0, -1}, { -1, 0}, {1, 0}, {0, 1} }; + static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; + for (j = 0; j < 16; j++) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + 
in_what->stride, bestsad); CHECK_BETTER } } @@ -724,20 +695,41 @@ static int vp9_pattern_search(const MACROBLOCK *x, best_mv->row = br; best_mv->col = bc; - this_offset = base_offset + (best_mv->row * in_what_stride) + - best_mv->col; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - if (bestsad == INT_MAX) - return INT_MAX; + return bestsad; +} - return vfp->vf(what, what_stride, this_offset, in_what_stride, - (unsigned int *)&bestsad) + - use_mvcost ? mv_err_cost(&this_mv, center_mv, - x->nmvjointcost, x->mvcost, x->errorperbit) - : 0; +int vp9_get_mvpred_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->vf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); } +int vp9_get_mvpred_av_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const uint8_t *second_pred, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, + what->buf, what->stride, &unused, second_pred) + + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); +} int vp9_hex_search(const MACROBLOCK *x, MV *ref_mv, @@ -853,184 +845,34 @@ int vp9_square_search(const MACROBLOCK *x, do_init_search, 0, vfp, use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); -}; - -// Number of candidates in first hex search -#define FIRST_HEX_CANDIDATES 6 -// Index of previous hex search's best match -#define PRE_BEST_CANDIDATE 6 -// Number of candidates in following hex search -#define NEXT_HEX_CANDIDATES 3 -// Number of candidates in refining search -#define REFINE_CANDIDATES 4 +} int vp9_fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, + int do_init_search, // must be zero for fast_hex const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { - const MACROBLOCKD* const xd = &x->e_mbd; - static const MV hex[FIRST_HEX_CANDIDATES] = { - { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} - }; - static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = { - {{ -2, 0}, { -1, -2}, {1, -2}}, - {{ -1, -2}, {1, -2}, {2, 0}}, - {{1, -2}, {2, 0}, {1, 2}}, - {{2, 0}, {1, 2}, { -1, 2}}, - {{1, 2}, { -1, 2}, { -2, 0}}, - {{ -1, 2}, { -2, 0}, { -1, -2}} - }; - static const MV neighbors[REFINE_CANDIDATES] = { - {0, -1}, { -1, 0}, {1, 0}, {0, 1} - }; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - int br, bc; - MV this_mv; - unsigned int bestsad = 0x7fffffff; - unsigned int thissad; - const uint8_t *base_offset; - const uint8_t *this_offset; - int k = -1; - int best_site = -1; - const int max_hex_search = 512; - const int max_dia_search = 32; - - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - - // Adjust ref_mv to make sure it is 
within MV range - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - br = ref_mv->row; - bc = ref_mv->col; - - // Check the start point - base_offset = xd->plane[0].pre[0].buf; - this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.row = br; - this_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); - - // Initial 6-point hex search - if (check_bounds(x, br, bc, 2)) { - for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { - this_mv.row = br + hex[i].row; - this_mv.col = bc + hex[i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { - this_mv.row = br + hex[i].row; - this_mv.col = bc + hex[i].col; - if (!is_mv_in(x, &this_mv)) - continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } - - // Continue hex search if we find a better match in first round - if (best_site != -1) { - br += hex[best_site].row; - bc += hex[best_site].col; - k = best_site; - - // Allow search covering maximum MV range - for (j = 1; j < max_hex_search; j++) { - best_site = -1; - - if (check_bounds(x, br, bc, 2)) { - for (i = 0; i < 3; i++) { - this_mv.row = br + next_chkpts[k][i].row; - this_mv.col = bc + next_chkpts[k][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < 3; i++) { - this_mv.row = br + next_chkpts[k][i].row; - this_mv.col = bc + next_chkpts[k][i].col; - if (!is_mv_in(x, &this_mv)) - continue; - this_offset = base_offset + (this_mv.row * 
in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } - - if (best_site == -1) { - break; - } else { - br += next_chkpts[k][best_site].row; - bc += next_chkpts[k][best_site].col; - k += 5 + best_site; - if (k >= 12) k -= 12; - else if (k >= 6) k -= 6; - } - } - } - - // Check 4 1-away neighbors - for (j = 0; j < max_dia_search; j++) { - best_site = -1; - - if (check_bounds(x, br, bc, 1)) { - for (i = 0; i < REFINE_CANDIDATES; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < REFINE_CANDIDATES; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - if (!is_mv_in(x, &this_mv)) - continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } - - if (best_site == -1) { - break; - } else { - br += neighbors[best_site].row; - bc += neighbors[best_site].col; - } - } - - best_mv->row = br; - best_mv->col = bc; + return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), + sad_per_bit, do_init_search, vfp, use_mvcost, + center_mv, best_mv); +} - return bestsad; +int vp9_fast_dia_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { + return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), + sad_per_bit, do_init_search, vfp, use_mvcost, + center_mv, best_mv); } #undef CHECK_BETTER @@ -1045,9 +887,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, const int what_stride = 
x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - MV this_mv; unsigned int bestsad = INT_MAX; int ref_row, ref_col; @@ -1076,7 +915,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, // Work out the start point for the search in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) @@ -1100,8 +938,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, for (i = 0; i < 4; ++i) { if (sad_array[i] < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1119,8 +956,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, bestsad); if (thissad < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1134,20 +970,9 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } } } - best_mv->row += best_tr; best_mv->col += best_tc; - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); + return bestsad; } int vp9_diamond_search_sad_c(const MACROBLOCK *x, @@ -1156,77 +981,49 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - int i, j, step; - const MACROBLOCKD *const xd = &x->e_mbd; - const 
uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - MV this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row, ref_col; - int this_row_offset, this_col_offset; - + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; // search_param determines the length of the initial step and hence the number // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - int thissad; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; + const uint8_t *best_address; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; + best_address = get_buf_from_mv(in_what, ref_mv); *num00 = 0; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; + *best_mv = *ref_mv; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + best_sad = fn_ptr->sdf(what->buf, what->stride, + in_what->buf, in_what->stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); i = 1; for (step = 0; step < tot_steps; step++) { for (j = 0; j < x->searches_per_step; j++) { 
- // Trap illegal vectors - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - const uint8_t *const check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = i; } } @@ -1242,22 +1039,17 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { - this_row_offset = best_mv->row + ss[best_site].mv.row; - this_col_offset = best_mv->col + ss[best_site].mv.col; - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - check_here = ss[best_site].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + const MV this_mv = {best_mv->row + ss[best_site].mv.row, + best_mv->col + ss[best_site].mv.col}; + if (is_mv_in(x, &this_mv)) { + int sad = fn_ptr->sdf(what->buf, 
what->stride, + best_address + ss[best_site].offset, + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_mv->row += ss[best_site].mv.row; best_mv->col += ss[best_site].mv.col; best_address += ss[best_site].offset; @@ -1268,21 +1060,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, break; }; #endif - } else if (best_address == in_what) { + } else if (best_address == in_what->buf) { (*num00)++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); + return best_sad; } int vp9_diamond_search_sadx4(const MACROBLOCK *x, @@ -1300,16 +1082,12 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, const int in_what_stride = xd->plane[0].pre[0].stride; const uint8_t *best_address; - MV this_mv; - unsigned int bestsad = INT_MAX; int best_site = 0; int last_site = 0; int ref_row; int ref_col; - int this_row_offset; - int this_col_offset; // search_param determines the length of the initial step and hence the number // of iterations. 
@@ -1319,7 +1097,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, const search_site *ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - unsigned int thissad; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; @@ -1370,8 +1147,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { - this_mv.row = best_mv->row + ss[i].mv.row; - this_mv.col = best_mv->col + ss[i].mv.col; + const MV this_mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1385,20 +1162,15 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, } else { for (j = 0; j < x->searches_per_step; j++) { // Trap illegal vectors - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; + const MV this_mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { + if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, + in_what_stride, bestsad); if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1418,18 +1190,13 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { - this_row_offset = best_mv->row + ss[best_site].mv.row; - this_col_offset = best_mv->col + ss[best_site].mv.col; - if ((this_col_offset > x->mv_col_min) && - 
(this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - check_here = ss[best_site].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + const MV this_mv = {best_mv->row + ss[best_site].mv.row, + best_mv->col + ss[best_site].mv.col}; + if (is_mv_in(x, &this_mv)) { + const uint8_t *const check_here = ss[best_site].offset + best_address; + unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, + in_what_stride, bestsad); if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1448,24 +1215,14 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, (*num00)++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); + return bestsad; } /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, +int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, @@ -1476,6 +1233,8 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, step_param, sadpb, &n, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); *dst_mv = temp_mv; // If there won't be more n-step search, check to see if refining search is @@ -1493,6 +1252,8 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, step_param + n, sadpb, &num00, fn_ptr, 
x->nmvjointcost, x->mvcost, ref_mv); + if (thissme < INT_MAX) + thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); // check to see if refining search is needed. if (num00 > further_steps - n) @@ -1512,12 +1273,13 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (thissme < INT_MAX) + thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); if (thissme < bestsme) { bestsme = thissme; *dst_mv = best_mv; } } - return bestsme; } @@ -1528,10 +1290,8 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV *center_mv, MV *best_mv) { int r, c; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); @@ -1539,38 +1299,26 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, - 0x7fffffff) + + int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { for (c = col_min; c < col_max; ++c) 
{ - const MV this_mv = {r, c}; - const uint8_t *check_here = &in_what[r * in_what_stride + c]; - const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - best_sad) + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + const MV mv = {r, c}; + const int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); if (sad < best_sad) { best_sad = sad; - *best_mv = this_mv; - best_address = check_here; + *best_mv = mv; } } } - - if (best_sad < INT_MAX) { - unsigned int unused; - const MV mv = {best_mv->row * 8, best_mv->col * 8}; - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &unused) - + mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit); - } else { - return INT_MAX; - } + return best_sad; } int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, @@ -1635,10 +1383,8 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } - check_here++; c++; } @@ -1657,7 +1403,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1665,17 +1410,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, c++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - else - return INT_MAX; + return bestsad; } int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, @@ -1691,7 +1426,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, MV this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned int thissad; int ref_row = 
ref_mv->row; int ref_col = ref_mv->col; @@ -1731,7 +1465,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; + unsigned int thissad = (unsigned int)sad_array8[i]; if (thissad < bestsad) { this_mv.col = c; @@ -1742,7 +1476,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1757,18 +1490,17 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + unsigned int thissad = sad_array[i]; if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1778,8 +1510,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, + check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.col = c; @@ -1790,7 +1522,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1798,17 +1529,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, c++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - 
else - return INT_MAX; + return bestsad; } int vp9_refining_search_sad_c(const MACROBLOCK *x, @@ -1817,41 +1538,34 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const uint8_t *const what = x->plane[0].src.buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + 
unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + error_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1863,19 +1577,9 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; - best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col]; } } - - if (bestsad < INT_MAX) { - unsigned int unused; - const MV mv = {ref_mv->row * 8, ref_mv->col * 8}; - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - &unused) + - mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit); - } else { - return INT_MAX; - } + return best_sad; } int vp9_refining_search_sadx4(const MACROBLOCK *x, @@ -1885,82 +1589,64 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; - MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - int this_row_offset, this_col_offset; - - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *what = x->plane[0].src.buf; - const uint8_t *best_address = xd->plane[0].pre[0].buf + - (ref_mv->row * xd->plane[0].pre[0].stride) + - ref_mv->col; - unsigned int thissad; - MV this_mv; - + const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); + 
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; - int all_in = ((ref_mv->row - 1) > x->mv_row_min) & - ((ref_mv->row + 1) < x->mv_row_max) & - ((ref_mv->col - 1) > x->mv_col_min) & - ((ref_mv->col + 1) < x->mv_col_max); + const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & + ((ref_mv->row + 1) < x->mv_row_max) & + ((ref_mv->col - 1) > x->mv_col_min) & + ((ref_mv->col + 1) < x->mv_col_max); if (all_in) { - unsigned int sad_array[4]; - uint8_t const *block_offset[4] = { - best_address - in_what_stride, + unsigned int sads[4]; + const uint8_t *const positions[4] = { + best_address - in_what->stride, best_address - 1, best_address + 1, - best_address + in_what_stride + best_address + in_what->stride }; - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, - sad_array); + fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); - for (j = 0; j < 4; j++) { - if (sad_array[j] < bestsad) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, + for (j = 0; j < 4; ++j) { + if (sads[j] < best_sad) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + sads[j] += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (sad_array[j] < bestsad) { - bestsad = sad_array[j]; + if (sads[j] < best_sad) { + best_sad = sads[j]; best_site = j; } } } } else { - for (j = 0; j < 4; j++) { - this_row_offset = ref_mv->row + neighbors[j].row; - this_col_offset = ref_mv->col + neighbors[j].col; - - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - const uint8_t *check_here = neighbors[j].row * 
in_what_stride + - neighbors[j].col + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + for (j = 0; j < 4; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, + mvjsadcost, mvsadcost, error_per_bit); + + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1973,21 +1659,11 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; - best_address += (neighbors[best_site].row) * in_what_stride + - neighbors[best_site].col; + best_address = get_buf_from_mv(in_what, ref_mv); } } - this_mv.row = ref_mv->row * 8; - this_mv.col = ref_mv->col * 8; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - else - return INT_MAX; + return best_sad; } // This function is called when we do joint motion search in comp_inter_inter @@ -1999,48 +1675,36 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what = xd->plane[0].pre[0].buf; - const int 
in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - unsigned int thissad; - MV this_mv; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - /* Get compound pred by averaging two pred blocks. */ - unsigned int bestsad = fn_ptr->sdaf(what, what_stride, - best_address, in_what_stride, - second_pred, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + second_pred, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; ++i) { int best_site = -1; - for (j = 0; j < 8; j++) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; + for (j = 0; j < 8; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - - thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, - second_pred, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -2052,21 +1716,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; 
ref_mv->col += neighbors[best_site].col; - best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col]; } } - - this_mv.row = ref_mv->row * 8; - this_mv.col = ref_mv->col * 8; - - if (bestsad < INT_MAX) { - // FIXME(rbultje, yunqing): add full-pixel averaging variance functions - // so we don't have to use the subpixel with xoff=0,yoff=0 here. - return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, - (unsigned int *)(&thissad), second_pred) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - } else { - return INT_MAX; - } + return best_sad; } diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.h b/source/libvpx/vp9/encoder/vp9_mcomp.h index 586a74c..f7b7c5e 100644 --- a/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -35,6 +35,17 @@ extern "C" { void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv); int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, int *mvcost[2], int weight); + +// Utility to compute variance + MV rate cost for a given MV +int vp9_get_mvpred_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost); +int vp9_get_mvpred_av_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const uint8_t *second_pred, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost); void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); @@ -42,47 +53,28 @@ struct VP9_COMP; int vp9_init_search_range(struct VP9_COMP *cpi, int size); // Runs sequence of diamond searches in smaller steps for RD -int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, +int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv); -int vp9_hex_search(const MACROBLOCK *x, - MV *ref_mv, - 
int search_param, - int error_per_bit, - int do_init_search, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); -int vp9_bigdia_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int error_per_bit, - int do_init_search, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); -int vp9_square_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int error_per_bit, - int do_init_search, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); -int vp9_fast_hex_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, - MV *best_mv); +typedef int (integer_mv_pattern_search_fn) ( + const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int error_per_bit, + int do_init_search, + const vp9_variance_fn_ptr_t *vf, + int use_mvcost, + const MV *center_mv, + MV *best_mv); + +integer_mv_pattern_search_fn vp9_hex_search; +integer_mv_pattern_search_fn vp9_bigdia_search; +integer_mv_pattern_search_fn vp9_square_search; +integer_mv_pattern_search_fn vp9_fast_hex_search; +integer_mv_pattern_search_fn vp9_fast_dia_search; typedef int (fractional_mv_step_fp) ( const MACROBLOCK *x, diff --git a/source/libvpx/vp9/encoder/vp9_onyx_if.c b/source/libvpx/vp9/encoder/vp9_onyx_if.c index f985545..0ac9d5f 100644 --- a/source/libvpx/vp9/encoder/vp9_onyx_if.c +++ b/source/libvpx/vp9/encoder/vp9_onyx_if.c @@ -27,7 +27,11 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_bitstream.h" +#include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" @@ -36,9 +40,13 @@ #include 
"vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_speed_features.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_temporal_filter.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_resize.h" +#include "vp9/encoder/vp9_svc_layercontext.h" void vp9_coef_tree_initialize(); @@ -53,30 +61,11 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. -// Masks for partially or completely disabling split mode -#define DISABLE_ALL_SPLIT 0x3F -#define DISABLE_ALL_INTER_SPLIT 0x1F -#define DISABLE_COMPOUND_SPLIT 0x18 -#define LAST_AND_INTRA_SPLIT_ONLY 0x1E - // Max rate target for 1080P and below encodes under normal circumstances // (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB #define MAX_MB_RATE 250 #define MAXRATE_1080P 2025000 -#if CONFIG_INTERNAL_STATS -extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int lumamask, - double *weight); - - -extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v); - - -#endif - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -92,12 +81,7 @@ FILE *kf_list; FILE *keyfile; #endif -void vp9_init_quantizer(VP9_COMP *cpi); - -static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = - {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - -static INLINE void Scale2Ratio(int mode, int *hr, int *hs) { +static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: *hr = 1; @@ -135,17 +119,33 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { } } +static void setup_key_frame(VP9_COMP *cpi) { + vp9_setup_past_independence(&cpi->common); + + // All buffers are implicitly updated on key frames. 
+ cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; +} + +static void setup_inter_frame(VP9_COMMON *cm) { + if (cm->error_resilient_mode || cm->intra_only) + vp9_setup_past_independence(cm); + + assert(cm->frame_context_idx < FRAME_CONTEXTS); + cm->fc = cm->frame_contexts[cm->frame_context_idx]; +} + void vp9_initialize_enc() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); + vp9_init_quant_tables(); + vp9_coef_tree_initialize(); vp9_tokenize_initialize(); - vp9_init_quant_tables(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); - // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); init_done = 1; @@ -154,6 +154,7 @@ void vp9_initialize_enc() { static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + int i; // Delete sementation map vpx_free(cpi->segmentation_map); @@ -164,14 +165,19 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->coding_context.last_frame_seg_map_copy = NULL; vpx_free(cpi->complexity_map); - cpi->complexity_map = 0; + cpi->complexity_map = NULL; + + vp9_cyclic_refresh_free(cpi->cyclic_refresh); + cpi->cyclic_refresh = NULL; + vpx_free(cpi->active_map); - cpi->active_map = 0; + cpi->active_map = NULL; vp9_free_frame_buffers(cm); vp9_free_frame_buffer(&cpi->last_frame_uf); vp9_free_frame_buffer(&cpi->scaled_source); + vp9_free_frame_buffer(&cpi->scaled_last_source); vp9_free_frame_buffer(&cpi->alt_ref_buffer); vp9_lookahead_destroy(cpi->lookahead); @@ -184,108 +190,65 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->mb_norm_activity_map); cpi->mb_norm_activity_map = 0; - vpx_free(cpi->above_context[0]); - cpi->above_context[0] = NULL; - - vpx_free(cpi->above_seg_context); - cpi->above_seg_context = NULL; -} - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target value -// target q value -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) { 
- const RATE_CONTROL *const rc = &cpi->rc; - int start_index = rc->worst_quality; - int target_index = rc->worst_quality; - int i; - - // Convert the average q value to an index. - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - start_index = i; - if (vp9_convert_qindex_to_q(i) >= qstart) - break; - } - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (vp9_convert_qindex_to_q(i) >= qtarget) - break; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; + vpx_free(lc->rc_twopass_stats_in.buf); + lc->rc_twopass_stats_in.buf = NULL; + lc->rc_twopass_stats_in.sz = 0; } - - return target_index - start_index; } -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a value that should equate to thegiven rate ratio. +static void save_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; -static int compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio) { - int i; - int target_index = cpi->rc.worst_quality; + // Stores a snapshot of key state variables which can subsequently be + // restored with a call to vp9_restore_coding_context. These functions are + // intended for use in a re-code loop in vp9_compress_frame where the + // quantizer value is adjusted between loop iterations. + vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); + vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); + vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - // Look up the current projected bits per block for the base index - const int base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type, - base_q_index, 1.0); + vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - // Find the target bits per mb based on the base value and given ratio. 
- const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, + cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - // Convert the q target to an index - for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; ++i) { - target_index = i; - if (vp9_rc_bits_per_mb(cpi->common.frame_type, i, 1.0) <= - target_bits_per_mb ) - break; - } + vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); + vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - return target_index - base_q_index; + cc->fc = cm->fc; } -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. -static void setup_in_frame_q_adj(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - - // Make SURE use of floating point in this function is safe. - vp9_clear_system_state(); - - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; - - // Clear down the segment map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); +static void restore_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; - // Clear down the complexity map used for rd - vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + // Restore key state variables to the snapshot state stored in the + // previous call to vp9_save_coding_context. 
+ vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); + vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); + vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - vp9_enable_segmentation(seg); - vp9_clearall_segfeatures(seg); + vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - // Select delta coding method - seg->abs_delta = SEGMENT_DELTADATA; + vpx_memcpy(cm->last_frame_seg_map, + cpi->coding_context.last_frame_seg_map_copy, + (cm->mi_rows * cm->mi_cols)); - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q - vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); + vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - // Use some of the segments for in frame Q adjustment - for (segment = 1; segment < 2; segment++) { - const int qindex_delta = compute_qdelta_by_rate(cpi, cm->base_qindex, - in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } + cm->fc = cc->fc; } + static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + const RATE_CONTROL *const rc = &cpi->rc; struct segmentation *const seg = &cm->seg; - int high_q = (int)(cpi->rc.avg_q > 48.0); + int high_q = (int)(rc->avg_q > 48.0); int qi_delta; // Disable and clear down for KF @@ -323,9 +286,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = vp9_compute_qdelta( - cpi, cpi->rc.avg_q, (cpi->rc.avg_q * 0.875)); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -338,16 +300,15 @@ static void configure_static_seg_features(VP9_COMP *cpi) { // All other frames if segmentation has been enabled // First normal frame in a 
valid gf or alt ref group - if (cpi->rc.frames_since_golden == 0) { + if (rc->frames_since_golden == 0) { // Set up segment features for normal frames in an arf group - if (cpi->rc.source_alt_ref_active) { + if (rc->source_alt_ref_active) { seg->update_map = 0; seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = vp9_compute_qdelta(cpi, cpi->rc.avg_q, - (cpi->rc.avg_q * 1.125)); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -372,7 +333,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_clearall_segfeatures(seg); } - } else if (cpi->rc.is_src_frame_alt_ref) { + } else if (rc->is_src_frame_alt_ref) { // Special case where we are coding over the top of a previous // alt ref frame. // Segment coding disabled for compred testing @@ -404,27 +365,6 @@ static void configure_static_seg_features(VP9_COMP *cpi) { } } -// DEBUG: Print out the segment id of each MB in the current frame. 
-static void print_seg_map(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int row, col; - int map_index = 0; - FILE *statsfile = fopen("segmap.stt", "a"); - - fprintf(statsfile, "%10d\n", cm->current_video_frame); - - for (row = 0; row < cpi->common.mi_rows; row++) { - for (col = 0; col < cpi->common.mi_cols; col++) { - fprintf(statsfile, "%10d", cpi->segmentation_map[map_index]); - map_index++; - } - fprintf(statsfile, "\n"); - } - fprintf(statsfile, "\n"); - - fclose(statsfile); -} - static void update_reference_segmentation_map(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; @@ -436,7 +376,7 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) { uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) cache[0] = mi_8x8[0]->mbmi.segment_id; - mi_8x8_ptr += cm->mode_info_stride; + mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } } @@ -445,539 +385,137 @@ static int is_slowest_mode(int mode) { } static void set_rd_speed_thresholds(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; int i; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG] = 0; - sf->thresh_mult[THR_NEARESTA] = 0; - - sf->thresh_mult[THR_DC] += 1000; - - sf->thresh_mult[THR_NEWMV] += 1000; - sf->thresh_mult[THR_NEWA] += 1000; - sf->thresh_mult[THR_NEWG] += 1000; - - sf->thresh_mult[THR_NEARMV] += 1000; - sf->thresh_mult[THR_NEARA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTLA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - sf->thresh_mult[THR_TM] += 1000; - - sf->thresh_mult[THR_COMP_NEARLA] += 1500; - sf->thresh_mult[THR_COMP_NEWLA] += 2000; - sf->thresh_mult[THR_NEARG] += 1000; - sf->thresh_mult[THR_COMP_NEARGA] += 1500; - sf->thresh_mult[THR_COMP_NEWGA] += 2000; - - sf->thresh_mult[THR_ZEROMV] += 2000; - sf->thresh_mult[THR_ZEROG] += 2000; - sf->thresh_mult[THR_ZEROA] += 2000; - sf->thresh_mult[THR_COMP_ZEROLA] += 2500; - sf->thresh_mult[THR_COMP_ZEROGA] += 2500; - - sf->thresh_mult[THR_H_PRED] += 2000; - sf->thresh_mult[THR_V_PRED] += 2000; - sf->thresh_mult[THR_D45_PRED ] += 2500; - sf->thresh_mult[THR_D135_PRED] += 2500; - sf->thresh_mult[THR_D117_PRED] += 2500; - sf->thresh_mult[THR_D153_PRED] += 2500; - sf->thresh_mult[THR_D207_PRED] += 2500; - sf->thresh_mult[THR_D63_PRED] += 2500; + cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; + + cpi->rd_thresh_mult[THR_NEARESTMV] = 0; + cpi->rd_thresh_mult[THR_NEARESTG] = 0; + cpi->rd_thresh_mult[THR_NEARESTA] = 0; + + cpi->rd_thresh_mult[THR_DC] += 1000; + + cpi->rd_thresh_mult[THR_NEWMV] += 1000; + cpi->rd_thresh_mult[THR_NEWA] += 1000; + cpi->rd_thresh_mult[THR_NEWG] += 1000; + + cpi->rd_thresh_mult[THR_NEARMV] += 1000; + cpi->rd_thresh_mult[THR_NEARA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000; + + cpi->rd_thresh_mult[THR_TM] += 1000; + + cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000; + cpi->rd_thresh_mult[THR_NEARG] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000; + + cpi->rd_thresh_mult[THR_ZEROMV] += 2000; + cpi->rd_thresh_mult[THR_ZEROG] += 2000; + cpi->rd_thresh_mult[THR_ZEROA] += 2000; + cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500; + cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500; + + cpi->rd_thresh_mult[THR_H_PRED] += 2000; + cpi->rd_thresh_mult[THR_V_PRED] += 2000; + cpi->rd_thresh_mult[THR_D45_PRED ] += 2500; + cpi->rd_thresh_mult[THR_D135_PRED] += 2500; + cpi->rd_thresh_mult[THR_D117_PRED] += 2500; + cpi->rd_thresh_mult[THR_D153_PRED] += 2500; + cpi->rd_thresh_mult[THR_D207_PRED] += 2500; + cpi->rd_thresh_mult[THR_D63_PRED] += 2500; /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - 
sf->thresh_mult[THR_NEWG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX; } } static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; + const SPEED_FEATURES *const sf = &cpi->sf; int i; for (i = 0; i < MAX_REFS; ++i) - sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - sf->thresh_mult_sub8x8[THR_LAST] += 2500; - sf->thresh_mult_sub8x8[THR_GOLD] += 2500; - sf->thresh_mult_sub8x8[THR_ALTR] += 2500; - sf->thresh_mult_sub8x8[THR_INTRA] += 2500; - sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500; // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) { + for (i = 0; i < MAX_REFS; i++) if (sf->disable_split_mask & (1 << i)) - sf->thresh_mult_sub8x8[i] = INT_MAX; - } + cpi->rd_thresh_mult_sub8x8[i] = INT_MAX; // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; -} - -static void set_good_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - int i; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? 
USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 
1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 32; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - 
sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed >= 5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->partition_search_type = FIXED_PARTITION; - sf->always_this_block_size = BLOCK_16X16; - sf->tx_size_search_method = frame_is_intra_only(cm) ? - USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } -} - -static void set_rt_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - sf->static_segmentation = 0; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - sf->encode_breakout_thresh = 1; - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 8; - } - if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 200; - } - if (speed >= 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; 
- - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->disable_filter_search_var_thresh = 100; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - sf->encode_breakout_thresh = 400; - } - if (speed >= 4) { - sf->optimize_coefficients = 0; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->use_fast_lpf_pick = 2; - sf->encode_breakout_thresh = 700; - } - if (speed >= 5) { - int i; - sf->adaptive_rd_thresh = 5; - sf->auto_min_max_partition_size = frame_is_intra_only(cm) ? - RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type == KEY_FRAME || (0 == - (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); - sf->subpel_force_stop = 1; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->frame_parameter_update = 0; - sf->encode_breakout_thresh = 1000; - sf->search_method = FAST_HEX; - } - if (speed >= 6) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->search_method = HEX; - } - if (speed >= 7) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_HEX; - } - if (speed >= 8) { - int i; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0) - } + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; - VP9_COMMON *cm = &cpi->common; - int speed = cpi->speed; - int i; - - // 
Convert negative speed to positive - if (speed < 0) - speed = -speed; - +static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS + int i; for (i = 0; i < MAX_MODES; ++i) cpi->mode_chosen_counts[i] = 0; #endif - // best quality defaults - sf->frame_parameter_update = 1; - sf->search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->subpel_search_method = SUBPEL_TREE; - sf->subpel_iters_per_step = 2; - sf->subpel_force_stop = 0; - sf->optimize_coefficients = !cpi->oxcf.lossless; - sf->reduce_first_step_size = 0; - sf->auto_mv_step_size = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; - sf->tx_size_search_method = USE_FULL_RD; - sf->use_lp32x32fdct = 0; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->reference_masking = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->less_rectangular_check = 0; - sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; - sf->min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->last_partitioning_redo_frequency = 4; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->disable_split_var_thresh = 0; - sf->disable_filter_search_var_thresh = 0; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; - } - sf->use_rd_breakout = 0; - sf->skip_encode_sb = 0; - sf->use_uv_intra_rd_estimate = 0; - sf->use_fast_lpf_pick = 0; - sf->use_fast_coef_updates = 0; - sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->use_nonrd_pick_mode = 0; - sf->encode_breakout_thresh = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; - - switch (cpi->oxcf.mode) { - case MODE_BESTQUALITY: - case MODE_SECONDPASS_BEST: // 
This is the best quality mode. - cpi->diamond_search_sad = vp9_full_range_search; - break; - case MODE_FIRSTPASS: - case MODE_GOODQUALITY: - case MODE_SECONDPASS: - set_good_speed_feature(cm, sf, speed); - break; - case MODE_REALTIME: - set_rt_speed_feature(cm, sf, speed); - break; - }; /* switch */ + vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting set_rd_speed_thresholds(cpi); set_rd_speed_thresholds_sub8x8(cpi); - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (cpi->pass == 1) { - sf->optimize_coefficients = 0; - } - - // No recode for 1 pass. - if (cpi->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = 0; - } - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { cpi->mb.fwd_txm4x4 = vp9_fwht4x4; } - - if (cpi->sf.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; - cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; - } - - cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; - - if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) - sf->adaptive_pred_interp_filter = 0; } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -1020,6 +558,13 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); + if (vp9_alloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate scaled last source buffer"); + vpx_free(cpi->tok); { @@ -1037,24 +582,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cm, 
cpi->mb_norm_activity_map, vpx_calloc(sizeof(unsigned int), cm->mb_rows * cm->mb_cols)); - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - vpx_free(cpi->above_context[0]); - CHECK_MEM_ERROR(cm, cpi->above_context[0], - vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * - MAX_MB_PLANE, - sizeof(*cpi->above_context[0]))); - - vpx_free(cpi->above_seg_context); - CHECK_MEM_ERROR(cm, cpi->above_seg_context, - vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), - sizeof(*cpi->above_seg_context))); } static void update_frame_size(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; vp9_update_frame_size(cm); @@ -1073,6 +606,13 @@ static void update_frame_size(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); + if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to reallocate scaled last source buffer"); + { int y_stride = cpi->scaled_source.y_stride; @@ -1083,57 +623,23 @@ static void update_frame_size(VP9_COMP *cpi) { } } - { - int i; - for (i = 1; i < MAX_MB_PLANE; ++i) { - cpi->above_context[i] = cpi->above_context[0] + - i * sizeof(*cpi->above_context[0]) * 2 * - mi_cols_aligned_to_sb(cm->mi_cols); - } - } + init_macroblockd(cm, xd); } - -// Table that converts 0-63 Q range values passed in outside to the Qindex -// range used internally. 
-static const int q_trans[] = { - 0, 4, 8, 12, 16, 20, 24, 28, - 32, 36, 40, 44, 48, 52, 56, 60, - 64, 68, 72, 76, 80, 84, 88, 92, - 96, 100, 104, 108, 112, 116, 120, 124, - 128, 132, 136, 140, 144, 148, 152, 156, - 160, 164, 168, 172, 176, 180, 184, 188, - 192, 196, 200, 204, 208, 212, 216, 220, - 224, 228, 232, 236, 240, 244, 249, 255, -}; - -int vp9_reverse_trans(int x) { - int i; - - for (i = 0; i < 64; i++) - if (q_trans[i] >= x) - return i; - - return 63; -}; - void vp9_new_framerate(VP9_COMP *cpi, double framerate) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; int vbr_max_bits; - if (framerate < 0.1) - framerate = 30; - - cpi->oxcf.framerate = framerate; + oxcf->framerate = framerate < 0.1 ? 30 : framerate; cpi->output_framerate = cpi->oxcf.framerate; - cpi->rc.av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth - / cpi->output_framerate); - cpi->rc.min_frame_bandwidth = (int)(cpi->rc.av_per_frame_bandwidth * - cpi->oxcf.two_pass_vbrmin_section / 100); - + rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / + cpi->output_framerate); + rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); - cpi->rc.min_frame_bandwidth = MAX(cpi->rc.min_frame_bandwidth, - FRAME_OVERHEAD_BITS); + rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); // A maximum bitrate for a frame is defined. // The baseline for this aligns with HW implementations that @@ -1143,31 +649,31 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) { // be acheived because of a user specificed max q (e.g. when the user // specifies lossless encode. 
// - vbr_max_bits = (int)(((int64_t)cpi->rc.av_per_frame_bandwidth * - cpi->oxcf.two_pass_vbrmax_section) / 100); - cpi->rc.max_frame_bandwidth = - MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); + vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), + vbr_max_bits); // Set Maximum gf/arf interval - cpi->rc.max_gf_interval = 16; + rc->max_gf_interval = 16; // Extended interval for genuinely static scenes - cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; // Special conditions when alt ref frame enabled in lagged compress mode - if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { - if (cpi->rc.max_gf_interval > cpi->oxcf.lag_in_frames - 1) - cpi->rc.max_gf_interval = cpi->oxcf.lag_in_frames - 1; + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->max_gf_interval > oxcf->lag_in_frames - 1) + rc->max_gf_interval = oxcf->lag_in_frames - 1; - if (cpi->twopass.static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1) - cpi->twopass.static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1; + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; } - if (cpi->rc.max_gf_interval > cpi->twopass.static_scene_max_gf_interval) - cpi->rc.max_gf_interval = cpi->twopass.static_scene_max_gf_interval; + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; } -static int64_t rescale(int64_t val, int64_t num, int denom) { +int64_t vp9_rescale(int64_t val, int64_t num, int denom) { int64_t llnum = num; int64_t llden = denom; int64_t llval = val; @@ -1175,124 +681,6 @@ static int64_t rescale(int64_t val, int64_t num, int denom) { return (llval * llnum / llden); } -// Initialize layer context data from 
init_config(). -static void init_layer_context(VP9_COMP *const cpi) { - const VP9_CONFIG *const oxcf = &cpi->oxcf; - int temporal_layer = 0; - cpi->svc.spatial_layer_id = 0; - cpi->svc.temporal_layer_id = 0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; - RATE_CONTROL *const lrc = &lc->rc; - lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q]; - lrc->total_actual_bits = 0; - lrc->total_target_vs_actual = 0; - lrc->ni_tot_qi = 0; - lrc->tot_q = 0.0; - lrc->avg_q = 0.0; - lrc->ni_frames = 0; - lrc->decimation_count = 0; - lrc->decimation_factor = 0; - lrc->rate_correction_factor = 1.0; - lrc->key_frame_rate_correction_factor = 1.0; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * - 1000; - lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); - lrc->bits_off_target = lrc->buffer_level; - } -} - -// Update the layer context from a change_config() call. -static void update_layer_context_change_config(VP9_COMP *const cpi, - const int target_bandwidth) { - const VP9_CONFIG *const oxcf = &cpi->oxcf; - const RATE_CONTROL *const rc = &cpi->rc; - int temporal_layer = 0; - float bitrate_alloc = 1.0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; - RATE_CONTROL *const lrc = &lc->rc; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; - bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; - // Update buffer-related quantities. 
- lc->starting_buffer_level = - (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); - lc->optimal_buffer_level = - (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc); - lc->maximum_buffer_size = - (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); - // Update framerate-related quantities. - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; - lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = rc->max_frame_bandwidth; - // Update qp-related quantities. - lrc->worst_quality = rc->worst_quality; - lrc->best_quality = rc->best_quality; - } -} - -// Prior to encoding the frame, update framerate-related quantities -// for the current layer. -static void update_layer_framerate(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - const VP9_CONFIG *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; - RATE_CONTROL *const lrc = &lc->rc; - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; - lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; - // Update the average layer frame size (non-cumulative per-frame-bw). - if (temporal_layer == 0) { - lc->avg_frame_size = lrc->av_per_frame_bandwidth; - } else { - double prev_layer_framerate = oxcf->framerate / - oxcf->ts_rate_decimator[temporal_layer - 1]; - int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; - lc->avg_frame_size = - (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / - (lc->framerate - prev_layer_framerate)); - } -} - -// Prior to encoding the frame, set the layer context, for the current layer -// to be encoded, to the cpi struct. 
-static void restore_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - int frame_since_key = cpi->rc.frames_since_key; - int frame_to_key = cpi->rc.frames_to_key; - cpi->rc = lc->rc; - cpi->oxcf.target_bandwidth = lc->target_bandwidth; - cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; - cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; - cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; - cpi->output_framerate = lc->framerate; - // Reset the frames_since_key and frames_to_key counters to their values - // before the layer restore. Keep these defined for the stream (not layer). - cpi->rc.frames_since_key = frame_since_key; - cpi->rc.frames_to_key = frame_to_key; -} - -// Save the layer context after encoding the frame. -static void save_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - lc->rc = cpi->rc; - lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; - lc->framerate = cpi->output_framerate; -} - static void set_tile_limits(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1304,14 +692,14 @@ static void set_tile_limits(VP9_COMP *cpi) { cm->log2_tile_rows = cpi->oxcf.tile_rows; } -static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); +static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; int i; cpi->oxcf = *oxcf; - cm->version = oxcf->version; + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; cm->width = oxcf->width; cm->height = oxcf->height; @@ -1324,42 +712,15 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { // Temporal 
scalability. cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - init_layer_context(cpi); + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && + cpi->oxcf.mode == MODE_SECONDPASS_BEST)) { + vp9_init_layer_context(cpi); } // change includes all joint functionality - vp9_change_config(ptr, oxcf); - - // Initialize active best and worst q and average q values. - if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - cpi->rc.avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q; - cpi->rc.avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q; - cpi->rc.avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q; - } else { - cpi->rc.avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - cpi->rc.avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - cpi->rc.avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - } - cpi->rc.last_q[0] = cpi->oxcf.best_allowed_q; - cpi->rc.last_q[1] = cpi->oxcf.best_allowed_q; - cpi->rc.last_q[2] = cpi->oxcf.best_allowed_q; - - // Initialise the starting buffer levels - cpi->rc.buffer_level = cpi->oxcf.starting_buffer_level; - cpi->rc.bits_off_target = cpi->oxcf.starting_buffer_level; - - cpi->rc.rolling_target_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.rolling_actual_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.long_rolling_target_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.long_rolling_actual_bits = cpi->rc.av_per_frame_bandwidth; - - cpi->rc.total_actual_bits = 0; - cpi->rc.total_target_vs_actual = 0; + vp9_change_config(cpi, oxcf); cpi->static_mb_pct = 0; @@ -1374,16 +735,18 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } -void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { - VP9_COMP *cpi 
= (VP9_COMP *)(ptr); +void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; - if (!cpi || !oxcf) - return; + if (cm->profile != oxcf->profile) + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; - if (cm->version != oxcf->version) { - cm->version = oxcf->version; - } + if (cm->profile <= PROFILE_1) + assert(cm->bit_depth == BITS_8); + else + assert(cm->bit_depth > BITS_8); cpi->oxcf = *oxcf; @@ -1397,6 +760,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5); break; + case MODE_BESTQUALITY: + cpi->pass = 0; + break; + case MODE_FIRSTPASS: cpi->pass = 1; break; @@ -1415,15 +782,17 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { break; } - cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; - cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; - cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->oxcf.lossless = oxcf->lossless; - cpi->mb.e_mbd.itxm_add = cpi->oxcf.lossless ? vp9_iwht4x4_add - : vp9_idct4x4_add; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - + if (cpi->oxcf.lossless) { + // In lossless mode, make sure right quantizer range and correct transform + // is set. 
+ cpi->oxcf.worst_allowed_q = 0; + cpi->oxcf.best_allowed_q = 0; + cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + } else { + cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + } + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; cpi->refresh_golden_frame = 0; @@ -1452,34 +821,35 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { // Convert target bandwidth from Kbit/s to Bit/s cpi->oxcf.target_bandwidth *= 1000; - cpi->oxcf.starting_buffer_level = rescale(cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + cpi->oxcf.starting_buffer_level = + vp9_rescale(cpi->oxcf.starting_buffer_level, + cpi->oxcf.target_bandwidth, 1000); // Set or reset optimal and maximum buffer levels. if (cpi->oxcf.optimal_buffer_level == 0) cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.optimal_buffer_level = rescale(cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + cpi->oxcf.optimal_buffer_level = + vp9_rescale(cpi->oxcf.optimal_buffer_level, + cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.maximum_buffer_size == 0) cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.maximum_buffer_size = rescale(cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); + cpi->oxcf.maximum_buffer_size = + vp9_rescale(cpi->oxcf.maximum_buffer_size, + cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target, - cpi->oxcf.maximum_buffer_size); - cpi->rc.buffer_level = MIN(cpi->rc.buffer_level, - cpi->oxcf.maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size); + rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size); // Set up frame rate and related parameters rate control values. 
vp9_new_framerate(cpi, cpi->oxcf.framerate); // Set absolute upper and lower quality limits - cpi->rc.worst_quality = cpi->oxcf.worst_allowed_q; - cpi->rc.best_quality = cpi->oxcf.best_allowed_q; + rc->worst_quality = cpi->oxcf.worst_allowed_q; + rc->best_quality = cpi->oxcf.best_allowed_q; // active values should only be modified if out of new range @@ -1504,9 +874,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { } update_frame_size(cpi); - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { + vp9_update_layer_context_change_config(cpi, + (int)cpi->oxcf.target_bandwidth); } cpi->speed = abs(cpi->oxcf.cpu_used); @@ -1520,7 +892,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { #else cpi->alt_ref_source = NULL; #endif - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if 0 // Experimental RD Code @@ -1541,7 +913,7 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) { mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; - mvjointsadcost[0] = 300; + mvjointsadcost[3] = 300; } static void cal_nmvsadcosts(int *mvsadcost[2]) { @@ -1693,30 +1065,19 @@ static void free_pick_mode_context(MACROBLOCK *x) { } } -VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { +VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; - volatile union { - VP9_COMP *cpi; - VP9_PTR ptr; - } ctx; - - VP9_COMP *cpi; - VP9_COMMON *cm; - - cpi = ctx.cpi = vpx_memalign(32, sizeof(VP9_COMP)); - // Check that the CPI instance is valid - if (!cpi) - return 0; + VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); + VP9_COMMON *const cm = cpi != NULL ? 
&cpi->common : NULL; - cm = &cpi->common; + if (!cm) + return NULL; vp9_zero(*cpi); if (setjmp(cm->error.jmp)) { - VP9_PTR ptr = ctx.ptr; - - ctx.cpi->common.error.setjmp = 0; - vp9_remove_compressor(&ptr); + cm->error.setjmp = 0; + vp9_remove_compressor(cpi); return 0; } @@ -1729,20 +1090,18 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->use_svc = 0; - init_config((VP9_PTR)cpi, oxcf); - + init_config(cpi, oxcf); + vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc); init_pick_mode_context(cpi); - cm->current_video_frame = 0; + cm->current_video_frame = 0; // Set reference frame sign bias for ALTREF frame to 1 (for now) cm->ref_frame_sign_bias[ALTREF_FRAME] = 1; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->gold_is_last = 0; - cpi->alt_is_last = 0; - cpi->gold_is_alt = 0; + cpi->alt_is_last = 0; + cpi->gold_is_alt = 0; // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(cm, cpi->segmentation_map, @@ -1752,6 +1111,9 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { CHECK_MEM_ERROR(cm, cpi->complexity_map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); + // Create a map used for cyclic background refresh. + CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, + vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); // And a place holder structure is the coding context // for use if we want to save and restore it @@ -1772,13 +1134,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; - - cpi->rc.frames_since_key = 8; // Sensible default for first frame. 
- cpi->rc.this_key_frame_forced = 0; - cpi->rc.next_key_frame_forced = 0; - - cpi->rc.source_alt_ref_pending = 0; - cpi->rc.source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; #if CONFIG_MULTIPLE_ARF @@ -1834,18 +1189,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->first_time_stamp_ever = INT64_MAX; - cpi->rc.frames_till_gf_update_due = 0; - - cpi->rc.ni_av_qi = cpi->oxcf.worst_allowed_q; - cpi->rc.ni_tot_qi = 0; - cpi->rc.ni_frames = 0; - cpi->rc.tot_q = 0.0; - cpi->rc.avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); - - cpi->rc.rate_correction_factor = 1.0; - cpi->rc.key_frame_rate_correction_factor = 1.0; - cpi->rc.gf_rate_correction_factor = 1.0; - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; @@ -1878,17 +1221,56 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { if (cpi->pass == 1) { vp9_init_first_pass(cpi); } else if (cpi->pass == 2) { - size_t packet_sz = sizeof(FIRSTPASS_STATS); - int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); + const size_t packet_sz = sizeof(FIRSTPASS_STATS); + const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = (void *)((char *)cpi->twopass.stats_in - + (packets - 1) * packet_sz); - vp9_init_second_pass(cpi); + if (cpi->svc.number_spatial_layers > 1 + && cpi->svc.number_temporal_layers == 1) { + FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; + FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; + int i; + + for (i = 0; i < oxcf->ss_number_layers; ++i) { + FIRSTPASS_STATS *const last_packet_for_layer = + &stats[packets - oxcf->ss_number_layers + i]; + const int layer_id = (int)last_packet_for_layer->spatial_layer_id; + const int packets_in_layer = (int)last_packet_for_layer->count + 1; + if (layer_id >= 0 && layer_id < 
oxcf->ss_number_layers) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; + + vpx_free(lc->rc_twopass_stats_in.buf); + + lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; + CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, + vpx_malloc(lc->rc_twopass_stats_in.sz)); + lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; + lc->twopass.stats_in = lc->twopass.stats_in_start; + lc->twopass.stats_in_end = lc->twopass.stats_in_start + + packets_in_layer - 1; + stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; + } + } + + for (i = 0; i < packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers + && stats_copy[layer_id] != NULL) { + *stats_copy[layer_id] = stats[i]; + ++stats_copy[layer_id]; + } + } + + vp9_init_second_pass_spatial_svc(cpi); + } else { + cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; + cpi->twopass.stats_in = cpi->twopass.stats_in_start; + cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + + vp9_init_second_pass(cpi); + } } - vp9_set_speed_features(cpi); + set_speed_features(cpi); // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { @@ -2010,11 +1392,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_zero(cpi->mode_test_hits); #endif - return (VP9_PTR) cpi; + return cpi; } -void vp9_remove_compressor(VP9_PTR *ptr) { - VP9_COMP *cpi = (VP9_COMP *)(*ptr); +void vp9_remove_compressor(VP9_COMP *cpi) { int i; if (!cpi) @@ -2121,7 +1502,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) { vp9_remove_common(&cpi->common); vpx_free(cpi); - *ptr = 0; #ifdef OUTPUT_YUV_SRC fclose(yuv_file); @@ -2143,53 +1523,42 @@ void vp9_remove_compressor(VP9_PTR *ptr) { #endif } +static int64_t get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + const int dw = width % 16; + const int dh = height % 16; + int64_t total_sse = 0; + unsigned int sse = 0; + int sum = 0; + 
int x, y; + + if (dw > 0) { + variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + if (dh > 0) { + variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } -static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride, - const uint8_t *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row + 16 <= rows; row += 16) { - for (col = 0; col + 16 <= cols; col += 16) { - unsigned int sse; - - vp9_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; - } - - /* Handle odd-sized width */ - if (col < cols) { - unsigned int border_row, border_col; - const uint8_t *border_orig = orig; - const uint8_t *border_recon = recon; - - for (border_row = 0; border_row < 16; border_row++) { - for (border_col = col; border_col < cols; border_col++) { - diff = border_orig[border_col] - border_recon[border_col]; - total_sse += diff * diff; - } - - border_orig += orig_stride; - border_recon += recon_stride; - } - } - - orig += orig_stride * 16; - recon += recon_stride * 16; - } - /* Handle odd-sized height */ - for (; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; + pa += 16; + pb += 16; } - orig += orig_stride; - recon += recon_stride; + a += 16 * a_stride; + b += 16 * b_stride; } return total_sse; @@ -2217,9 +1586,9 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; - const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i], - b_planes[i], 
b_strides[i], - w, h); + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse); @@ -2248,9 +1617,7 @@ static void generate_psnr_packet(VP9_COMP *cpi) { vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } -int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - +int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) { if (ref_frame_flags > 7) return -1; @@ -2258,27 +1625,11 @@ int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) { return 0; } -int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - if (ref_frame_flags > 7) - return -1; - - cpi->ext_refresh_golden_frame = 0; - cpi->ext_refresh_alt_ref_frame = 0; - cpi->ext_refresh_last_frame = 0; - - if (ref_frame_flags & VP9_LAST_FLAG) - cpi->ext_refresh_last_frame = 1; - - if (ref_frame_flags & VP9_GOLD_FLAG) - cpi->ext_refresh_golden_frame = 1; - - if (ref_frame_flags & VP9_ALT_FLAG) - cpi->ext_refresh_alt_ref_frame = 1; - +void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) { + cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0; + cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0; + cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0; cpi->ext_refresh_frame_flags_pending = 1; - return 0; } static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi, @@ -2294,9 +1645,8 @@ static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi, return ref_frame == NONE ? 
NULL : get_ref_frame_buffer(cpi, ref_frame); } -int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, +int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMP *const cpi = (VP9_COMP *)ptr; YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); if (cfg) { vp8_yv12_copy_frame(cfg, sd); @@ -2306,8 +1656,7 @@ int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, } } -int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { - VP9_COMP *cpi = (VP9_COMP *)ptr; +int vp9_get_reference_enc(VP9_COMP *cpi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &cpi->common; if (index < 0 || index >= REF_FRAMES) @@ -2317,9 +1666,8 @@ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { return 0; } -int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, +int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMP *cpi = (VP9_COMP *)ptr; YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); if (cfg) { vp8_yv12_copy_frame(sd, cfg); @@ -2329,9 +1677,9 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, } } -int vp9_update_entropy(VP9_PTR comp, int update) { - ((VP9_COMP *)comp)->ext_refresh_frame_context = update; - ((VP9_COMP *)comp)->ext_refresh_frame_context_pending = 1; +int vp9_update_entropy(VP9_COMP * cpi, int update) { + cpi->ext_refresh_frame_context = update; + cpi->ext_refresh_frame_context_pending = 1; return 0; } @@ -2532,36 +1880,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { } #endif -static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { -#define EDGE_THRESH 128 - int i, j; - int num_edge_pels = 0; - int num_pels = (frame->y_height - 2) * (frame->y_width - 2); - uint8_t *prev = frame->y_buffer + 1; - uint8_t *curr = frame->y_buffer + 1 + frame->y_stride; - uint8_t *next = 
frame->y_buffer + 1 + 2 * frame->y_stride; - for (i = 1; i < frame->y_height - 1; i++) { - for (j = 1; j < frame->y_width - 1; j++) { - /* Sobel hor and ver gradients */ - int v = 2 * (curr[1] - curr[-1]) + (prev[1] - prev[-1]) + - (next[1] - next[-1]); - int h = 2 * (prev[0] - next[0]) + (prev[1] - next[1]) + - (prev[-1] - next[-1]); - h = (h < 0 ? -h : h); - v = (v < 0 ? -v : v); - if (h > EDGE_THRESH || v > EDGE_THRESH) - num_edge_pels++; - curr++; - prev++; - next++; - } - curr += frame->y_stride - frame->y_width + 2; - prev += frame->y_stride - frame->y_width + 2; - next += frame->y_stride - frame->y_width + 2; - } - return (double)num_edge_pels / num_pels; -} - // Function to test for conditions that indicate we should loop // back and recode a frame. static int recode_loop_test(const VP9_COMP *cpi, @@ -2598,7 +1916,7 @@ static int recode_loop_test(const VP9_COMP *cpi, return force_recode; } -static void update_reference_frames(VP9_COMP * const cpi) { +void vp9_update_reference_frames(VP9_COMP *cpi) { VP9_COMMON * const cm = &cpi->common; // At this point the new frame has been encoded. 
@@ -2669,21 +1987,20 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } if (lf->filter_level > 0) { - vp9_set_alt_lf_level(cpi, lf->filter_level); vp9_loop_filter_frame(cm, xd, lf->filter_level, 0, 0); } vp9_extend_frame_inner_borders(cm->frame_to_show); } -static void scale_references(VP9_COMP *cpi) { +void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; @@ -2745,7 +2062,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { vp9_clear_system_state(); - recon_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10u %10d %10d %10d %10d %10d " @@ -2801,7 +2118,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, int q) { VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); // Set up entropy context depending on frame type. The decoder mandates // the use of the default context, index 0, for keyframes and inter @@ -2809,19 +2126,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. 
if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { - cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) + cm->frame_context_idx = cpi->refresh_alt_ref_frame; + + setup_inter_frame(cm); } // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); + } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_setup(cpi); } // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -2856,7 +2175,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, do { vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); if (loop_count == 0) { // Set up entropy context depending on frame type. The decoder mandates @@ -2865,12 +2184,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. 
if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + + setup_inter_frame(cm); } } @@ -2879,7 +2198,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } // transform / motion compensation build reconstruction frame @@ -2895,13 +2214,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // accurate estimate of output frame size to determine if we need // to recode. if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { - vp9_save_coding_context(cpi); + save_coding_context(cpi); cpi->dummy_packing = 1; if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size); rc->projected_frame_size = (int)(*size) << 3; - vp9_restore_coding_context(cpi); + restore_coding_context(cpi); if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; @@ -2914,7 +2233,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; int low_err_target = cpi->ambient_err >> 1; @@ -3118,7 +2437,20 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { cpi->Source = cpi->un_scaled_source; } - scale_references(cpi); + + // Scale the last source buffer, if required. 
+ if (cpi->unscaled_last_source != NULL) { + if (cm->mi_cols * MI_SIZE != cpi->unscaled_last_source->y_width || + cm->mi_rows * MI_SIZE != cpi->unscaled_last_source->y_height) { + scale_and_extend_frame_nonnormative(cpi->unscaled_last_source, + &cpi->scaled_last_source); + cpi->Last_Source = &cpi->scaled_last_source; + } else { + cpi->Last_Source = cpi->unscaled_last_source; + } + } + + vp9_scale_references(cpi); vp9_clear_system_state(); @@ -3155,7 +2487,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); @@ -3237,6 +2569,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_write_yuv_frame(cpi->Source); #endif + set_speed_features(cpi); + // Decide q and q bounds. q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index); @@ -3256,7 +2590,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // fixed interval. 
Note the reconstruction error if it is the frame before // the force key frame if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } // If the encoder forced a KEY_FRAME decision @@ -3294,7 +2628,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, update_reference_segmentation_map(cpi); release_scaled_references(cpi); - update_reference_frames(cpi); + vp9_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); @@ -3371,29 +2705,14 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->last_show_frame = cm->show_frame; if (cm->show_frame) { - // current mip will be the prev_mip for the next frame - MODE_INFO *temp = cm->prev_mip; - MODE_INFO **temp2 = cm->prev_mi_grid_base; - cm->prev_mip = cm->mip; - cm->mip = temp; - cm->prev_mi_grid_base = cm->mi_grid_base; - cm->mi_grid_base = temp2; - - // update the upper left visible macroblock ptrs - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - - cpi->mb.e_mbd.mi_8x8 = cm->mi_grid_visible; - cpi->mb.e_mbd.mi_8x8[0] = cm->mi; + vp9_swap_mi_and_prev_mi(cm); // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; + if (cpi->use_svc) + vp9_inc_frame_in_layer(&cpi->svc); } - - // restore prev_mi - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -3419,7 +2738,7 @@ static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, (void) frame_flags; vp9_rc_get_first_pass_params(cpi); - vp9_set_quantizer(cpi, find_fp_qindex()); + vp9_set_quantizer(&cpi->common, find_fp_qindex()); vp9_first_pass(cpi); 
} @@ -3430,7 +2749,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_rc_get_second_pass_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); - vp9_twopass_postencode_update(cpi, *size); + vp9_twopass_postencode_update(cpi); } static void check_initial_width(VP9_COMP *cpi, int subsampling_x, @@ -3447,10 +2766,9 @@ static void check_initial_width(VP9_COMP *cpi, int subsampling_x, } -int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, +int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { - VP9_COMP *cpi = (VP9_COMP *)ptr; VP9_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; int res = 0; @@ -3465,7 +2783,7 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) { + if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "Non-4:2:0 color space requires profile >= 1"); res = -1; @@ -3533,12 +2851,12 @@ void adjust_frame_rate(VP9_COMP *cpi) { cpi->last_end_time_stamp_seen = cpi->source->ts_end; } -int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, +int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { - VP9_COMP *cpi = (VP9_COMP *) ptr; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + RATE_CONTROL *const rc = &cpi->rc; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; @@ -3546,9 +2864,14 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (!cpi) return -1; + if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 
2) { + vp9_restore_layer_context(cpi); + } + vpx_usec_timer_start(&cmptimer); cpi->source = NULL; + cpi->last_source = NULL; set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); @@ -3560,7 +2883,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 0; // Should we code an alternate reference frame. - if (cpi->oxcf.play_alternate && cpi->rc.source_alt_ref_pending) { + if (cpi->oxcf.play_alternate && rc->source_alt_ref_pending) { int frames_to_arf; #if CONFIG_MULTIPLE_ARF @@ -3572,9 +2895,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, - cpi->next_frame_in_order; else #endif - frames_to_arf = cpi->rc.frames_till_gf_update_due; + frames_to_arf = rc->frames_till_gf_update_due; - assert(frames_to_arf <= cpi->rc.frames_to_key); + assert(frames_to_arf <= rc->frames_to_key); if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) { #if CONFIG_MULTIPLE_ARF @@ -3586,7 +2909,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. 
- vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost); + vp9_configure_arnr_filter(cpi, frames_to_arf, rc->gfu_boost); vp9_temporal_filter_prepare(cpi, frames_to_arf); vp9_extend_frame_borders(&cpi->alt_ref_buffer); force_src_buffer = &cpi->alt_ref_buffer; @@ -3596,14 +2919,14 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } else { - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } } @@ -3611,25 +2934,32 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF int i; #endif + + // Get last frame source. + if (cm->current_video_frame > 0) { + if ((cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) + return -1; + } + if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; cm->intra_only = 0; #if CONFIG_MULTIPLE_ARF // Is this frame the ARF overlay. - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; for (i = 0; i < cpi->arf_buffered; ++i) { if (cpi->source == cpi->alt_ref_source[i]) { - cpi->rc.is_src_frame_alt_ref = 1; + rc->is_src_frame_alt_ref = 1; cpi->refresh_golden_frame = 1; break; } } #else - cpi->rc.is_src_frame_alt_ref = cpi->alt_ref_source - && (cpi->source == cpi->alt_ref_source); + rc->is_src_frame_alt_ref = cpi->alt_ref_source && + (cpi->source == cpi->alt_ref_source); #endif - if (cpi->rc.is_src_frame_alt_ref) { + if (rc->is_src_frame_alt_ref) { // Current frame is an ARF overlay frame. #if CONFIG_MULTIPLE_ARF cpi->alt_ref_source[i] = NULL; @@ -3649,13 +2979,20 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->source) { cpi->un_scaled_source = cpi->Source = force_src_buffer ? 
force_src_buffer : &cpi->source->img; + + if (cpi->last_source != NULL) { + cpi->unscaled_last_source = &cpi->last_source->img; + } else { + cpi->unscaled_last_source = NULL; + } + *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; #if CONFIG_MULTIPLE_ARF - if ((cm->frame_type != KEY_FRAME) && (cpi->pass == 2)) - cpi->rc.source_alt_ref_pending = is_next_frame_arf(cpi); + if (cm->frame_type != KEY_FRAME && cpi->pass == 2) + rc->source_alt_ref_pending = is_next_frame_arf(cpi); #endif } else { *size = 0; @@ -3678,8 +3015,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_framerate(cpi); - restore_layer_context(cpi); + vp9_update_temporal_layer_framerate(cpi); + vp9_restore_layer_context(cpi); } // start with a 0 size frame @@ -3728,19 +3065,19 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); - xd->interp_kernel = vp9_get_interp_kernel( - DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_init(); } - if (cpi->use_svc) { - SvcEncode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 1) { + if (cpi->pass == 1 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2) { + } else if (cpi->pass == 2 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); + } else if (cpi->use_svc) { + SvcEncode(cpi, size, dest, frame_flags); } else { // One pass encode Pass0Encode(cpi, size, dest, frame_flags); @@ -3759,9 +3096,10 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } // Save layer specific state. 
- if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - save_layer_context(cpi); + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { + vp9_save_layer_context(cpi); } vpx_usec_timer_mark(&cmptimer); @@ -3846,9 +3184,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, return 0; } -int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, +int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags) { - VP9_COMP *cpi = (VP9_COMP *)comp; VP9_COMMON *cm = &cpi->common; if (!cm->show_frame) { @@ -3876,11 +3213,10 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, } } -int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, +int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[MAX_SEGMENTS], int delta_lf[MAX_SEGMENTS], unsigned int threshold[MAX_SEGMENTS]) { - VP9_COMP *cpi = (VP9_COMP *) comp; signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS]; struct segmentation *seg = &cpi->common.seg; int i; @@ -3926,10 +3262,8 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, return 0; } -int vp9_set_active_map(VP9_PTR comp, unsigned char *map, +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols) { - VP9_COMP *cpi = (VP9_COMP *) comp; - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { if (map) { vpx_memcpy(cpi->active_map, map, rows * cols); @@ -3945,9 +3279,8 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map, } } -int vp9_set_internal_size(VP9_PTR comp, +int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { - VP9_COMP *cpi = (VP9_COMP *) comp; VP9_COMMON *cm = &cpi->common; int hr = 0, hs = 0, vr = 0, vs = 0; @@ -3967,9 +3300,8 @@ int 
vp9_set_internal_size(VP9_PTR comp, return 0; } -int vp9_set_size_literal(VP9_PTR comp, unsigned int width, +int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height) { - VP9_COMP *cpi = (VP9_COMP *)comp; VP9_COMMON *cm = &cpi->common; check_initial_width(cpi, 1, 1); @@ -4004,37 +3336,20 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width, return 0; } -void vp9_set_svc(VP9_PTR comp, int use_svc) { - VP9_COMP *cpi = (VP9_COMP *)comp; +void vp9_set_svc(VP9_COMP *cpi, int use_svc) { cpi->use_svc = use_svc; return; } -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference) { - int i, j; - int total = 0; - - const uint8_t *src = source->y_buffer; - const uint8_t *ref = reference->y_buffer; - - // Loop through the Y plane raw and reconstruction data summing - // (square differences) - for (i = 0; i < source->y_height; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, - ref + j, reference->y_stride, &sse); - } - - src += 16 * source->y_stride; - ref += 16 * reference->y_stride; - } +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); - return total; + return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } -int vp9_get_quantizer(VP9_PTR c) { - return ((VP9_COMP *)c)->common.base_qindex; +int vp9_get_quantizer(VP9_COMP *cpi) { + return cpi->common.base_qindex; } diff --git a/source/libvpx/vp9/encoder/vp9_onyx_int.h b/source/libvpx/vp9/encoder/vp9_onyx_int.h index 019cb13..18203f9 100644 --- a/source/libvpx/vp9/encoder/vp9_onyx_int.h +++ b/source/libvpx/vp9/encoder/vp9_onyx_int.h @@ -16,12 +16,14 @@ #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8cx.h" +#include "vp9/common/vp9_ppflags.h" 
#include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_onyx.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" @@ -29,8 +31,9 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_speed_features.h" +#include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_variance.h" #ifdef __cplusplus @@ -39,30 +42,11 @@ extern "C" { // #define MODE_TEST_HIT_STATS -#if CONFIG_MULTIPLE_ARF -// Set MIN_GF_INTERVAL to 1 for the full decomposition. -#define MIN_GF_INTERVAL 2 -#else -#define MIN_GF_INTERVAL 4 -#endif #define DEFAULT_GF_INTERVAL 10 -#define DEFAULT_KF_BOOST 2000 -#define DEFAULT_GF_BOOST 2000 - -#define KEY_FRAME_CONTEXT 5 #define MAX_MODES 30 #define MAX_REFS 6 -#define MIN_THRESHMULT 32 -#define MAX_THRESHMULT 512 - -#define GF_ZEROMV_ZBIN_BOOST 0 -#define LF_ZEROMV_ZBIN_BOOST 0 -#define MV_ZBIN_BOOST 0 -#define SPLIT_MV_ZBIN_BOOST 0 -#define INTRA_ZBIN_BOOST 0 - typedef struct { int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -132,84 +116,6 @@ typedef enum { } THR_MODES_SUB8X8; typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5 -} SEARCH_METHODS; - -typedef enum { - USE_FULL_RD = 0, - USE_LARGESTINTRA, - USE_LARGESTINTRA_MODELINTER, - USE_LARGESTALL -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1, - STRICT_NEIGHBORING_MIN_MAX = 2 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Values should be powers of 2 so that they can be selected as bits of - // an integer flags field - - // terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the 
frame, etc. - FLAG_EARLY_TERMINATE = 1, - - // skips comp inter modes if the best so far is an intra mode - FLAG_SKIP_COMP_BESTINTRA = 2, - - // skips comp inter modes if the best single intermode so far does - // not have the same reference as one of the two references being - // tested - FLAG_SKIP_COMP_REFMISMATCH = 4, - - // skips oblique intra modes if the best so far is an inter mode - FLAG_SKIP_INTRA_BESTINTER = 8, - - // skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions - FLAG_SKIP_INTRA_DIRMISMATCH = 16, - - // skips intra modes other than DC_PRED if the source variance - // is small - FLAG_SKIP_INTRA_LOWVAR = 32, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - SUBPEL_TREE = 0, - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -#define ALL_INTRA_MODES 0x3FF -#define INTRA_DC_ONLY 0x01 -#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) -#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) -#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) - -typedef enum { - LAST_FRAME_PARTITION_OFF = 0, - LAST_FRAME_PARTITION_LOW_MOTION = 1, - LAST_FRAME_PARTITION_ALL = 2 -} LAST_FRAME_PARTITION_METHOD; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. - ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { // encode_breakout is disabled. ENCODE_BREAKOUT_DISABLED = 0, // encode_breakout is enabled. 
@@ -219,237 +125,164 @@ typedef enum { } ENCODE_BREAKOUT_TYPE; typedef enum { - // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, - - // Always use a fixed size partition - FIXED_PARTITION = 1, + NORMAL = 0, + FOURFIVE = 1, + THREEFIVE = 2, + ONETWO = 3 +} VPX_SCALING; - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION -} PARTITION_SEARCH_TYPE; +typedef enum { + USAGE_LOCAL_FILE_PLAYBACK = 0, + USAGE_STREAM_FROM_SERVER = 1, + USAGE_CONSTRAINED_QUALITY = 2, + USAGE_CONSTANT_QUALITY = 3, +} END_USAGE; -typedef struct { - // Frame level coding parameter update - int frame_parameter_update; +typedef enum { + // Good Quality Fast Encoding. The encoder balances quality with the + // amount of time it takes to encode the output. (speed setting + // controls how fast) + MODE_GOODQUALITY = 1, + + // One Pass - Best Quality. The encoder places priority on the + // quality of the output over encoding speed. The output is compressed + // at the highest possible quality. This option takes the longest + // amount of time to encode. (speed setting ignored) + MODE_BESTQUALITY = 2, + + // Two Pass - First Pass. The encoder generates a file of statistics + // for use in the second encoding pass. (speed setting controls how fast) + MODE_FIRSTPASS = 3, + + // Two Pass - Second Pass. The encoder uses the statistics that were + // generated in the first encoding pass to create the compressed + // output. (speed setting controls how fast) + MODE_SECONDPASS = 4, + + // Two Pass - Second Pass Best. The encoder uses the statistics that + // were generated in the first encoding pass to create the compressed + // output using the highest possible quality, and taking a + // longer amount of time to encode. 
(speed setting ignored) + MODE_SECONDPASS_BEST = 5, + + // Realtime/Live Encoding. This mode is optimized for realtime + // encoding (for example, capturing a television signal or feed from + // a live camera). (speed setting controls how fast) + MODE_REALTIME = 6, +} MODE; - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; +typedef enum { + FRAMEFLAGS_KEY = 1 << 0, + FRAMEFLAGS_GOLDEN = 1 << 1, + FRAMEFLAGS_ALTREF = 1 << 2, +} FRAMETYPE_FLAGS; - RECODE_LOOP_TYPE recode_loop; +typedef enum { + NO_AQ = 0, + VARIANCE_AQ = 1, + COMPLEXITY_AQ = 2, + CYCLIC_REFRESH_AQ = 3, + AQ_MODE_COUNT // This should always be the last member of the enum +} AQ_MODE; + +typedef struct VP9_CONFIG { + BITSTREAM_PROFILE profile; + BIT_DEPTH bit_depth; + int width; // width of data passed to the compressor + int height; // height of data passed to the compressor + double framerate; // set to passed in framerate + int64_t target_bandwidth; // bandwidth to be used in kilobits per second + + int noise_sensitivity; // pre processing blur: recommendation 0 + int sharpness; // sharpening output: recommendation 0: + int cpu_used; + unsigned int rc_max_intra_bitrate_pct; - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. - SUBPEL_SEARCH_METHODS subpel_search_method; + MODE mode; - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; + // Key Framing Operations + int auto_key; // autodetect cut scenes and set the keyframes + int key_freq; // maximum distance to key frame. - // Control when to stop subpel search - int subpel_force_stop; + int lag_in_frames; // how many frames lag before we start encoding - // Thresh_mult is used to set a threshold for the rd score. 
A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - // This parameter controls the number of steps we'll do in a diamond - // search. - int max_step_search_steps; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. - int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - int comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Enables skipping the reconstruction step (idct, recon) in the - // intermediate steps assuming the last frame didn't have too many intra - // blocks and the q is less than a threshold. - int skip_encode_sb; - int skip_encode_frame; - - // This variable allows us to reuse the last frames partition choices - // (64x64 v 32x32 etc) for this frame. 
It can be set to only use the last - // frame as a starting point in low motion scenes or always use it. If set - // we use last partitioning_redo frequency to determine how often to redo - // the partitioning from scratch. Adjust_partitioning_from_last_frame - // enables us to adjust up or down one partitioning from the last frames - // partitioning. - LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; - - // Determine which method we use to determine transform size. We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Low precision 32x32 fdct keeps everything in 16 bits and thus is less - // precise but significantly faster than the non lp version. - int use_lp32x32fdct; - - // TODO(JBB): remove this as its no longer used. - - // After looking at the first set of modes (set by index here), skip - // checking modes for reference frames that don't match the reference frame - // of the best so far. - int mode_skip_start; - - // TODO(JBB): Remove this. - int reference_masking; - - PARTITION_SEARCH_TYPE partition_search_type; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. - AUTO_MIN_MAX_MODE auto_min_max_partition_size; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. 
- BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // How frequently we re do the partitioning from scratch. Only used if - // use_lastframe_partitioning is set. - int last_partitioning_redo_frequency; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. - int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_interp_filter; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which the split mode is disabled - unsigned int disable_split_var_thresh; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. 
- int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This variable enables an early break out of mode testing if the model for - // rd built from the prediction signal indicates a value that's much - // higher than the best rd we've seen so far. - int use_rd_breakout; - - // This enables us to use an estimate for intra rd based on dc mode rather - // than choosing an actual uv mode in the stage of encoding before the actual - // final encode. - int use_uv_intra_rd_estimate; - - // This feature controls how the loop filter level is determined: - // 0: Try the full image with different values. - // 1: Try a small portion of the image with different values. - // 2: Estimate the level based on quantizer and frame type - int use_fast_lpf_pick; - - // This feature limits the number of coefficients updates we actually do - // by only looking at counts from 1/2 the bands. - int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced - - // This flag controls the use of non-RD mode decision. - int use_nonrd_pick_mode; + // ---------------------------------------------------------------- + // DATARATE CONTROL OPTIONS - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; + END_USAGE end_usage; // vbr or cbr - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. 
- int disable_inter_mode_mask[BLOCK_SIZES]; -} SPEED_FEATURES; + // buffer targeting aggressiveness + int under_shoot_pct; + int over_shoot_pct; -typedef struct { - RATE_CONTROL rc; - int target_bandwidth; - int64_t starting_buffer_level; + // buffering parameters + int64_t starting_buffer_level; // in seconds int64_t optimal_buffer_level; int64_t maximum_buffer_size; - double framerate; - int avg_frame_size; -} LAYER_CONTEXT; -typedef struct VP9_COMP { - DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); - - DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); - -#if CONFIG_ALPHA - DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); -#endif + // Frame drop threshold. + int drop_frames_water_mark; + + // controlling quality + int fixed_q; + int worst_allowed_q; + int best_allowed_q; + int cq_level; + int lossless; + AQ_MODE aq_mode; // Adaptive Quantization mode + + // Enable feature to reduce the frame quantization every x frames. + int frame_periodic_boost; + + // two pass datarate control + int two_pass_vbrbias; // two pass datarate control tweaks + int two_pass_vbrmin_section; + int two_pass_vbrmax_section; + // END DATARATE CONTROL OPTIONS + // ---------------------------------------------------------------- + + // Spatial and temporal scalability. + int ss_number_layers; // Number of spatial layers. + int ts_number_layers; // Number of temporal layers. + // Bitrate allocation for spatial layers. 
+ int ss_target_bitrate[VPX_SS_MAX_LAYERS]; + // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. + int ts_target_bitrate[VPX_TS_MAX_LAYERS]; + int ts_rate_decimator[VPX_TS_MAX_LAYERS]; + + // these parameters aren't to be used in final build don't use!!! + int play_alternate; + int alt_freq; + + int encode_breakout; // early breakout : for video conf recommend 800 + + /* Bitfield defining the error resiliency features to enable. + * Can provide decodable frames after losses in previous + * frames and decodable partitions after losses in the same frame. + */ + unsigned int error_resilient_mode; + + /* Bitfield defining the parallel decoding mode where the + * decoding in successive frames may be conducted in parallel + * just by decoding the frame headers. + */ + unsigned int frame_parallel_decoding_mode; + + int arnr_max_frames; + int arnr_strength; + int arnr_type; + + int tile_columns; + int tile_rows; + + struct vpx_fixed_buf two_pass_stats_in; + struct vpx_codec_pkt_list *output_pkt_list; + + vp8e_tuning tuning; +} VP9_CONFIG; +typedef struct VP9_COMP { + QUANTS quants; MACROBLOCK mb; VP9_COMMON common; VP9_CONFIG oxcf; @@ -460,10 +293,14 @@ typedef struct VP9_COMP { #else struct lookahead_entry *alt_ref_source; #endif + struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; + YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; + YV12_BUFFER_CONFIG *unscaled_last_source; + YV12_BUFFER_CONFIG scaled_last_source; int key_frame_frequency; @@ -506,19 +343,26 @@ typedef struct VP9_COMP { // Ambient reconstruction err target for force key frames int ambient_err; + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. 
+ int rd_thresh_mult[MAX_MODES]; + int rd_thresh_mult_sub8x8[MAX_REFS]; + int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; int64_t rd_comp_pred_diff[REFERENCE_MODES]; - int64_t rd_prediction_type_threshes[4][REFERENCE_MODES]; + int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; int64_t rd_tx_select_diff[TX_MODES]; // FIXME(rbultje) can this overflow? - int rd_tx_select_threshes[4][TX_MODES]; + int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - int64_t rd_filter_threshes[4][SWITCHABLE_FILTER_CONTEXTS]; + int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS]; int64_t mask_filter_rd; @@ -543,14 +387,12 @@ typedef struct VP9_COMP { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; - vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; struct vpx_codec_pkt_list *output_pkt_list; MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation - int seg0_progress, seg0_idx, seg0_cnt; // for real time encoding int speed; @@ -558,9 +400,6 @@ typedef struct VP9_COMP { int cpu_used; int pass; - vp9_prob last_skip_false_probs[3][SKIP_CONTEXTS]; - int last_skip_probs_q[3]; - int ref_frame_flags; SPEED_FEATURES sf; @@ -585,6 +424,8 @@ typedef struct VP9_COMP { unsigned char *active_map; unsigned int active_map_enabled; + CYCLIC_REFRESH *cyclic_refresh; + fractional_mv_step_fp *find_fractional_mv_step; fractional_mv_step_comp_fp *find_fractional_mv_step_comp; vp9_full_search_fn_t full_search_sad; @@ -641,10 +482,6 @@ typedef struct VP9_COMP { unsigned int activity_avg; unsigned int *mb_activity_map; int 
*mb_norm_activity_map; - int output_partition; - - // Force next frame to intra when kf_auto says so. - int force_next_frame_intra; int droppable; @@ -657,15 +494,9 @@ typedef struct VP9_COMP { int use_svc; - struct svc { - int spatial_layer_id; - int temporal_layer_id; - int number_spatial_layers; - int number_temporal_layers; - // Layer context used for rate control in CBR mode, only defined for - // temporal layers for now. - LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; - } svc; + SVC svc; + + int use_large_partition_rate; #if CONFIG_MULTIPLE_ARF // ARF tracking variables. @@ -680,26 +511,68 @@ typedef struct VP9_COMP { int max_arf_level; #endif -#ifdef ENTROPY_STATS - int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2]; -#endif - - #ifdef MODE_TEST_HIT_STATS // Debug / test stats int64_t mode_test_hits[BLOCK_SIZES]; #endif +} VP9_COMP; - // Y,U,V,(A) - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; +void vp9_initialize_enc(); - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; -} VP9_COMP; +struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); +void vp9_remove_compressor(VP9_COMP *cpi); -static int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { +void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); + + // receive a frames worth of data. caller can assume that a copy of this + // frame is made and not just a copy of the pointer.. 
+int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, + YV12_BUFFER_CONFIG *sd, int64_t time_stamp, + int64_t end_time_stamp); + +int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, + size_t *size, uint8_t *dest, + int64_t *time_stamp, int64_t *time_end, int flush); + +int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, + vp9_ppflags_t *flags); + +int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); + +void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); + +int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +int vp9_get_reference_enc(VP9_COMP *cpi, int index, + YV12_BUFFER_CONFIG **fb); + +int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +int vp9_update_entropy(VP9_COMP *cpi, int update); + +int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, + unsigned int rows, unsigned int cols, + int delta_q[MAX_SEGMENTS], + int delta_lf[MAX_SEGMENTS], + unsigned int threshold[MAX_SEGMENTS]); + +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, + unsigned int rows, unsigned int cols); + +int vp9_set_internal_size(VP9_COMP *cpi, + VPX_SCALING horiz_mode, VPX_SCALING vert_mode); + +int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, + unsigned int height); + +void vp9_set_svc(VP9_COMP *cpi, int use_svc); + +int vp9_get_quantizer(struct VP9_COMP *cpi); + +static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -709,30 +582,43 @@ static int get_ref_frame_idx(const VP9_COMP *cpi, } } -static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, - ref_frame)]].buf; +static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( + VP9_COMP 
*cpi, MV_REFERENCE_FRAME ref_frame) { + VP9_COMMON * const cm = &cpi->common; + return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] + .buf; } -void vp9_encode_frame(VP9_COMP *cpi); +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); +} -void vp9_set_speed_features(VP9_COMP *cpi); +static INLINE int get_token_alloc(int mb_rows, int mb_cols) { + // TODO(JBB): make this work for alpha channel and double check we can't + // exceed this token count if we have a 32x32 transform crossing a boundary + // at a multiple of 16. + // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full + // resolution. We assume up to 1 token per pixel, and then allow + // a head room of 4. + return mb_rows * mb_cols * (16 * 16 * 3 + 4); +} -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference); +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); void vp9_alloc_compressor_data(VP9_COMP *cpi); -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); +void vp9_scale_references(VP9_COMP *cpi); -static int get_token_alloc(int mb_rows, int mb_cols) { - return mb_rows * mb_cols * (48 * 16 + 4); -} +void vp9_update_reference_frames(VP9_COMP *cpi); + +int64_t vp9_rescale(int64_t val, int64_t num, int denom); -static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { +static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, + MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? 
ref1 - LAST_FRAME diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.c b/source/libvpx/vp9/encoder/vp9_picklpf.c index 5b0ecf7..3ac8522 100644 --- a/source/libvpx/vp9/encoder/vp9_picklpf.c +++ b/source/libvpx/vp9/encoder/vp9_picklpf.c @@ -10,39 +10,32 @@ #include <assert.h> #include <limits.h> + +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_quant_common.h" + #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_quant_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpx_scale.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "./vpx_scale_rtcd.h" - -static int get_min_filter_level(VP9_COMP *cpi, int base_qindex) { - return 0; -} -static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) { +static int get_max_filter_level(VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? 
MAX_LOOP_FILTER * 3 / 4 : MAX_LOOP_FILTER; } -// Stub function for now Alt LF not used -void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) { -} static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, - MACROBLOCKD *const xd, VP9_COMMON *const cm, int filt_level, int partial_frame) { + VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_set_alt_lf_level(cpi, filt_level); - vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); + filt_err = vp9_get_y_sse(sd, cm->frame_to_show); // Re-instate the unfiltered frame vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); @@ -52,11 +45,10 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial_frame) { - MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; - const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); + const int min_filter_level = 0; + const int max_filter_level = get_max_filter_level(cpi); int best_err; int filt_best; int filt_direction = 0; @@ -73,7 +65,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Make a copy of the unfiltered / processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial_frame); + best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame); filt_best = filt_mid; ss_err[filt_mid] = best_err; @@ -95,7 +87,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score if (ss_err[filt_low] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, 
cm, filt_low, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_low, partial_frame); ss_err[filt_low] = filt_err; } else { filt_err = ss_err[filt_low]; @@ -114,7 +106,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { if (ss_err[filt_high] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_high, partial_frame); ss_err[filt_high] = filt_err; } else { filt_err = ss_err[filt_high]; @@ -128,7 +120,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Half the step distance if the best filter value was the same as last time if (filt_best == filt_mid) { - filter_step = filter_step / 2; + filter_step /= 2; filt_direction = 0; } else { filt_direction = (filt_best < filt_mid) ? -1 : 1; @@ -140,25 +132,24 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int method) { + LPF_PICK_METHOD method) { VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; lf->sharpness_level = cm->frame_type == KEY_FRAME ? 
0 : cpi->oxcf.sharpness; - if (method == 2) { - const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); + if (method == LPF_PICK_FROM_Q) { + const int min_filter_level = 0; + const int max_filter_level = get_max_filter_level(cpi); const int q = vp9_ac_quant(cm->base_qindex, 0); // These values were determined by linear fitting the result of the - // searched level - // filt_guess = q * 0.316206 + 3.87252 - int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18; + // searched level, filt_guess = q * 0.316206 + 3.87252 + int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == 1); + search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.h b/source/libvpx/vp9/encoder/vp9_picklpf.h index 0fc1f88..7d08ddb 100644 --- a/source/libvpx/vp9/encoder/vp9_picklpf.h +++ b/source/libvpx/vp9/encoder/vp9_picklpf.h @@ -16,13 +16,13 @@ extern "C" { #endif +#include "vp9/encoder/vp9_onyx_int.h" + struct yv12_buffer_config; struct VP9_COMP; -void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); - void vp9_pick_filter_level(const struct yv12_buffer_config *sd, - struct VP9_COMP *cpi, int method); + struct VP9_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c index 9ba48a1..f3fe99c 100644 --- a/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -26,19 +26,18 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, +static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const 
tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; - int bestsme = INT_MAX; - int further_steps, step_param; + int step_param; int sadpb = x->sadperbit16; MV mvp_full; int ref = mbmi->ref_frame[0]; - int_mv ref_mv = mbmi->ref_mvs[ref][0]; + const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; int i; int tmp_col_min = x->mv_col_min; @@ -46,9 +45,6 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - int buf_offset; - int stride = xd->plane[0].pre[0].stride; - const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); if (scaled_ref_frame) { @@ -59,15 +55,14 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } - vp9_set_mv_search_range(x, &ref_mv.as_mv); + vp9_set_mv_search_range(x, &ref_mv); // TODO(jingning) exploiting adaptive motion search control in non-RD // mode decision too. 
step_param = 6; - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { @@ -78,36 +73,50 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } - return INT_MAX; + return; } } - - mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + assert(x->mv_best_ref_index[ref] <= 2); + if (x->mv_best_ref_index[ref] < 2) + mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + else + mvp_full = x->pred_mv[ref].as_mv; mvp_full.col >>= 3; mvp_full.row >>= 3; - if (cpi->sf.search_method == FAST_HEX) { - bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, - &cpi->fn_ptr[bsize], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + if (cpi->sf.search_method == FAST_DIAMOND) { + // NOTE: this returns SAD + vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + } else if (cpi->sf.search_method == FAST_HEX) { + // NOTE: this returns SAD + vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + // NOTE: this returns SAD + vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + // NOTE: this returns SAD + vp9_square_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - 
&ref_mv.as_mv, &tmp_mv->as_mv); + // NOTE: this returns SAD + vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else { - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[bsize], - &ref_mv.as_mv, &tmp_mv->as_mv); + int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + // NOTE: this returns variance + vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 1, + &cpi->fn_ptr[bsize], + &ref_mv, &tmp_mv->as_mv); } x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -120,23 +129,11 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[0] = backup_yv12[i]; } - // TODO(jingning) This step can be merged into full pixel search step in the - // re-designed log-diamond search - buf_offset = tmp_mv->as_mv.row * stride + tmp_mv->as_mv.col; - - // Find sad for current vector. - bestsme = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf + buf_offset, - stride, 0x7fffffff); - - // scale to 1/8 pixel resolution - tmp_mv->as_mv.row = tmp_mv->as_mv.row * 8; - tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8; - // calculate the bit cost on motion vector - *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv, + mvp_full.row = tmp_mv->as_mv.row * 8; + mvp_full.col = tmp_mv->as_mv.col * 8; + *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - return bestsme; } static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -144,7 +141,7 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, MV *tmp_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int ref = mbmi->ref_frame[0]; MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; @@ -160,12 +157,9 
@@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } - tmp_mv->col >>= 3; - tmp_mv->row >>= 3; - cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, @@ -180,6 +174,30 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + x->pred_mv[ref].as_mv = *tmp_mv; +} + +static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int64_t *out_dist_sum) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. + unsigned int sse; + int rate; + int64_t dist; + + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; + + int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse); + + vp9_model_rd_from_var_lapndz(sse + var, 1 << num_pels_log2_lookup[bsize], + pd->dequant[1] >> 3, &rate, &dist); + *out_rate_sum = rate; + *out_dist_sum = dist << 3; } // TODO(jingning) placeholder for inter-frame non-RD mode decision. 
@@ -191,29 +209,41 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t *returndistortion, BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode, best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; + INTERP_FILTER best_pred_filter = EIGHTTAP; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int64_t best_rd = INT64_MAX; int64_t this_rd = INT64_MAX; - static const int cost[4]= { 0, 2, 4, 6 }; - const int64_t inter_mode_thresh = 300; + int rate = INT_MAX; + int64_t dist = INT64_MAX; + + VP9_COMMON *cm = &cpi->common; + int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); + + const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, + intra_cost_penalty, 0); const int64_t intra_mode_cost = 50; + unsigned char segment_id = mbmi->segment_id; + const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame. + int mode_idx[MB_MODE_COUNT] = {0}; + INTERP_FILTER filter_ref = SWITCHABLE; + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; - if (cpi->active_map_enabled && x->active_ptr[0] == 0) + if (!x->in_active_map) x->skip = 1; - // initialize mode decisions *returnrate = INT_MAX; *returndistortion = INT64_MAX; @@ -226,21 +256,25 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ? 
EIGHTTAP : cpi->common.interp_filter; mbmi->skip = 0; - mbmi->segment_id = 0; + mbmi->segment_id = segment_id; for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + if (xd->up_available) + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + else if (xd->left_available) + filter_ref = xd->mi[-1]->mbmi.interp_filter; + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { - int rate_mv = 0; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -252,58 +286,131 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; + // Set conversion index for LAST_FRAME. + if (ref_frame == LAST_FRAME) { + mode_idx[NEARESTMV] = THR_NEARESTMV; // LAST_FRAME, NEARESTMV + mode_idx[NEARMV] = THR_NEARMV; // LAST_FRAME, NEARMV + mode_idx[ZEROMV] = THR_ZEROMV; // LAST_FRAME, ZEROMV + mode_idx[NEWMV] = THR_NEWMV; // LAST_FRAME, NEWMV + } + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - int rate = cost[INTER_OFFSET(this_mode)] - << (num_pels_log2_lookup[bsize] - 4); - int64_t dist; + int rate_mv = 0; + if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode))) continue; + if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * + rd_thresh_freq_fact[this_mode] >> 5) || + rd_threshes[mode_idx[this_mode]] == INT_MAX) + continue; + if (this_mode == NEWMV) { + int rate_mode = 0; if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; - x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = - full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv); + full_pixel_motion_search(cpi, x, 
tile, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv); if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) continue; + rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd) + continue; + sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame].as_mv); } - if (frame_mv[this_mode][ref_frame].as_int == 0) { - dist = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)]; - } else if (this_mode != NEARESTMV && - frame_mv[NEARESTMV][ref_frame].as_int == - frame_mv[this_mode][ref_frame].as_int) { - dist = x->mode_sad[ref_frame][INTER_OFFSET(NEARESTMV)]; + if (this_mode != NEARESTMV) + if (frame_mv[this_mode][ref_frame].as_int == + frame_mv[NEARESTMV][ref_frame].as_int) + continue; + + mbmi->mode = this_mode; + mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + + // Search for the best prediction filter type, when the resulting + // motion vector is at sub-pixel accuracy level for luma component, i.e., + // the last three bits are all zeros. 
+ if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && + ((mbmi->mv[0].as_mv.row & 0x07) != 0 || + (mbmi->mv[0].as_mv.col & 0x07) != 0)) { + int64_t tmp_rdcost1 = INT64_MAX; + int64_t tmp_rdcost2 = INT64_MAX; + int64_t tmp_rdcost3 = INT64_MAX; + int pf_rate[3]; + int64_t pf_dist[3]; + + mbmi->interp_filter = EIGHTTAP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP], + &pf_dist[EIGHTTAP]); + tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP], + pf_dist[EIGHTTAP]); + + mbmi->interp_filter = EIGHTTAP_SHARP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP], + &pf_dist[EIGHTTAP_SHARP]); + tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP], + pf_dist[EIGHTTAP_SHARP]); + + mbmi->interp_filter = EIGHTTAP_SMOOTH; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH], + &pf_dist[EIGHTTAP_SMOOTH]); + tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH], + pf_dist[EIGHTTAP_SMOOTH]); + + if (tmp_rdcost2 < tmp_rdcost1) { + if (tmp_rdcost2 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP_SHARP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } else { + if (tmp_rdcost1 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } + + rate = pf_rate[mbmi->interp_filter]; + dist = pf_dist[mbmi->interp_filter]; } else { - mbmi->mode = this_mode; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + mbmi->interp_filter = (filter_ref == SWITCHABLE) ? 
EIGHTTAP: filter_ref; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] = - cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, INT_MAX); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); } - this_rd = rate + dist; + rate += rate_mv; + rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd < best_rd) { best_rd = this_rd; + *returnrate = rate; + *returndistortion = dist; best_mode = this_mode; + best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; } } } mbmi->mode = best_mode; + mbmi->interp_filter = best_pred_filter; mbmi->ref_frame[0] = best_ref_frame; mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; // Perform intra prediction search, if the best SAD is above a certain // threshold. 
@@ -314,13 +421,15 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &p->src.buf[0], p->src.stride, &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); - this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf, - p->src.stride, - pd->dst.buf, - pd->dst.stride, INT_MAX); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + rate += x->mbmode_cost[this_mode]; + rate += intra_cost_penalty; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd + intra_mode_cost < best_rd) { best_rd = this_rd; + *returnrate = rate; + *returndistortion = dist; mbmi->mode = this_mode; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->uv_mode = this_mode; @@ -328,5 +437,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } } + return INT64_MAX; } diff --git a/source/libvpx/vp9/encoder/vp9_quantize.c b/source/libvpx/vp9/encoder/vp9_quantize.c index 4ab8995..31f3b3e 100644 --- a/source/libvpx/vp9/encoder/vp9_quantize.c +++ b/source/libvpx/vp9/encoder/vp9_quantize.c @@ -153,6 +153,7 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) { void vp9_init_quantizer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + QUANTS *const quants = &cpi->quants; int i, q, quant; for (q = 0; q < QINDEX_RANGE; q++) { @@ -163,48 +164,49 @@ void vp9_init_quantizer(VP9_COMP *cpi) { // y quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q) : vp9_ac_quant(q, 0); - invert_quant(&cpi->y_quant[q][i], &cpi->y_quant_shift[q][i], quant); - cpi->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->y_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant); + quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->y_round[q][i] = (qrounding_factor * quant) >> 7; cm->y_dequant[q][i] = quant; // uv quant = i == 0 ? 
vp9_dc_quant(q, cm->uv_dc_delta_q) : vp9_ac_quant(q, cm->uv_ac_delta_q); - invert_quant(&cpi->uv_quant[q][i], &cpi->uv_quant_shift[q][i], quant); - cpi->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->uv_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->uv_quant[q][i], + &quants->uv_quant_shift[q][i], quant); + quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->uv_round[q][i] = (qrounding_factor * quant) >> 7; cm->uv_dequant[q][i] = quant; #if CONFIG_ALPHA // alpha quant = i == 0 ? vp9_dc_quant(q, cm->a_dc_delta_q) : vp9_ac_quant(q, cm->a_ac_delta_q); - invert_quant(&cpi->a_quant[q][i], &cpi->a_quant_shift[q][i], quant); - cpi->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->a_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->a_quant[q][i], &quants->a_quant_shift[q][i], quant); + quants->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->a_round[q][i] = (qrounding_factor * quant) >> 7; cm->a_dequant[q][i] = quant; #endif } for (i = 2; i < 8; i++) { - cpi->y_quant[q][i] = cpi->y_quant[q][1]; - cpi->y_quant_shift[q][i] = cpi->y_quant_shift[q][1]; - cpi->y_zbin[q][i] = cpi->y_zbin[q][1]; - cpi->y_round[q][i] = cpi->y_round[q][1]; + quants->y_quant[q][i] = quants->y_quant[q][1]; + quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; + quants->y_zbin[q][i] = quants->y_zbin[q][1]; + quants->y_round[q][i] = quants->y_round[q][1]; cm->y_dequant[q][i] = cm->y_dequant[q][1]; - cpi->uv_quant[q][i] = cpi->uv_quant[q][1]; - cpi->uv_quant_shift[q][i] = cpi->uv_quant_shift[q][1]; - cpi->uv_zbin[q][i] = cpi->uv_zbin[q][1]; - cpi->uv_round[q][i] = cpi->uv_round[q][1]; + quants->uv_quant[q][i] = quants->uv_quant[q][1]; + quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1]; + quants->uv_zbin[q][i] = quants->uv_zbin[q][1]; + quants->uv_round[q][i] = quants->uv_round[q][1]; cm->uv_dequant[q][i] = cm->uv_dequant[q][1]; #if CONFIG_ALPHA - 
cpi->a_quant[q][i] = cpi->a_quant[q][1]; - cpi->a_quant_shift[q][i] = cpi->a_quant_shift[q][1]; - cpi->a_zbin[q][i] = cpi->a_zbin[q][1]; - cpi->a_round[q][i] = cpi->a_round[q][1]; + quants->a_quant[q][i] = quants->a_quant[q][1]; + quants->a_quant_shift[q][i] = quants->a_quant_shift[q][1]; + quants->a_zbin[q][i] = quants->a_zbin[q][1]; + quants->a_round[q][i] = quants->a_round[q][1]; cm->a_dequant[q][i] = cm->a_dequant[q][1]; #endif } @@ -213,27 +215,28 @@ void vp9_init_quantizer(VP9_COMP *cpi) { void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + MACROBLOCKD *const xd = &x->e_mbd; + QUANTS *const quants = &cpi->quants; + const int segment_id = xd->mi[0]->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj; int i; // Y - x->plane[0].quant = cpi->y_quant[qindex]; - x->plane[0].quant_shift = cpi->y_quant_shift[qindex]; - x->plane[0].zbin = cpi->y_zbin[qindex]; - x->plane[0].round = cpi->y_round[qindex]; + x->plane[0].quant = quants->y_quant[qindex]; + x->plane[0].quant_shift = quants->y_quant_shift[qindex]; + x->plane[0].zbin = quants->y_zbin[qindex]; + x->plane[0].round = quants->y_round[qindex]; x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; // UV for (i = 1; i < 3; i++) { - x->plane[i].quant = cpi->uv_quant[qindex]; - x->plane[i].quant_shift = cpi->uv_quant_shift[qindex]; - x->plane[i].zbin = cpi->uv_zbin[qindex]; - x->plane[i].round = cpi->uv_round[qindex]; + x->plane[i].quant = quants->uv_quant[qindex]; + x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; + x->plane[i].zbin = quants->uv_zbin[qindex]; + x->plane[i].round = quants->uv_round[qindex]; x->plane[i].zbin_extra = 
(int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; } @@ -273,9 +276,7 @@ void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->mb); } -void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { - VP9_COMMON *const cm = &cpi->common; - +void vp9_set_quantizer(VP9_COMMON *cm, int q) { // quantizer has to be reinitialized with vp9_init_quantizer() if any // delta_q changes. cm->base_qindex = q; @@ -283,3 +284,30 @@ void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { cm->uv_dc_delta_q = 0; cm->uv_ac_delta_q = 0; } + +// Table that converts 0-63 Q-range values passed in outside to the Qindex +// range used internally. +static const int quantizer_to_qindex[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 249, 255, +}; + +int vp9_quantizer_to_qindex(int quantizer) { + return quantizer_to_qindex[quantizer]; +} + +int vp9_qindex_to_quantizer(int qindex) { + int quantizer; + + for (quantizer = 0; quantizer < 64; ++quantizer) + if (quantizer_to_qindex[quantizer] >= qindex) + return quantizer; + + return 63; +} diff --git a/source/libvpx/vp9/encoder/vp9_quantize.h b/source/libvpx/vp9/encoder/vp9_quantize.h index f356b12..7a93883 100644 --- a/source/libvpx/vp9/encoder/vp9_quantize.h +++ b/source/libvpx/vp9/encoder/vp9_quantize.h @@ -17,12 +17,30 @@ extern "C" { #endif +typedef struct { + DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); + + DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); + 
DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); + +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); +#endif +} QUANTS; + void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); struct VP9_COMP; - -void vp9_set_quantizer(struct VP9_COMP *cpi, int q); +struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); @@ -32,6 +50,12 @@ void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); +void vp9_set_quantizer(struct VP9Common *cm, int q); + +int vp9_quantizer_to_qindex(int quantizer); + +int vp9_qindex_to_quantizer(int qindex); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c index 89aa821..b4e883f 100644 --- a/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -27,14 +27,14 @@ #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ratectrl.h" +#define DEFAULT_KF_BOOST 2000 +#define DEFAULT_GF_BOOST 2000 + #define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1 #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; @@ -52,10 +52,9 @@ static int kf_low = 400; // formulaic approach to facilitate easier adjustment of the Q tables. 
// The formulae were derived from computing a 3rd order polynomial best // fit to the original data (after plotting real maxq vs minq (not q index)) -static int calculate_minq_index(double maxq, - double x3, double x2, double x1, double c) { +static int get_minq_index(double maxq, double x3, double x2, double x1) { int i; - const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c, + const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); // Special case handling to deal with the step from q2.0 @@ -63,57 +62,26 @@ static int calculate_minq_index(double maxq, if (minqtarget <= 2.0) return 0; - for (i = 0; i < QINDEX_RANGE; i++) { + for (i = 0; i < QINDEX_RANGE; i++) if (minqtarget <= vp9_convert_qindex_to_q(i)) return i; - } return QINDEX_RANGE - 1; } -void vp9_rc_init_minq_luts(void) { +void vp9_rc_init_minq_luts() { int i; for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - - kf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.000001, - -0.0004, - 0.15, - 0.0); - kf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.000002, - -0.0012, - 0.50, - 0.0); - - gf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.32, - 0.0); - gf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.50, - 0.0); - afq_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.33, - 0.0); - afq_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.55, - 0.0); - inter_minq[i] = calculate_minq_index(maxq, - 0.00000271, - -0.00113, - 0.75, - 0.0); + kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); + kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); + gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); + gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); + afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); + 
afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -135,79 +103,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, return (int)(0.5 + (enumerator * correction_factor / q)); } -void vp9_save_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp9_restore_coding_context. These functions are - // intended for use in a re-code loop in vp9_compress_frame where the - // quantizer value is adjusted between loop iterations. - vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); - vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); - vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - - vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); - vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - - cc->fc = cm->fc; -} - -void vp9_restore_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Restore key state variables to the snapshot state stored in the - // previous call to vp9_save_coding_context. 
- vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); - vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); - vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - - vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); - vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - - cm->fc = cc->fc; -} - -void vp9_setup_key_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - vp9_setup_past_independence(cm); - - /* All buffers are implicitly updated on key frames. */ - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; -} - -void vp9_setup_inter_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - if (cm->error_resilient_mode || cm->intra_only) - vp9_setup_past_independence(cm); - - assert(cm->frame_context_idx < FRAME_CONTEXTS); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; -} - -static int estimate_bits_at_q(int frame_kind, int q, int mbs, +static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor) { - const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); - - // Attempt to retain reasonable accuracy without overflow. The cutoff is - // chosen such that the maximum product of Bpm and MBs fits 31 bits. The - // largest Bpm takes 20 bits. - return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs - : (bpm * mbs) >> BPER_MB_NORMBITS; + const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor)); + return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS; } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { @@ -244,13 +143,12 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { // Update the buffer level for higher layers, given the encoded current layer. 
-static void update_layer_buffer_level(VP9_COMP *const cpi, - int encoded_frame_size) { +static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { int temporal_layer = 0; - int current_temporal_layer = cpi->svc.temporal_layer_id; + int current_temporal_layer = svc->temporal_layer_id; for (temporal_layer = current_temporal_layer + 1; - temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + temporal_layer < svc->number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -280,10 +178,60 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_buffer_level(cpi, encoded_frame_size); + update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } +void vp9_rc_init(const VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc) { + if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; + } else { + rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + } + + rc->last_q[0] = oxcf->best_allowed_q; + rc->last_q[1] = oxcf->best_allowed_q; + rc->last_q[2] = oxcf->best_allowed_q; + + rc->buffer_level = oxcf->starting_buffer_level; + rc->bits_off_target = oxcf->starting_buffer_level; + + rc->rolling_target_bits = rc->av_per_frame_bandwidth; + rc->rolling_actual_bits = rc->av_per_frame_bandwidth; + rc->long_rolling_target_bits = 
rc->av_per_frame_bandwidth; + rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; + + rc->total_actual_bits = 0; + rc->total_target_vs_actual = 0; + + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->frames_since_key = 8; // Sensible default for first frame. + rc->this_key_frame_forced = 0; + rc->next_key_frame_forced = 0; + rc->source_alt_ref_pending = 0; + rc->source_alt_ref_active = 0; + + rc->frames_till_gf_update_due = 0; + + rc->ni_av_qi = oxcf->worst_allowed_q; + rc->ni_tot_qi = 0; + rc->ni_frames = 0; + + rc->tot_q = 0.0; + rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q); + + rc->rate_correction_factor = 1.0; + rc->key_frame_rate_correction_factor = 1.0; + rc->gf_rate_correction_factor = 1.0; +} + int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; @@ -327,6 +275,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { return cpi->rc.key_frame_rate_correction_factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else @@ -339,6 +288,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { cpi->rc.key_frame_rate_correction_factor = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else @@ -347,7 +297,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { } void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { - const int q = cpi->common.base_qindex; + const VP9_COMMON *const cm = &cpi->common; int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; @@ -360,8 +310,8 @@ void 
vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large - projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q, - cpi->common.MBs, + projected_size_based_on_q = estimate_bits_at_q(cm->frame_type, + cm->base_qindex, cm->MBs, rate_correction_factor); // Work out a size correction factor. if (projected_size_based_on_q > 0) @@ -385,20 +335,18 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { if (correction_factor > 102) { // We are not already at the worst allowable quality - correction_factor = - (int)(100 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 + ((correction_factor - 100) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } else if (correction_factor < 99) { // We are not already at the best allowable quality - correction_factor = - (int)(100 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 - ((100 - correction_factor) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor < MIN_BPB_FACTOR) @@ -419,11 +367,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. 
- if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - // Case where we would overflow int - target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; + target_bits_per_mb = + ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; @@ -462,33 +407,25 @@ static int get_active_quality(int q, int gfu_boost, int low, int high, } static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { + const RATE_CONTROL *const rc = &cpi->rc; + const unsigned int curr_frame = cpi->common.current_video_frame; int active_worst_quality; + if (cpi->common.frame_type == KEY_FRAME) { - if (cpi->common.current_video_frame == 0) { - active_worst_quality = cpi->rc.worst_quality; - } else { - // Choose active worst quality twice as large as the last q. - active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; - } - } else if (!cpi->rc.is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - if (cpi->common.current_video_frame == 1) { - active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4; - } else { - // Choose active worst quality twice as large as the last q. - active_worst_quality = cpi->rc.last_q[INTER_FRAME]; - } + active_worst_quality = curr_frame == 0 ? rc->worst_quality + : rc->last_q[KEY_FRAME] * 2; } else { - if (cpi->common.current_video_frame == 1) { - active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; + if (!rc->is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4 + : rc->last_q[INTER_FRAME]; } else { - // Choose active worst quality twice as large as the last q. - active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2; + active_worst_quality = curr_frame == 1 ? 
rc->last_q[KEY_FRAME] * 2 + : rc->last_q[INTER_FRAME] * 2; } } - if (active_worst_quality > cpi->rc.worst_quality) - active_worst_quality = cpi->rc.worst_quality; - return active_worst_quality; + + return MIN(active_worst_quality, rc->worst_quality); } // Adjust active_worst_quality level based on buffer level. @@ -498,6 +435,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // If buffer is below the optimal level, let the active_worst_quality go from // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). + const VP9_COMMON *const cm = &cpi->common; const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. @@ -505,9 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; - if (cpi->common.frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) return rc->worst_quality; - if (cpi->common.current_video_frame > 1) + if (cm->current_video_frame > 1) active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[INTER_FRAME] * 5 / 4); else @@ -561,7 +499,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, (last_boosted_q * 0.75)); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { @@ -583,10 +521,11 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. 
q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } } else if (!rc->is_src_frame_alt_ref && + !cpi->use_svc && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was @@ -639,7 +578,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -672,8 +611,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set @@ -694,15 +633,15 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -805,7 +744,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -821,7 +760,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -854,8 +793,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else { // Not forced keyframe. @@ -879,15 +818,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -988,7 +927,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate. - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -1004,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -1020,8 +959,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, - int *bottom_index, - int *top_index) { + int *bottom_index, int *top_index) { int q; if (cpi->pass == 0) { if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -1032,14 +970,14 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); } - // JBB : This is realtime mode. In real time mode the first frame - // should be larger. Q of 0 is disabled because we force tx size to be + // Q of 0 is disabled because we force tx size to be // 16x16... 
if (cpi->sf.use_nonrd_pick_mode) { - if (cpi->common.current_video_frame == 0) - q /= 3; if (q == 0) q++; + if (cpi->sf.force_frame_boost == 1) + q -= cpi->sf.max_delta_qindex; + if (q < *bottom_index) *bottom_index = q; else if (q > *top_index) @@ -1057,28 +995,14 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { - if (cpi->common.frame_type == KEY_FRAME) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - // Stron overshoot limit for constrained quality - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 2 / 8; - } else { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 5 / 8; - } - } - } + int recode_tolerance = + (cpi->sf.recode_tolerance * this_frame_target) / 100; + + *frame_over_shoot_limit = this_frame_target + recode_tolerance; + *frame_under_shoot_limit = this_frame_target - recode_tolerance; // For very small rate targets where the fractional adjustment - // (eg * 7/8) may be tiny make sure there is at least a minimum - // range. + // may be tiny make sure there is at least a minimum range. 
*frame_over_shoot_limit += 200; *frame_under_shoot_limit -= 200; if (*frame_under_shoot_limit < 0) @@ -1103,16 +1027,17 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { static void update_alt_ref_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user - cpi->rc.frames_since_golden = 0; + RATE_CONTROL *const rc = &cpi->rc; + rc->frames_since_golden = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif // Clear the alternate reference update pending flag. - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; // Set the alternate reference frame active flag - cpi->rc.source_alt_ref_active = 1; + rc->source_alt_ref_active = 1; } static void update_golden_frame_stats(VP9_COMP *cpi) { @@ -1141,6 +1066,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; cm->last_frame_type = cm->frame_type; @@ -1150,7 +1076,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Post encode loop adjustment of Q prediction. vp9_rc_update_rate_correction_factors( cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. 
if (cm->frame_type == KEY_FRAME) { @@ -1159,7 +1085,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { + !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1205,12 +1131,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Actual bits spent rc->total_actual_bits += rc->projected_frame_size; + rc->total_target_bits += (cm->show_frame ? rc->av_per_frame_bandwidth : 0); - // Debug stats - rc->total_target_vs_actual += (rc->this_frame_target - - rc->projected_frame_size); + rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + if (oxcf->play_alternate && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); @@ -1243,15 +1168,15 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { static const int af_ratio = 10; - const RATE_CONTROL *rc = &cpi->rc; + const RATE_CONTROL *const rc = &cpi->rc; int target; #if USE_ALTREF_FOR_ONE_PASS target = (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ? 
- (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) / - (cpi->rc.baseline_gf_interval + af_ratio - 1) : - (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) / - (cpi->rc.baseline_gf_interval + af_ratio - 1); + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval * af_ratio) / + (rc->baseline_gf_interval + af_ratio - 1) : + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval) / + (rc->baseline_gf_interval + af_ratio - 1); #else target = rc->av_per_frame_bandwidth; #endif @@ -1271,7 +1196,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { int target; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY || + (cm->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; @@ -1303,18 +1228,19 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; + const SVC *const svc = &cpi->svc; const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->av_per_frame_bandwidth; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (svc->number_temporal_layers > 1 && + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { // Note that for layers, av_per_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
- int current_temporal_layer = cpi->svc.temporal_layer_id; - const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + int current_temporal_layer = svc->temporal_layer_id; + const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1351,13 +1277,14 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int target = cpi->rc.av_per_frame_bandwidth; + RATE_CONTROL *const rc = &cpi->rc; + int target = rc->av_per_frame_bandwidth; if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + (cpi->oxcf.auto_key && (rc->frames_since_key % cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; - cpi->rc.source_alt_ref_active = 0; + rc->source_alt_ref_active = 0; if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { target = calc_iframe_target_size_one_pass_cbr(cpi); } @@ -1368,8 +1295,8 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } } vp9_rc_set_frame_target(cpi, target); - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { @@ -1377,7 +1304,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY || + (cm->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; @@ -1396,3 +1323,46 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = INT_MAX; rc->baseline_gf_interval = INT_MAX; } + +int vp9_compute_qdelta(const RATE_CONTROL *rc, double 
qstart, double qtarget) { + int start_index = rc->worst_quality; + int target_index = rc->worst_quality; + int i; + + // Convert the average q value to an index. + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + start_index = i; + if (vp9_convert_qindex_to_q(i) >= qstart) + break; + } + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_convert_qindex_to_q(i) >= qtarget) + break; + } + + return target_index - start_index; +} + +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio) { + int target_index = rc->worst_quality; + int i; + + // Look up the current projected bits per block for the base index + const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0); + + // Find the target bits per mb based on the base value and given ratio. + const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb ) + break; + } + + return target_index - qindex; +} diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.h b/source/libvpx/vp9/encoder/vp9_ratectrl.h index 5dbc7d1..7693c2b 100644 --- a/source/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/source/libvpx/vp9/encoder/vp9_ratectrl.h @@ -12,12 +12,19 @@ #ifndef VP9_ENCODER_VP9_RATECTRL_H_ #define VP9_ENCODER_VP9_RATECTRL_H_ +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_blockd.h" + #ifdef __cplusplus extern "C" { #endif #define FRAME_OVERHEAD_BITS 200 +// Bits Per MB at different Q (Multiplied by 512) +#define BPER_MB_NORMBITS 9 + typedef struct { // Rate targetting variables int this_frame_target; @@ -37,6 +44,7 @@ typedef struct { int frames_since_golden; int frames_till_gf_update_due; int max_gf_interval; + int static_scene_max_gf_interval; int 
baseline_gf_interval; int frames_to_key; int frames_since_key; @@ -53,7 +61,7 @@ typedef struct { int ni_av_qi; int ni_tot_qi; int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF double tot_q; double avg_q; @@ -70,7 +78,8 @@ typedef struct { int long_rolling_actual_bits; int64_t total_actual_bits; - int total_target_vs_actual; // debug stats + int64_t total_target_bits; + int64_t total_target_vs_actual; int worst_quality; int best_quality; @@ -78,17 +87,13 @@ typedef struct { } RATE_CONTROL; struct VP9_COMP; +struct VP9_CONFIG; -void vp9_save_coding_context(struct VP9_COMP *cpi); -void vp9_restore_coding_context(struct VP9_COMP *cpi); - -void vp9_setup_key_frame(struct VP9_COMP *cpi); -void vp9_setup_inter_frame(struct VP9_COMP *cpi); +void vp9_rc_init(const struct VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc); double vp9_convert_qindex_to_q(int qindex); -// initialize luts for minq -void vp9_rc_init_minq_luts(void); +void vp9_rc_init_minq_luts(); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -161,6 +166,15 @@ int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi, // This function is called only from the vp9_rc_get_..._params() functions. void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target); +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a target q value +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); + +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a value that should equate to the given rate ratio. 
+int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.c b/source/libvpx/vp9/encoder/vp9_rdopt.c index ed81fbe..b292b42 100644 --- a/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -30,6 +30,7 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_mcomp.h" @@ -38,9 +39,13 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_variance.h" +#define RD_THRESH_MAX_FACT 64 +#define RD_THRESH_INC 1 +#define RD_THRESH_POW 1.25 +#define RD_MULT_EPB_RATIO 64 + /* Factor to weigh the rate for switchable interp filters */ #define SWITCHABLE_INTERP_RATE_FACTOR 1 @@ -72,6 +77,7 @@ struct rdcost_block_args { int64_t this_rd; int64_t best_rd; int skip; + int use_fast_coef_costing; const scan_order *so; }; @@ -145,9 +151,8 @@ static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize, } static void fill_mode_costs(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - FRAME_CONTEXT *const fc = &cm->fc; + const FRAME_CONTEXT *const fc = &cpi->common.fc; int i, j; for (i = 0; i < INTRA_MODES; i++) @@ -157,15 +162,14 @@ static void fill_mode_costs(VP9_COMP *cpi) { // TODO(rbultje) separate tables for superblock costing? 
vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[1], - fc->uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[0], - vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); + vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME], + vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); + vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME], + fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) vp9_cost_tokens((int *)x->switchable_interp_costs[i], - fc->switchable_interp_prob[i], - vp9_switchable_interp_tree); + fc->switchable_interp_prob[i], vp9_switchable_interp_tree); } static void fill_token_costs(vp9_coeff_cost *c, @@ -214,7 +218,7 @@ void vp9_init_me_luts() { } } -int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { const int q = vp9_dc_quant(qindex, 0); // TODO(debargha): Adjust the function below int rdmult = 88 * q * q / 25; @@ -228,12 +232,9 @@ int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { } static int compute_rd_thresh_factor(int qindex) { - int q; // TODO(debargha): Adjust the function below - q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); - if (q < 8) - q = 8; - return q; + const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); + return MAX(q, 8); } void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { @@ -242,9 +243,8 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { } static void set_block_thresholds(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; int i, bsize, segment_id; - VP9_COMMON *cm = &cpi->common; - SPEED_FEATURES *sf = &cpi->sf; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id, @@ -260,13 +260,13 @@ static void set_block_thresholds(VP9_COMP *cpi) { for (i = 0; i < MAX_MODES; ++i) 
cpi->rd_threshes[segment_id][bsize][i] = - sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4 + cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - sf->thresh_mult_sub8x8[i] < thresh_max - ? sf->thresh_mult_sub8x8[i] * t / 4 + cpi->rd_thresh_mult_sub8x8[i] < thresh_max + ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } @@ -274,8 +274,8 @@ static void set_block_thresholds(VP9_COMP *cpi) { } void vp9_initialize_rd_consts(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCK *x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; int i; vp9_clear_system_state(); @@ -286,14 +286,12 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; x->errorperbit += (x->errorperbit == 0); - vp9_set_speed_features(cpi); - x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 
0 : 1; set_block_thresholds(cpi); - if (!cpi->sf.use_nonrd_pick_mode) { + if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { fill_token_costs(x->token_costs, cm->fc.coef_probs); for (i = 0; i < PARTITION_CONTEXTS; i++) @@ -301,7 +299,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_partition_tree); } - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) { + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || + cm->frame_type == KEY_FRAME) { fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { @@ -400,9 +399,9 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } -static void model_rd_from_var_lapndz(unsigned int var, unsigned int n, - unsigned int qstep, int *rate, - int64_t *dist) { +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist) { // This function models the rate and distortion for a Laplacian // source with given variance when quantized with a uniform quantizer // with given stepsize. 
The closed form expressions are in: @@ -433,7 +432,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int i; int64_t rate_sum = 0; int64_t dist_sum = 0; - int ref = xd->mi_8x8[0]->mbmi.ref_frame[0]; + const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -464,8 +463,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, } else { int rate; int64_t dist; - model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], - pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist; } @@ -482,8 +481,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, int *out_skip) { int j, k; BLOCK_SIZE bs; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; + const struct macroblock_plane *const p = &x->plane[0]; + const struct macroblockd_plane *const pd = &xd->plane[0]; const int width = 4 * num_4x4_blocks_wide_lookup[bsize]; const int height = 4 * num_4x4_blocks_high_lookup[bsize]; int rate_sum = 0; @@ -512,7 +511,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride, &sse); // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, + &rate, &dist); rate_sum += rate; dist_sum += dist; *out_skip &= (rate < 1024); @@ -549,26 +549,25 @@ static const int16_t band_counts[TX_SIZES][8] = { { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; - static INLINE int cost_coeffs(MACROBLOCK *x, int plane, int block, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, - const int16_t *scan, const int16_t *nb) { + const int16_t *scan, const int16_t *nb, + int use_fast_coef_costing) { MACROBLOCKD *const xd 
= &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; - struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + const struct macroblock_plane *p = &x->plane[plane]; + const struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; const int16_t *band_count = &band_counts[tx_size][1]; const int eob = p->eobs[block]; const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; - uint8_t *p_tok = x->token_cache; + uint8_t token_cache[32 * 32]; int pt = combine_entropy_contexts(*A, *L); int c, cost; - // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size : get_uv_tx_size(mbmi) == tx_size); @@ -584,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int v = qcoeff[0]; int prev_t = vp9_dct_value_tokens_ptr[v].token; cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; - p_tok[0] = vp9_pt_energy_class[prev_t]; + token_cache[0] = vp9_pt_energy_class[prev_t]; ++token_costs; // ac tokens @@ -594,9 +593,13 @@ static INLINE int cost_coeffs(MACROBLOCK *x, v = qcoeff[rc]; t = vp9_dct_value_tokens_ptr[v].token; - pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; - p_tok[rc] = vp9_pt_energy_class[t]; + if (use_fast_coef_costing) { + cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v]; + } else { + pt = get_coef_context(nb, token_cache, c); + cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; + token_cache[rc] = vp9_pt_energy_class[t]; + } prev_t = t; if (!--band_left) { band_left = *band_count++; @@ -606,8 +609,12 @@ static INLINE int cost_coeffs(MACROBLOCK *x, // eob token if (band_left) { - pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[0][pt][EOB_TOKEN]; + if 
(use_fast_coef_costing) { + cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; + } else { + pt = get_coef_context(nb, token_cache, c); + cost += (*token_costs)[0][pt][EOB_TOKEN]; + } } } @@ -616,14 +623,13 @@ static INLINE int cost_coeffs(MACROBLOCK *x, return cost; } - static void dist_block(int plane, int block, TX_SIZE tx_size, struct rdcost_block_args* args) { const int ss_txfrm_size = tx_size << 1; MACROBLOCK* const x = args->x; MACROBLOCKD* const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; int64_t this_sse; int shift = tx_size == TX_32X32 ? 0 : 2; int16_t *const coeff = BLOCK_OFFSET(p->coeff, block); @@ -632,7 +638,7 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, &this_sse) >> shift; args->sse = this_sse >> shift; - if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) { + if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) { // TODO(jingning): tune the model to better capture the distortion. 
int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> (shift + 2); @@ -648,7 +654,8 @@ static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize, args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx, args->t_left + y_idx, tx_size, - args->so->scan, args->so->neighbors); + args->so->scan, args->so->neighbors, + args->use_fast_coef_costing); } static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, @@ -656,7 +663,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int64_t rd1, rd2, rd; if (args->skip) @@ -732,15 +739,17 @@ static void txfm_rd_in_plane(MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size) { + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { MACROBLOCKD *const xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; struct rdcost_block_args args = { 0 }; args.x = x; args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; if (plane == 0) - xd->mi_8x8[0]->mbmi.tx_size = tx_size; + xd->mi[0]->mbmi.tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); @@ -770,13 +779,13 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; mbmi->tx_size = MIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, - mbmi->tx_size); + 
mbmi->tx_size, cpi->sf.use_fast_coef_costing); cpi->tx_stepdown_count[0]++; } @@ -789,7 +798,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -872,7 +881,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -920,7 +929,8 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs txfm_rd_in_plane(x, rate, distortion, skip, - &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); + &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { cpi->tx_stepdown_count[0]++; @@ -941,7 +951,7 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const TX_SIZE max_tx_size = max_txsize_lookup[bs]; TX_SIZE tx_size; @@ -968,7 +978,8 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], 
&sse[tx_size], - ref_best_rd, 0, bs, tx_size); + ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, bs); } @@ -983,7 +994,7 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t ref_best_rd) { int64_t sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; assert(bs == mbmi->sb_type); if (cpi->sf.tx_size_search_method != USE_FULL_RD) { @@ -997,7 +1008,8 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size) txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], &sse[tx_size], - ref_best_rd, 0, bs, tx_size); + ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, bs); } @@ -1029,7 +1041,7 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode, static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, MB_PREDICTION_MODE *best_mode, - int *bmode_costs, + const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, @@ -1058,7 +1070,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vpx_memcpy(ta, a, sizeof(ta)); vpx_memcpy(tl, l, sizeof(tl)); - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.tx_size = TX_4X4; for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; @@ -1087,7 +1099,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); - xd->mi_8x8[0]->bmi[block].as_mode = mode; + xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? 
src : dst, @@ -1100,7 +1112,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_fwht4x4(src_diff, coeff, 8); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, - so->scan, so->neighbors); + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, @@ -1112,7 +1125,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_fht4x4(src_diff, coeff, 8, tx_type); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, - so->scan, so->neighbors); + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -1152,18 +1166,16 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, return best_rd; } -static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, - MACROBLOCK * const mb, - int * const rate, - int * const rate_y, - int64_t * const distortion, +static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, + int *rate, int *rate_y, + int64_t *distortion, int64_t best_rd) { int i, j; - MACROBLOCKD *const xd = &mb->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; - const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const MACROBLOCKD *const xd = &mb->e_mbd; + MODE_INFO *const mic = xd->mi[0]; + const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -1172,13 +1184,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; - int *bmode_costs; + const int *bmode_costs = mb->mbmode_cost; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - bmode_costs = mb->mbmode_cost; - // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -1232,7 +1242,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; + MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; @@ -1246,8 +1256,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_tx_cache[TX_MODES]; - MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode))) continue; @@ -1296,12 +1306,12 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } -static void super_block_uvrd(MACROBLOCK *x, +static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi); int plane; int pnrate = 0, pnskip = 1; @@ -1323,7 +1333,8 @@ static void super_block_uvrd(MACROBLOCK *x, for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, - ref_best_rd, plane, bsize, uv_txfm_size); + ref_best_rd, plane, bsize, uv_txfm_size, + cpi->sf.use_fast_coef_costing); if (pnrate == INT_MAX) goto term; *rate += pnrate; @@ -1357,9 +1368,9 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - xd->mi_8x8[0]->mbmi.uv_mode = mode; + xd->mi[0]->mbmi.uv_mode = mode; - super_block_uvrd(x, &this_rate_tokenonly, + super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1398,18 +1409,19 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - xd->mi_8x8[0]->mbmi.uv_mode = mode_selected; + xd->mi[0]->mbmi.uv_mode = mode_selected; return best_rd; } -static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x, +static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { + const VP9_COMMON *cm = &cpi->common; int64_t unused; - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED; - super_block_uvrd(x, rate_tokenonly, distortion, + 
x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; + super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); @@ -1425,7 +1437,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. if (cpi->sf.use_uv_intra_rd_estimate) { - rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv, + rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); // Else do a proper rd search for each possible transform size that may // be considered in the main rd loop. @@ -1434,13 +1446,13 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size); } - *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode; + *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; } -static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, +static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { - MACROBLOCK *const x = &cpi->mb; - const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; + const MACROBLOCK *const x = &cpi->mb; + const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. 
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { @@ -1451,12 +1463,6 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, } } -void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode, - const MV *mv) { - xd->mi_8x8[0]->mbmi.mode = mode; - xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv; -} - static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, @@ -1464,59 +1470,56 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv single_newmv[MAX_REF_FRAMES], int *rate_mv); -static int labels2mode(MACROBLOCK *x, int i, +static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, MB_PREDICTION_MODE mode, - int_mv *this_mv, int_mv *this_second_mv, + int_mv this_mv[2], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int_mv seg_mvs[MAX_REF_FRAMES], - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { - MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; - MB_MODE_INFO *mbmi = &mic->mbmi; + int_mv *best_ref_mv[2], + const int *mvjcost, int *mvcost[2]) { + MODE_INFO *const mic = xd->mi[0]; + const MB_MODE_INFO *const mbmi = &mic->mbmi; int thismvcost = 0; int idx, idy; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; - const int has_second_rf = has_second_ref(mbmi); + const int is_compound = has_second_ref(mbmi); // the only time we should do costing for new motion vector or mode // is when we are on a new label (jbb May 08, 2007) switch (mode) { case NEWMV: - this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv, + this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; + thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); - if (has_second_rf) { - this_second_mv->as_int = 
seg_mvs[mbmi->ref_frame[1]].as_int; - thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv, - &second_best_ref_mv->as_mv, + if (is_compound) { + this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; + thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); } break; case NEARESTMV: - this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; - if (has_second_rf) - this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; + this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; + if (is_compound) + this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; break; case NEARMV: - this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; - if (has_second_rf) - this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; + this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; + if (is_compound) + this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; break; case ZEROMV: - this_mv->as_int = 0; - if (has_second_rf) - this_second_mv->as_int = 0; + this_mv[0].as_int = 0; + if (is_compound) + this_mv[1].as_int = 0; break; default: break; } - mic->bmi[i].as_mv[0].as_int = this_mv->as_int; - if (has_second_rf) - mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; + mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int; + if (is_compound) + mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int; mic->bmi[i].as_mode = mode; @@ -1542,7 +1545,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; @@ -1556,6 +1559,8 @@ static int64_t 
encode_inter_mb_segment(VP9_COMP *cpi, int thisrate = 0, ref; const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + for (ref = 0; ref < 1 + is_compound; ++ref) { const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i, pd->pre[ref].stride)]; @@ -1563,7 +1568,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -1588,7 +1593,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, 16, &ssz); thissse += ssz; thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, - so->scan, so->neighbors); + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); rd = MIN(rd1, rd2); @@ -1638,7 +1644,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; @@ -1653,7 +1659,7 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre[0]; if (has_second_ref(mbmi)) @@ -1664,6 +1670,45 @@ static INLINE int mv_has_subpel(const MV *mv) { return (mv->row & 0x0F) || (mv->col & 0x0F); } +// Check if NEARESTMV/NEARMV/ZEROMV is the 
cheapest way encode zero motion. +// TODO(aconverse): Find out if this is still productive then clean up or remove +static int check_best_zero_mv( + const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int disable_inter_mode_mask, int this_mode, int ref_frame, + int second_ref_frame) { + if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && + frame_mv[this_mode][ref_frame].as_int == 0 && + (second_ref_frame == NONE || + frame_mv[this_mode][second_ref_frame].as_int == 0)) { + int rfc = mode_context[ref_frame]; + int c1 = cost_mv_ref(cpi, NEARMV, rfc); + int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); + int c3 = cost_mv_ref(cpi, ZEROMV, rfc); + + if (this_mode == NEARMV) { + if (c1 > c3) return 0; + } else if (this_mode == NEARESTMV) { + if (c2 > c3) return 0; + } else { + assert(this_mode == ZEROMV); + if (second_ref_frame == NONE) { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + return 0; + } else { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && + frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && + frame_mv[NEARMV][second_ref_frame].as_int == 0)) + return 0; + } + } + } + return 1; +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, @@ -1674,7 +1719,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE this_mode; MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -1691,6 +1736,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, 
MACROBLOCK *x, int mode_idx; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); + const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); @@ -1706,7 +1752,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { // TODO(jingning,rbultje): rewrite the rate-distortion optimization // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop - int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT]; + int_mv mode_mv[MB_MODE_COUNT][2]; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; MB_PREDICTION_MODE mode_selected = ZEROMV; int64_t best_rd = INT64_MAX; @@ -1728,45 +1774,14 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; - if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << mode_idx)) + if (disable_inter_mode_mask & (1 << mode_idx)) continue; - // if we're near/nearest and mv == 0,0, compare to zeromv - if ((this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && - (!has_second_rf || - frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { - int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!has_second_rf) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && - 
frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) - continue; - } - } - } + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, + this_mode, mbmi->ref_frame[0], + mbmi->ref_frame[1])) + continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, @@ -1777,7 +1792,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { - int_mv *const new_mv = &mode_mv[NEWMV]; + int_mv *const new_mv = &mode_mv[NEWMV][0]; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; @@ -1835,18 +1850,30 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else if (cpi->sf.search_method == SQUARE) { bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else if (cpi->sf.search_method == BIGDIA) { bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, @@ -1925,55 +1952,43 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, } bsi->rdstat[i][mode_idx].brate = - labels2mode(x, i, this_mode, &mode_mv[this_mode], - 
&second_mode_mv[this_mode], frame_mv, seg_mvs[i], - bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost, - x->mvcost, cpi); - - - bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].mvs[0].as_int = - mode_mv[this_mode].as_int; - if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].mvs[0].as_int = - mode_mv[this_mode].as_int; - if (has_second_rf) { - bsi->rdstat[i][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv, + seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost); + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + bsi->rdstat[i][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; } // Trap vectors that reach beyond the UMV borders - if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) || + if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) || (has_second_rf && - mv_check_bounds(x, &second_mode_mv[this_mode].as_mv))) + mv_check_bounds(x, &mode_mv[this_mode][1].as_mv))) continue; if (filter_idx > 0) { BEST_SEG_INFO *ref_bsi = bsi_buf; - subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv); - have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_rf) { - subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv); - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; + subpelmv = 0; + have_ref = 1; + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); + have_ref &= 
mode_mv[this_mode][ref].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (filter_idx > 1 && !subpelmv && !have_ref) { ref_bsi = bsi_buf + 1; - have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_rf) { - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; - } + have_ref = 1; + for (ref = 0; ref < 1 + has_second_rf; ++ref) + have_ref &= mode_mv[this_mode][ref].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (!subpelmv && have_ref && @@ -2034,10 +2049,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); - labels2mode(x, i, mode_selected, &mode_mv[mode_selected], - &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], - bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost, - x->mvcost, cpi); + labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected], + frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, + x->mvcost); br += bsi->rdstat[i][mode_idx].brate; bd += bsi->rdstat[i][mode_idx].bdist; @@ -2084,7 +2098,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int i; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; @@ -2131,7 +2145,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size ) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int_mv this_mv; int i; int zero_seen = 0; @@ -2160,10 +2174,9 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, max_mv = MAX(max_mv, MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); // only need to check zero mv once - if (!this_mv.as_int && zero_seen) { - 
x->mode_sad[ref_frame][i] = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)]; + if (!this_mv.as_int && zero_seen) continue; - } + zero_seen = zero_seen || !this_mv.as_int; row_offset = this_mv.as_mv.row >> 3; @@ -2174,9 +2187,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride, 0x7fffffff); - x->mode_sad[ref_frame][i] = this_sad; - if (this_mv.as_int == 0) - x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = this_sad; // Note if it is the best so far. if (this_sad < best_sad) { @@ -2185,12 +2195,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, } } - if (!zero_seen) - x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = - cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_buffer, ref_y_stride, - 0x7fffffff); - // Note the index of the mv that worked best in the reference list. x->mv_best_ref_index[ref_frame] = best_index; x->max_mv_context[ref_frame] = max_mv; @@ -2271,7 +2275,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // restored if we decide to encode this way ctx->skip = x->skip; ctx->best_mode_index = mode_index; - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; ctx->best_ref_mv[0].as_int = ref_mv->as_int; ctx->best_ref_mv[1].as_int = second_ref_mv->as_int; @@ -2322,7 +2326,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; @@ -2331,8 +2335,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them - 
vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref_frame, candidates, - mi_row, mi_col); + vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, @@ -2355,22 +2358,21 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -static INLINE int get_switchable_rate(const MACROBLOCK *x) { +int vp9_get_switchable_rate(const MACROBLOCK *x) { const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[ctx][mbmi->interp_filter]; } static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int bestsme = INT_MAX; int further_steps, step_param; @@ -2400,7 +2402,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } vp9_set_mv_search_range(x, &ref_mv); @@ -2456,22 +2458,41 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Further step/diamond searches as necessary further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - if (cpi->sf.search_method == FAST_HEX) { - bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, + if (cpi->sf.search_method == FAST_DIAMOND) { + bestsme 
= vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == FAST_HEX) { + bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == HEX) { bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == SQUARE) { bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == BIGDIA) { bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 1, @@ -2517,13 +2538,14 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv ref_mv[2]; int ite, ref; // Prediction buffer from second frame. 
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Do joint motion search in compound mode to get more accurate mv. struct buf_2d backup_yv12[2][MAX_MB_PLANE]; @@ -2544,7 +2566,8 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[ref][i] = xd->plane[i].pre[ref]; - setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, + NULL); } frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; @@ -2576,7 +2599,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2597,6 +2620,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &ref_mv[id].as_mv, second_pred, pw, ph); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv, + second_pred, &cpi->fn_ptr[bsize], 1); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -2658,7 +2684,6 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, } static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int64_t txfm_cache[], int *rate2, int64_t *distortion, @@ -2674,7 +2699,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int64_t ref_best_rd) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); const int num_refs = is_comp_pred ? 
2 : 1; const int this_mode = mbmi->mode; @@ -2720,13 +2745,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += rate_mv; } else { int_mv tmp_mv; - single_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; *rate2 += rate_mv; frame_mv[refs[0]].as_int = - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; } } @@ -2788,8 +2813,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2859,8 +2883,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? 
vp9_get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2890,12 +2913,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(x); if (!is_comp_pred) { - if (cpi->active_map_enabled && x->active_ptr[0] == 0) + if (!x->in_active_map) { + if (psse) + *psse = 0; + *distortion = 0; x->skip = 1; - else if (cpi->allow_encode_breakout && x->encode_breakout) { + } else if (cpi->allow_encode_breakout && x->encode_breakout) { const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var, sse; @@ -2990,7 +3016,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); - super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv, + super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty); if (*rate_uv == INT_MAX) { *rate2 = INT_MAX; @@ -3045,7 +3071,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -3054,7 +3080,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize, max_uv_tx_size); } else { @@ -3064,7 +3090,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size 
= get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size); } @@ -3087,7 +3113,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; } int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, @@ -3100,9 +3126,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; @@ -3120,7 +3145,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; - int mode_index, best_mode_index = 0; + int mode_index, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; @@ -3138,12 +3163,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; int best_skip2 = 0; int mode_skip_mask = 0; - const int mode_skip_start = cpi->sf.mode_skip_start + 1; + int mode_skip_start = cpi->sf.mode_skip_start + 1; const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = 
cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; + int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3167,7 +3193,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; @@ -3242,6 +3268,24 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_skip_mask |= new_modes_mask; } + if (bsize > cpi->sf.max_intra_bsize) { + mode_skip_mask |= 0xFF30808; + } + + if (!x->in_active_map) { + int mode_index; + assert(cpi->ref_frame_flags & VP9_LAST_FLAG); + if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0) + mode_index = THR_NEARESTMV; + else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0) + mode_index = THR_NEARMV; + else + mode_index = THR_ZEROMV; + mode_skip_mask = ~(1 << mode_index); + mode_skip_start = MAX_MODES; + disable_inter_mode_mask = 0; + } + for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3258,7 +3302,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. 
- if (mode_index == mode_skip_start) { + if (mode_index == mode_skip_start && best_mode_index >= 0) { switch (vp9_mode_order[best_mode_index].ref_frame[0]) { case INTRA_FRAME: break; @@ -3288,13 +3332,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; if (ref_frame != INTRA_FRAME && - cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode))) + disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) continue; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + best_mode_index >=0 && vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) continue; if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && @@ -3322,7 +3367,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // one of the neighboring directional modes if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && (this_mode >= D45_PRED && this_mode <= TM_PRED)) { - if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) + if (best_mode_index >= 0 && + vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) continue; } if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { @@ -3331,46 +3377,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - // if we're near/nearest and mv == 0,0, compare to zeromv - if ((this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && - (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mbmi->mode_context[ref_frame]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > 
c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!comp_pred) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) - continue; - } - } - } + if (x->in_active_map && + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, this_mode, ref_frame, + second_ref_frame)) + continue; } mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use @@ -3379,7 +3395,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, : cm->interp_filter; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { @@ -3422,7 +3437,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else { - this_rd = handle_inter_mode(cpi, x, tile, bsize, + this_rd = handle_inter_mode(cpi, x, bsize, tx_cache, &rate2, &distortion2, &skippable, &rate_y, &distortion_y, @@ -3641,7 +3656,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } - if (best_rd >= best_rd_so_far) + if (best_mode_index < 0 || best_rd >= best_rd_so_far) return INT64_MAX; // If we used an estimate for the uv intra rd in the loop above... 
@@ -3671,16 +3686,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { + int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index]; + if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_fact[bsize][mode_index] -= - (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3); + *fact -= (*fact >> 3); } else { - cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC; - if (cpi->rd_thresh_freq_fact[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) { - cpi->rd_thresh_freq_fact[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT; - } + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } } @@ -3716,6 +3728,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } + if (!x->in_active_map) { + assert(mbmi->ref_frame[0] == LAST_FRAME); + assert(mbmi->ref_frame[1] == NONE); + assert(mbmi->mode == NEARESTMV || + mbmi->mode == NEARMV || + mbmi->mode == ZEROMV); + assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0); + assert(mbmi->mode == mbmi->uv_mode); + } + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], @@ -3735,11 +3757,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; - const struct segmentation *seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame; 
unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3799,7 +3820,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } @@ -3832,10 +3853,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t total_sse = INT_MAX; int early_term = 0; - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = INT64_MAX; - - x->skip = 0; ref_frame = vp9_ref_order[mode_index].ref_frame[0]; second_ref_frame = vp9_ref_order[mode_index].ref_frame[1]; @@ -3872,71 +3889,43 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX) continue; - // Do not allow compound prediction if the segment level reference - // frame feature is in use as in this case there can only be one reference. 
- if ((second_ref_frame > INTRA_FRAME) && - vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - continue; - - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; - - if (!(ref_frame == INTRA_FRAME - || (cpi->ref_frame_flags & flag_list[ref_frame]))) { - continue; - } - if (!(second_ref_frame == NONE - || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { + if (ref_frame > INTRA_FRAME && + !(cpi->ref_frame_flags & flag_list[ref_frame])) { continue; } comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) - if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) - continue; - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (ref_frame != best_inter_ref_frame && - second_ref_frame != best_inter_ref_frame) - continue; + if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) + continue; + // Do not allow compound prediction if the segment level reference frame + // feature is in use as in this case there can only be one reference. + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + continue; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) + continue; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && + ref_frame != best_inter_ref_frame && + second_ref_frame != best_inter_ref_frame) + continue; } // TODO(jingning, jkoleszar): scaling reference frame not supported for // sub8x8 blocks. 
- if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) + if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) continue; - if (second_ref_frame > 0 && + if (second_ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - mbmi->uv_mode = DC_PRED; - - // Evaluate all sub-pel filters irrespective of whether we can use - // them for this frame. - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP - : cm->interp_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - if (comp_pred) { - if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) - continue; - mode_excluded = mode_excluded ? mode_excluded : cm->reference_mode == SINGLE_REFERENCE; - } else { - if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { - mode_excluded = mode_excluded ? - mode_excluded : cm->reference_mode == COMPOUND_REFERENCE; - } - } - - // Select prediction reference frames. - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - if (comp_pred) - xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; + } else if (ref_frame != INTRA_FRAME) { + mode_excluded = mode_excluded ? mode_excluded + : cm->reference_mode == COMPOUND_REFERENCE; } // If the segment reference frame feature is enabled.... @@ -3963,6 +3952,27 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; } + mbmi->tx_size = TX_4X4; + mbmi->uv_mode = DC_PRED; + mbmi->ref_frame[0] = ref_frame; + mbmi->ref_frame[1] = second_ref_frame; + // Evaluate all sub-pel filters irrespective of whether we can use + // them for this frame. + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + x->skip = 0; + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); + + // Select prediction reference frames. 
+ for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; + if (comp_pred) + xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; + } + + for (i = 0; i < TX_MODES; ++i) + tx_cache[i] = INT64_MAX; + #ifdef MODE_TEST_HIT_STATS // TEST/DEBUG CODE // Keep a rcord of the number of test hits at each size @@ -3971,7 +3981,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { int rate; - mbmi->tx_size = TX_4X4; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; @@ -4016,7 +4025,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; cpi->mask_filter_rd = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) @@ -4024,8 +4032,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; - if (x->source_variance < - cpi->sf.disable_filter_search_var_thresh) { + if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; } else if (cpi->sf.adaptive_pred_interp_filter == 1 && ctx->pred_interp_filter < SWITCHABLE) { @@ -4040,7 +4047,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4053,7 +4059,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[switchable_filter_index] = 
tmp_rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = @@ -4080,7 +4086,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_skippable = skippable; tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { - tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; @@ -4105,7 +4111,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter : cm->interp_filter); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4128,14 +4133,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, skippable = tmp_best_skippable; *mbmi = tmp_best_mbmode; for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i]; + xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } rate2 += rate; distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(x); if (!mode_excluded) mode_excluded = comp_pred ? 
cm->reference_mode == SINGLE_REFERENCE @@ -4152,7 +4157,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // then dont bother looking at UV vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); - super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable, + super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, tmp_best_rdu); if (rate_uv == INT_MAX) continue; @@ -4212,8 +4217,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } // Keep record of best inter rd with single reference - if (is_inter_block(&xd->mi_8x8[0]->mbmi) && - !has_second_ref(&xd->mi_8x8[0]->mbmi) && + if (is_inter_block(mbmi) && + !has_second_ref(mbmi) && !mode_excluded && this_rd < best_inter_rd) { best_inter_rd = this_rd; @@ -4249,11 +4254,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, best_skip2 = this_skip2; if (!x->select_txfm_size) swap_block_ptr(x, ctx, max_plane); - vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], + vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(uint8_t) * ctx->num_4x4_blk); for (i = 0; i < 4; i++) - best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + best_bmodes[i] = xd->mi[0]->bmi[i]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -4289,11 +4294,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_REFERENCE]) { + if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) { best_pred_rd[SINGLE_REFERENCE] = single_rd; - } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + } else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) { best_pred_rd[COMPOUND_REFERENCE] = single_rd; } if (hybrid_rd 
< best_pred_rd[REFERENCE_MODE_SELECT]) @@ -4324,13 +4327,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (bsize < BLOCK_32X32) { - if (bsize < BLOCK_16X16) { - tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; - tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; - } - tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; - } + tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; + tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; + tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) { int64_t adj_rd = INT64_MAX; @@ -4369,7 +4368,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - if (best_rd == INT64_MAX && bsize < BLOCK_8X8) { + if (best_rd == INT64_MAX) { *returnrate = INT_MAX; *returndistortion = INT64_MAX; return best_rd; @@ -4386,16 +4385,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. 
if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { + int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index]; + if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -= - (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3); + *fact -= (*fact >> 3); } else { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC; - if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT; - } + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } } @@ -4405,13 +4401,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode; + xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { for (i = 0; i < 4; ++i) - vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); + vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int; + mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { @@ -4430,11 +4426,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); - } else { - vp9_zero(best_filter_diff); - } - - if (!x->skip) { for (i = 0; i < TX_MODES; i++) { if (best_tx_rd[i] == INT64_MAX) best_tx_diff[i] = 0; @@ -4442,6 +4433,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, best_tx_diff[i] = best_rd - best_tx_rd[i]; } } else { + vp9_zero(best_filter_diff); vp9_zero(best_tx_diff); } 
diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.h b/source/libvpx/vp9/encoder/vp9_rdopt.h index 6b85d67..a01dbd4 100644 --- a/source/libvpx/vp9/encoder/vp9_rdopt.h +++ b/source/libvpx/vp9/encoder/vp9_rdopt.h @@ -23,11 +23,6 @@ extern "C" { (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) #define QIDX_SKIP_THRESH 115 -#define RD_THRESH_MAX_FACT 64 -#define RD_THRESH_INC 1 -#define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 - #define MV_COST_WEIGHT 108 #define MV_COST_WEIGHT_SUB 120 @@ -35,12 +30,18 @@ extern "C" { struct TileInfo; -int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex); +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex); void vp9_initialize_rd_consts(VP9_COMP *cpi); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist); + +int vp9_get_switchable_rate(const MACROBLOCK *x); + void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame, @@ -77,9 +78,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, void vp9_init_me_luts(); -void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode, - const MV *mv); - void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, ENTROPY_CONTEXT t_above[16], diff --git a/source/libvpx/vp9/encoder/vp9_sad.c b/source/libvpx/vp9/encoder/vp9_sad.c index 58c5df4..9d8da0d 100644 --- a/source/libvpx/vp9/encoder/vp9_sad.c +++ b/source/libvpx/vp9/encoder/vp9_sad.c @@ -44,7 +44,7 @@ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *second_pred, \ unsigned int max_sad) { \ uint8_t comp_pred[m * n]; \ - comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ + vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ return sad(src_ptr, src_stride, comp_pred, m, m, n); \ } diff --git 
a/source/libvpx/vp9/encoder/vp9_segmentation.c b/source/libvpx/vp9/encoder/vp9_segmentation.c index 49fd7bb..9d3e6dc 100644 --- a/source/libvpx/vp9/encoder/vp9_segmentation.c +++ b/source/libvpx/vp9/encoder/vp9_segmentation.c @@ -16,6 +16,7 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_segmentation.h" void vp9_enable_segmentation(struct segmentation *seg) { @@ -132,8 +133,8 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - xd->mi_8x8 = mi_8x8; - segment_id = xd->mi_8x8[0]->mbmi.segment_id; + xd->mi = mi_8x8; + segment_id = xd->mi[0]->mbmi.segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -151,7 +152,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, // Store the prediction status for this mb and update counts // as appropriate - xd->mi_8x8[0]->mbmi.seg_id_predicted = pred_flag; + xd->mi[0]->mbmi.seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; if (!pred_flag) @@ -168,7 +169,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int bw, bh; const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; @@ -228,7 +229,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { vp9_prob t_pred_tree[SEG_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; MODE_INFO **mi_ptr, **mi; // Set default state for the segment tree probabilities and the diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.c b/source/libvpx/vp9/encoder/vp9_speed_features.c new file mode 100644 index 0000000..adad800 --- /dev/null +++ 
b/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_speed_features.h" + +#define ALL_INTRA_MODES ((1 << DC_PRED) | \ + (1 << V_PRED) | (1 << H_PRED) | \ + (1 << D45_PRED) | (1 << D135_PRED) | \ + (1 << D117_PRED) | (1 << D153_PRED) | \ + (1 << D207_PRED) | (1 << D63_PRED) | \ + (1 << TM_PRED)) +#define INTRA_DC_ONLY (1 << DC_PRED) +#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) +#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) +#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) + +// Masks for partially or completely disabling split mode +#define DISABLE_ALL_INTER_SPLIT ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD) | \ + (1 << THR_LAST)) + +#define DISABLE_ALL_SPLIT ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT) + +#define DISABLE_COMPOUND_SPLIT ((1 << THR_COMP_GA) | (1 << THR_COMP_LA)) + +#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD)) + +static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, + SPEED_FEATURES *sf, int speed) { + sf->adaptive_rd_thresh = 1; + sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW; + sf->allow_skip_recode = 1; + + if (speed >= 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? 
USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->subpel_iters_per_step = 1; + sf->mode_skip_start = 10; + sf->adaptive_pred_interp_filter = 1; + + sf->recode_loop = ALLOW_RECODE_KFARFGF; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + + if (speed >= 2) { + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->adaptive_pred_interp_filter = 2; + sf->reference_masking = 1; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + } + + if (speed >= 3) { + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->recode_loop = ALLOW_RECODE_KFMAXBW; + sf->adaptive_rd_thresh = 3; + sf->mode_skip_start = 6; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->use_fast_coef_costing = 1; + } + + if (speed >= 4) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = 
USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->adaptive_rd_thresh = 4; + sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH | + FLAG_EARLY_TERMINATE; + sf->disable_filter_search_var_thresh = 200; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_lp32x32fdct = 1; + } + + if (speed >= 5) { + int i; + + sf->partition_search_type = FIXED_PARTITION; + sf->optimize_coefficients = 0; + sf->search_method = HEX; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; ++i) { + sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; + } +} + +static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, + int speed) { + sf->static_segmentation = 0; + sf->adaptive_rd_thresh = 1; + sf->encode_breakout_thresh = 1; + sf->use_fast_coef_costing = 1; + + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 8; + } + + if (speed >= 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 2; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 200; + } + + if (speed >= 3) { + sf->use_square_partition_only = 1; + sf->disable_filter_search_var_thresh = 100; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->constrain_copy_partition = 1; + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + sf->allow_skip_recode = 0; + sf->optimize_coefficients = 0; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->lpf_pick = LPF_PICK_FROM_Q; + sf->encode_breakout_thresh = 700; + } + + if (speed >= 4) { + int i; + sf->last_partitioning_redo_frequency = 4; + sf->adaptive_rd_thresh = 5; + sf->use_fast_coef_costing = 0; + sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; + sf->adjust_partitioning_from_last_frame = + cm->last_frame_type != cm->frame_type || (0 == + (cm->current_video_frame + 1) % 
sf->last_partitioning_redo_frequency); + sf->subpel_force_stop = 1; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->frame_parameter_update = 0; + sf->encode_breakout_thresh = 1000; + sf->search_method = FAST_HEX; + sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); + sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->max_intra_bsize = BLOCK_32X32; + sf->allow_skip_recode = 1; + } + + if (speed >= 5) { + sf->max_partition_size = BLOCK_32X32; + sf->min_partition_size = BLOCK_8X8; + sf->partition_check = + (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); + sf->force_frame_boost = cm->frame_type == KEY_FRAME || + (cm->current_video_frame % + (sf->last_partitioning_redo_frequency << 1) == 1); + sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15; + sf->partition_search_type = REFERENCE_PARTITION; + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + sf->allow_skip_recode = 0; + } + + if (speed >= 6) { + // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. + sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 360; + } + + if (speed >= 7) { + int i; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + } +} + +void vp9_set_speed_features(VP9_COMP *cpi) { + SPEED_FEATURES *const sf = &cpi->sf; + VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int speed = cpi->speed < 0 ? 
-cpi->speed : cpi->speed; + int i; + + // best quality defaults + sf->frame_parameter_update = 1; + sf->search_method = NSTEP; + sf->recode_loop = ALLOW_RECODE; + sf->subpel_search_method = SUBPEL_TREE; + sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; + sf->optimize_coefficients = !oxcf->lossless; + sf->reduce_first_step_size = 0; + sf->auto_mv_step_size = 0; + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->adaptive_rd_thresh = 0; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; + sf->tx_size_search_method = USE_FULL_RD; + sf->use_lp32x32fdct = 0; + sf->adaptive_motion_search = 0; + sf->adaptive_pred_interp_filter = 0; + sf->reference_masking = 0; + sf->partition_search_type = SEARCH_PARTITION; + sf->less_rectangular_check = 0; + sf->use_square_partition_only = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; + sf->max_partition_size = BLOCK_64X64; + sf->min_partition_size = BLOCK_4X4; + sf->adjust_partitioning_from_last_frame = 0; + sf->last_partitioning_redo_frequency = 4; + sf->constrain_copy_partition = 0; + sf->disable_split_mask = 0; + sf->mode_search_skip_flags = 0; + sf->force_frame_boost = 0; + sf->max_delta_qindex = 0; + sf->disable_split_var_thresh = 0; + sf->disable_filter_search_var_thresh = 0; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + } + sf->use_rd_breakout = 0; + sf->skip_encode_sb = 0; + sf->use_uv_intra_rd_estimate = 0; + sf->allow_skip_recode = 0; + sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; + sf->use_fast_coef_updates = TWO_LOOP; + sf->use_fast_coef_costing = 0; + sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->use_nonrd_pick_mode = 0; + sf->encode_breakout_thresh = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = 0; + sf->max_intra_bsize = BLOCK_64X64; + // This setting only takes effect when partition_search_type is set 
+ // to FIXED_PARTITION. + sf->always_this_block_size = BLOCK_16X16; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 100; + + // Recode loop tolerence %. + sf->recode_tolerance = 25; + + switch (oxcf->mode) { + case MODE_BESTQUALITY: + case MODE_SECONDPASS_BEST: // This is the best quality mode. + cpi->diamond_search_sad = vp9_full_range_search; + break; + case MODE_FIRSTPASS: + case MODE_GOODQUALITY: + case MODE_SECONDPASS: + set_good_speed_feature(cpi, cm, sf, speed); + break; + case MODE_REALTIME: + set_rt_speed_feature(cm, sf, speed); + break; + } + + // Slow quant, dct and trellis not worthwhile for first pass + // so make sure they are always turned off. + if (cpi->pass == 1) + sf->optimize_coefficients = 0; + + // No recode for 1 pass. + if (cpi->pass == 0) { + sf->recode_loop = DISALLOW_RECODE; + sf->optimize_coefficients = 0; + } + + if (sf->subpel_search_method == SUBPEL_TREE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; + } + + cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1; + + if (cpi->encode_breakout && oxcf->mode == MODE_REALTIME && + sf->encode_breakout_thresh > cpi->encode_breakout) + cpi->encode_breakout = sf->encode_breakout_thresh; + + if (sf->disable_split_mask == DISABLE_ALL_SPLIT) + sf->adaptive_pred_interp_filter = 0; + + if (!cpi->oxcf.frame_periodic_boost) { + sf->max_delta_qindex = 0; + } +} diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.h b/source/libvpx/vp9/encoder/vp9_speed_features.h new file mode 100644 index 0000000..72f548a --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ + +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + DIAMOND = 0, + NSTEP = 1, + HEX = 2, + BIGDIA = 3, + SQUARE = 4, + FAST_HEX = 5, + FAST_DIAMOND = 6 +} SEARCH_METHODS; + +typedef enum { + // No recode. + DISALLOW_RECODE = 0, + // Allow recode for KF and exceeding maximum frame bandwidth. + ALLOW_RECODE_KFMAXBW = 1, + // Allow recode only for KF/ARF/GF frames. + ALLOW_RECODE_KFARFGF = 2, + // Allow recode for all frames based on bitrate constraints. + ALLOW_RECODE = 3, +} RECODE_LOOP_TYPE; + +typedef enum { + SUBPEL_TREE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + +typedef enum { + LAST_FRAME_PARTITION_OFF = 0, + LAST_FRAME_PARTITION_LOW_MOTION = 1, + LAST_FRAME_PARTITION_ALL = 2 +} LAST_FRAME_PARTITION_METHOD; + +typedef enum { + USE_FULL_RD = 0, + USE_LARGESTINTRA, + USE_LARGESTINTRA_MODELINTER, + USE_LARGESTALL +} TX_SIZE_SEARCH_METHOD; + +typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { + // Try the full image with different values. + LPF_PICK_FROM_FULL_IMAGE, + // Try a small portion of the image with different values. + LPF_PICK_FROM_SUBIMAGE, + // Estimate the level based on quantizer and frame type + LPF_PICK_FROM_Q, +} LPF_PICK_METHOD; + +typedef enum { + // Terminate search early based on distortion so far compared to + // qp step, distortion in the neighborhood of the frame, etc. + FLAG_EARLY_TERMINATE = 1 << 0, + + // Skips comp inter modes if the best so far is an intra mode. 
+ FLAG_SKIP_COMP_BESTINTRA = 1 << 1, + + // Skips comp inter modes if the best single intermode so far does + // not have the same reference as one of the two references being + // tested. + FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, + + // Skips oblique intra modes if the best so far is an inter mode. + FLAG_SKIP_INTRA_BESTINTER = 1 << 3, + + // Skips oblique intra modes at angles 27, 63, 117, 153 if the best + // intra so far is not one of the neighboring directions. + FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, + + // Skips intra modes other than DC_PRED if the source variance is small + FLAG_SKIP_INTRA_LOWVAR = 1 << 5, +} MODE_SEARCH_SKIP_LOGIC; + +typedef enum { + // Search partitions using RD/NONRD criterion + SEARCH_PARTITION = 0, + + // Always use a fixed size partition + FIXED_PARTITION = 1, + + // Use a fixed size partition in every 64X64 SB, where the size is + // determined based on source variance + VAR_BASED_FIXED_PARTITION = 2, + + REFERENCE_PARTITION = 3, + + // Use an arbitrary partitioning scheme based on source variance within + // a 64X64 SB + VAR_BASED_PARTITION, + + // Use non-fixed partitions based on source variance + SOURCE_VAR_BASED_PARTITION +} PARTITION_SEARCH_TYPE; + +typedef enum { + // Does a dry run to see if any of the contexts need to be updated or not, + // before the final run. + TWO_LOOP = 0, + + // No dry run conducted. + ONE_LOOP = 1, + + // No dry run, also only half the coef contexts and bands are updated. + // The rest are not updated at all. + ONE_LOOP_REDUCED = 2 +} FAST_COEFF_UPDATE; + +typedef struct { + // Frame level coding parameter update + int frame_parameter_update; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). 
+ SEARCH_METHODS search_method; + + RECODE_LOOP_TYPE recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. + SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. + int subpel_iters_per_step; + + // Control when to stop subpel search + int subpel_force_stop; + + // This parameter controls the number of steps we'll do in a diamond + // search. + int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. + int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. + int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). + int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes make the + // backgrounds very to cheap to encode, and the segmentation we have + // adds overhead. + int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 we + // we just use the best motion vector found for each frame by itself. + int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. 
+ int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. + int skip_encode_sb; + int skip_encode_frame; + // Speed feature to allow or disallow skipping of recode at block + // level within a frame. + int allow_skip_recode; + + // This variable allows us to reuse the last frames partition choices + // (64x64 v 32x32 etc) for this frame. It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last partitioning_redo frequency to determine how often to redo + // the partitioning from scratch. Adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frames + // partitioning. + LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. + TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. + int use_lp32x32fdct; + + // TODO(JBB): remove this as its no longer used. + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. + int mode_skip_start; + + // TODO(JBB): Remove this. + int reference_masking; + + PARTITION_SEARCH_TYPE partition_search_type; + + // Used if partition_search_type = FIXED_SIZE_PARTITION + BLOCK_SIZE always_this_block_size; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. + int less_rectangular_check; + + // Disable testing non square partitions. 
(eg 16x32) + int use_square_partition_only; + + // Sets min and max partition sizes for this 64x64 region based on the + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. + BLOCK_SIZE min_partition_size; + BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. + int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. + int last_partitioning_redo_frequency; + + // This enables constrained copy partitioning, which, given an input block + // size bsize, will copy previous partition for partitions less than bsize, + // otherwise bsize partition is used. bsize is currently set to 16x16. + // Used for the case where motion is detected in superblock. + int constrain_copy_partition; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. + int disable_split_mask; + + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. + int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. 
+ int adaptive_pred_interp_filter; + + // Search through variable block partition types in non-RD mode decision + // encoding process for RTC. + int partition_check; + + // Use finer quantizer in every other few frames that run variable block + // partition type search. + int force_frame_boost; + + // Maximally allowed base quantization index fluctuation. + int max_delta_qindex; + + // Implements various heuristics to skip searching modes + // The heuristics selected are based on flags + // defined in the MODE_SEARCH_SKIP_HEURISTICS enum + unsigned int mode_search_skip_flags; + + // A source variance threshold below which the split mode is disabled + unsigned int disable_split_var_thresh; + + // A source variance threshold below which filter search is disabled + // Choose a very large value (UINT_MAX) to use 8-tap always + unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. + int intra_y_mode_mask[TX_SIZES]; + int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. + int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. + int use_uv_intra_rd_estimate; + + // This feature controls how the loop filter level is determined. + LPF_PICK_METHOD lpf_pick; + + // This feature limits the number of coefficients updates we actually do + // by only looking at counts from 1/2 the bands. + FAST_COEFF_UPDATE use_fast_coef_updates; + + // This flag controls the use of non-RD mode decision. + int use_nonrd_pick_mode; + + // This variable sets the encode_breakout threshold. Currently, it is only + // enabled in real time mode. 
+ int encode_breakout_thresh; + + // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV + // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. + int disable_inter_mode_mask[BLOCK_SIZES]; + + // This feature controls whether we do the expensive context update and + // calculation in the rd coefficient costing loop. + int use_fast_coef_costing; + + // This feature controls the tolerence vs target used in deciding whether to + // recode a frame. It has no meaning if recode is disabled. + int recode_tolerance; + + // This variable controls the maximum block size where intra blocks can be + // used in inter frames. + // TODO(aconverse): Fold this into one of the other many mode skips + BLOCK_SIZE max_intra_bsize; + + // The frequency that we check if SOURCE_VAR_BASED_PARTITION or + // FIXED_PARTITION search type should be used. + int search_type_check_frequency; + + // The threshold used in SOURCE_VAR_BASED_PARTITION search type. + int source_var_thresh; +} SPEED_FEATURES; + +struct VP9_COMP; + +void vp9_set_speed_features(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ + diff --git a/source/libvpx/vp9/encoder/vp9_ssim.c b/source/libvpx/vp9/encoder/vp9_ssim.c index a5f18e6..026e6a8 100644 --- a/source/libvpx/vp9/encoder/vp9_ssim.c +++ b/source/libvpx/vp9/encoder/vp9_ssim.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" -#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, @@ -65,12 +66,6 @@ static double similarity(unsigned long sum_s, unsigned long sum_r, return ssim_n * 1.0 / ssim_d; } -static double ssim_16x16(uint8_t *s, int sp, uint8_t *r, int rp) { - unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - vp9_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, - &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); -} static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, diff --git a/source/libvpx/vp9/encoder/vp9_ssim.h b/source/libvpx/vp9/encoder/vp9_ssim.h new file mode 100644 index 0000000..a581c2c --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_ssim.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_ENCODER_VP9_SSIM_H_ +#define VP9_ENCODER_VP9_SSIM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vpx_scale/yv12config.h" + +double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight); + +double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SSIM_H_ diff --git a/source/libvpx/vp9/encoder/vp9_subexp.c b/source/libvpx/vp9/encoder/vp9_subexp.c index fdc2106..9796d64 100644 --- a/source/libvpx/vp9/encoder/vp9_subexp.c +++ b/source/libvpx/vp9/encoder/vp9_subexp.c @@ -11,22 +11,13 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" -#include "vp9/encoder/vp9_treewriter.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_writer.h" #define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) static int update_bits[255]; -static int split_index(int i, int n, int modulus) { - int max1 = (n - 1 - modulus / 2) / modulus + 1; - if (i % modulus == modulus / 2) - i = i / modulus; - else - i = max1 + i - (i + modulus - modulus / 2) / modulus; - return i; -} - static int recenter_nonneg(int v, int m) { if (v > (m << 1)) return v; diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c new file mode 100644 index 0000000..c2b6263 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_svc_layercontext.h" + +void vp9_init_layer_context(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + int layer; + int layer_end; + + svc->spatial_layer_id = 0; + svc->temporal_layer_id = 0; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lc->current_video_frame_in_layer = 0; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->ni_av_qi = oxcf->worst_allowed_q; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->avg_q = 0.0; + lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; + lrc->rate_correction_factor = 1.0; + lrc->key_frame_rate_correction_factor = 1.0; + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lrc->last_q[0] = oxcf->best_allowed_q; + lrc->last_q[1] = oxcf->best_allowed_q; + lrc->last_q[2] = oxcf->best_allowed_q; + } + + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); + lrc->bits_off_target = lrc->buffer_level; + } +} + +// Update the layer context from a change_config() call. 
+void vp9_update_layer_context_change_config(VP9_COMP *const cpi, + const int target_bandwidth) { + SVC *const svc = &cpi->svc; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const RATE_CONTROL *const rc = &cpi->rc; + int layer; + int layer_end; + float bitrate_alloc = 1.0; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + } + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; + // Update buffer-related quantities. + lc->starting_buffer_level = + (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); + lc->optimal_buffer_level = + (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc); + lc->maximum_buffer_size = + (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); + // Update framerate-related quantities. + if (svc->number_temporal_layers > 1) { + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = oxcf->framerate; + } + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + } +} + +static LAYER_CONTEXT *get_layer_context(SVC *svc) { + return svc->number_temporal_layers > 1 ? 
+ &svc->layer_context[svc->temporal_layer_id] : + &svc->layer_context[svc->spatial_layer_id]; +} + +void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(svc); + RATE_CONTROL *const lrc = &lc->rc; + const int layer = svc->temporal_layer_id; + + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; + // Update the average layer frame size (non-cumulative per-frame-bw). + if (layer == 0) { + lc->avg_frame_size = lrc->av_per_frame_bandwidth; + } else { + const double prev_layer_framerate = + oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; + const int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[layer - 1] * 1000; + lc->avg_frame_size = + (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / + (lc->framerate - prev_layer_framerate)); + } +} + +void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + RATE_CONTROL *const lrc = &lc->rc; + + lc->framerate = framerate; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->min_frame_bandwidth = (int)(lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + lrc->max_frame_bandwidth = (int)(((int64_t)lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + lrc->max_gf_interval = 16; + + lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) + lrc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + 
+ if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) + lrc->max_gf_interval = lrc->static_scene_max_gf_interval; +} + +void vp9_restore_layer_context(VP9_COMP *const cpi) { + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + const int old_frame_since_key = cpi->rc.frames_since_key; + const int old_frame_to_key = cpi->rc.frames_to_key; + + cpi->rc = lc->rc; + cpi->twopass = lc->twopass; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; + cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; + cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->output_framerate = lc->framerate; + // Reset the frames_since_key and frames_to_key counters to their values + // before the layer restore. Keep these defined for the stream (not layer). + if (cpi->svc.number_temporal_layers > 1) { + cpi->rc.frames_since_key = old_frame_since_key; + cpi->rc.frames_to_key = old_frame_to_key; + } +} + +void vp9_save_layer_context(VP9_COMP *const cpi) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + + lc->rc = cpi->rc; + lc->twopass = cpi->twopass; + lc->target_bandwidth = (int)oxcf->target_bandwidth; + lc->starting_buffer_level = oxcf->starting_buffer_level; + lc->optimal_buffer_level = oxcf->optimal_buffer_level; + lc->maximum_buffer_size = oxcf->maximum_buffer_size; + lc->framerate = cpi->output_framerate; +} + +void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; + int i; + + for (i = 0; i < svc->number_spatial_layers; ++i) { + struct twopass_rc *const twopass = &svc->layer_context[i].twopass; + + svc->spatial_layer_id = i; + vp9_init_second_pass(cpi); + + twopass->total_stats.spatial_layer_id = i; + twopass->total_left_stats.spatial_layer_id = i; + } + svc->spatial_layer_id = 0; +} + +void vp9_inc_frame_in_layer(SVC *svc) { + LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1) + ? 
&svc->layer_context[svc->temporal_layer_id] + : &svc->layer_context[svc->spatial_layer_id]; + ++lc->current_video_frame_in_layer; +} diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/source/libvpx/vp9/encoder/vp9_svc_layercontext.h new file mode 100644 index 0000000..2abed30 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ +#define VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ + +#include "vpx/vpx_encoder.h" + +#include "vp9/encoder/vp9_ratectrl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + RATE_CONTROL rc; + int target_bandwidth; + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; + double framerate; + int avg_frame_size; + struct twopass_rc twopass; + struct vpx_fixed_buf rc_twopass_stats_in; + unsigned int current_video_frame_in_layer; +} LAYER_CONTEXT; + +typedef struct { + int spatial_layer_id; + int temporal_layer_id; + int number_spatial_layers; + int number_temporal_layers; + // Layer context used for rate control in one pass temporal CBR mode or + // two pass spatial mode. Defined for temporal or spatial layers for now. + // Does not support temporal combined with spatial RC. + LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; +} SVC; + +struct VP9_COMP; + +// Initialize layer context data from init_config(). +void vp9_init_layer_context(struct VP9_COMP *const cpi); + +// Update the layer context from a change_config() call. 
+void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, + const int target_bandwidth); + +// Prior to encoding the frame, update framerate-related quantities +// for the current temporal layer. +void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); + +// Update framerate-related quantities for the current spatial layer. +void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, + double framerate); + +// Prior to encoding the frame, set the layer context, for the current layer +// to be encoded, to the cpi struct. +void vp9_restore_layer_context(struct VP9_COMP *const cpi); + +// Save the layer context after encoding the frame. +void vp9_save_layer_context(struct VP9_COMP *const cpi); + +// Initialize second pass rc for spatial svc. +void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); + +// Increment number of video frames in layer +void vp9_inc_frame_in_layer(SVC *svc); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SVC_LAYERCONTEXT_ diff --git a/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/source/libvpx/vp9/encoder/vp9_temporal_filter.c index 6233116..0410273 100644 --- a/source/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/source/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -41,7 +41,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, struct scale_factors *scale, int x, int y) { const int which_mv = 0; - MV mv = { mv_row, mv_col }; + const MV mv = { mv_row, mv_col }; + const InterpKernel *const kernel = + vp9_get_interp_kernel(xd->mi[0]->mbmi.interp_filter); + enum mv_precision mv_precision_uv; int uv_stride; if (uv_block_size == 8) { @@ -58,7 +61,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - xd->interp_kernel, MV_PRECISION_Q3, x, y); + kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -66,7 +69,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD 
*xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -74,7 +77,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -133,7 +136,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - MV *ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0].as_mv; + MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv; // Save input state struct buf_2d src = x->plane[0].src; @@ -250,8 +253,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (cpi->frames[frame] == NULL) continue; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -284,8 +287,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->frames[frame]->v_buffer + mb_uv_offset, cpi->frames[frame]->y_stride, mb_uv_height, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale, mb_col * 16, mb_row * 16); diff --git a/source/libvpx/vp9/encoder/vp9_tokenize.c b/source/libvpx/vp9/encoder/vp9_tokenize.c index e8179f3..291ccb3 100644 --- a/source/libvpx/vp9/encoder/vp9_tokenize.c +++ b/source/libvpx/vp9/encoder/vp9_tokenize.c @@ -8,18 +8,20 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - +#include <assert.h> #include <math.h> #include <stdio.h> #include <string.h> -#include <assert.h> -#include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_tokenize.h" + #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_entropy.h" + +#include "vp9/encoder/vp9_cost.h" +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_tokenize.h" static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; const TOKENVALUE *vp9_dct_value_tokens_ptr; @@ -106,7 +108,7 @@ void vp9_coef_tree_initialize() { vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); } -static void fill_value_tokens() { +void vp9_tokenize_initialize() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; const vp9_extra_bit *const e = vp9_extra_bits; @@ -160,7 +162,6 @@ struct tokenize_b_args { VP9_COMP *cpi; MACROBLOCKD *xd; TOKENEXTRA **tp; - uint8_t *token_cache; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -211,10 +212,10 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - uint8_t *token_cache = args->token_cache; + uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ @@ -296,13 +297,6 @@ static void is_skippable(int plane, int block, args->skippable[0] &= (!args->x->plane[plane].eobs[block]); } -static int sb_is_skippable(MACROBLOCK *x, BLOCK_SIZE bsize) { - int result = 1; - struct is_skippable_args args = {x, &result}; - vp9_foreach_transformed_block(&x->e_mbd, bsize, is_skippable, &args); - return result; -} - int 
vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { int result = 1; struct is_skippable_args args = {x, &result}; @@ -315,12 +309,12 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TOKENEXTRA *t_backup = *t; const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache}; + struct tokenize_b_args arg = {cpi, xd, t}; if (mbmi->skip) { if (!dry_run) cm->counts.skip[ctx][1] += skip_inc; @@ -338,7 +332,3 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } - -void vp9_tokenize_initialize() { - fill_value_tokens(); -} diff --git a/source/libvpx/vp9/encoder/vp9_treewriter.c b/source/libvpx/vp9/encoder/vp9_treewriter.c index 35e5a8f..bb04b40 100644 --- a/source/libvpx/vp9/encoder/vp9_treewriter.c +++ b/source/libvpx/vp9/encoder/vp9_treewriter.c @@ -10,33 +10,6 @@ #include "vp9/encoder/vp9_treewriter.h" -static void cost(int *costs, vp9_tree tree, const vp9_prob *probs, - int i, int c) { - const vp9_prob prob = probs[i / 2]; - int b; - - for (b = 0; b <= 1; ++b) { - const int cc = c + vp9_cost_bit(prob, b); - const vp9_tree_index ii = tree[i + b]; - - if (ii <= 0) - costs[-ii] = cc; - else - cost(costs, tree, probs, ii, cc); - } -} - -void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) { - cost(costs, tree, probs, 0, 0); -} - -void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { - assert(tree[0] <= 0 && tree[1] > 0); - - costs[-tree[0]] = vp9_cost_bit(probs[0], 0); - cost(costs, tree, probs, 2, 0); -} - static void tree2tok(struct vp9_token *tokens, const vp9_tree_index *tree, int i, int v, int l) { v += v; diff --git 
a/source/libvpx/vp9/encoder/vp9_treewriter.h b/source/libvpx/vp9/encoder/vp9_treewriter.h index fedfbe9..4a76d87 100644 --- a/source/libvpx/vp9/encoder/vp9_treewriter.h +++ b/source/libvpx/vp9/encoder/vp9_treewriter.h @@ -17,35 +17,6 @@ extern "C" { #endif -#define vp9_cost_zero(prob) (vp9_prob_cost[prob]) - -#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob)) - -#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vp9_complement(prob) \ - : (prob)) - -static INLINE unsigned int cost_branch256(const unsigned int ct[2], - vp9_prob p) { - return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); -} - -static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs, - int bits, int len) { - int cost = 0; - vp9_tree_index i = 0; - - do { - const int bit = (bits >> --len) & 1; - cost += vp9_cost_bit(probs[i >> 1], bit); - i = tree[i + bit]; - } while (len); - - return cost; -} - -void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree); -void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree); - void vp9_tree_probs_from_distribution(vp9_tree tree, unsigned int branch_ct[ /* n - 1 */ ][2], const unsigned int num_events[ /* n */ ]); diff --git a/source/libvpx/vp9/encoder/vp9_variance.c b/source/libvpx/vp9/encoder/vp9_variance.c index 8bc3850..71867a9 100644 --- a/source/libvpx/vp9/encoder/vp9_variance.c +++ b/source/libvpx/vp9/encoder/vp9_variance.c @@ -216,7 +216,7 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -273,7 +273,7 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, 
fdata3, src_pixels_per_line, 1, 65, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -330,7 +330,7 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -387,7 +387,7 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, return (var - (((int64_t)avg * avg) >> 10)); } +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -614,7 +620,7 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, 
dst_ptr, dst_pixels_per_line, sse); } @@ -658,7 +664,7 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -703,7 +709,7 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 1, 17, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -747,7 +753,7 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -791,7 +797,7 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -955,7 +961,7 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, 
vfilter); - comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -999,7 +1005,7 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1043,7 +1049,7 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1089,6 +1095,23 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } + + +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + int tmp; + tmp = pred[j] + ref[j]; + comp_pred[j] = (tmp + 1) >> 1; + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} diff --git a/source/libvpx/vp9/encoder/vp9_variance.h b/source/libvpx/vp9/encoder/vp9_variance.h index 3bc2091..62e20dc 100644 --- 
a/source/libvpx/vp9/encoder/vp9_variance.h +++ b/source/libvpx/vp9/encoder/vp9_variance.h @@ -100,21 +100,9 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_write_bit_buffer.c b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.c new file mode 100644 index 0000000..962d0ca --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/encoder/vp9_write_bit_buffer.h" + +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { + return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); +} + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { + const int off = (int)wb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (q == CHAR_BIT -1) { + wb->bit_buffer[p] = bit << q; + } else { + wb->bit_buffer[p] &= ~(1 << q); + wb->bit_buffer[p] |= bit << q; + } + wb->bit_offset = off + 1; +} + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) { + int bit; + for (bit = bits - 1; bit >= 0; bit--) + vp9_wb_write_bit(wb, (data >> bit) & 1); +} diff --git a/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h index 1795e05..073608d 100644 --- a/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h +++ b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h @@ -24,29 +24,11 @@ struct vp9_write_bit_buffer { size_t bit_offset; }; -static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { - return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); -} - -static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { - const int off = (int)wb->bit_offset; - const int p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - off % CHAR_BIT; - if (q == CHAR_BIT -1) { - wb->bit_buffer[p] = bit << q; - } else { - wb->bit_buffer[p] &= ~(1 << q); - wb->bit_buffer[p] |= bit << q; - } - wb->bit_offset = off + 1; -} - -static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, - int data, int bits) { - int bit; - for (bit = bits - 1; bit >= 0; bit--) - vp9_wb_write_bit(wb, (data >> bit) & 1); -} +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb); + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit); + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits); #ifdef __cplusplus diff --git 
a/source/libvpx/vp9/encoder/vp9_writer.c b/source/libvpx/vp9/encoder/vp9_writer.c index fda1b39..8398fc0 100644 --- a/source/libvpx/vp9/encoder/vp9_writer.c +++ b/source/libvpx/vp9/encoder/vp9_writer.c @@ -12,34 +12,6 @@ #include "vp9/encoder/vp9_writer.h" #include "vp9/common/vp9_entropy.h" -#ifdef ENTROPY_STATS -unsigned int active_section = 0; -#endif - -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, - 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, - 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733, - 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, - 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472, - 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415, - 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, - 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281, - 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246, - 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, - 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, - 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, - 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, - 82, 81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63, - 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43, - 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, - 4, 3, 1, 1}; - void vp9_start_encode(vp9_writer *br, uint8_t *source) { br->lowvalue = 0; br->range = 255; diff --git a/source/libvpx/vp9/encoder/vp9_writer.h b/source/libvpx/vp9/encoder/vp9_writer.h index defeec3..7f4fa1e 100644 --- a/source/libvpx/vp9/encoder/vp9_writer.h +++ b/source/libvpx/vp9/encoder/vp9_writer.h @@ -32,8 +32,6 
@@ typedef struct { uint64_t bit_counter; } vp9_writer; -extern const unsigned int vp9_prob_cost[256]; - void vp9_start_encode(vp9_writer *bc, uint8_t *buffer); void vp9_stop_encode(vp9_writer *bc); diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c index f3735eb..6865822 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c +++ b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c @@ -13,39 +13,80 @@ #include "vpx_ports/mem.h" void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) { - // The 2D transform is done with two passes which are actually pretty - // similar. In the first one, we transform the columns and transpose - // the results. In the second one, we transform the rows. To achieve that, - // as the first pass results are transposed, we transpose the columns (that - // is the transposed rows) and transpose the results (so that it goes back - // in normal/row positions). - int pass; + // This 2D transform implements 4 vertical 1D transforms followed + // by 4 horizontal 1D transforms. The multiplies and adds are as given + // by Chen, Smith and Fralick ('77). The commands for moving the data + // around have been minimized by hand. + // For the purposes of the comments, the 16 inputs are referred to at i0 + // through iF (in raster order), intermediate variables are a0, b0, c0 + // through f, and correspond to the in-place computations mapped to input + // locations. The outputs, o0 through oF are labeled according to the + // output locations. + // Constants - // When we use them, in one case, they are all the same. In all others - // it's a pair of them that we need to repeat four times. This is done - // by constructing the 32 bit constant corresponding to that pair. 
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); + // These are the coefficients used for the multiplies. + // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64), + // where cospi_N_64 = cos(N pi /64) + const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64); + const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64); + const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64, + cospi_8_64, cospi_24_64, + cospi_24_64, -cospi_8_64, + cospi_24_64, -cospi_8_64); + const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64, + cospi_24_64, -cospi_8_64, + cospi_8_64, cospi_24_64, + cospi_8_64, cospi_24_64); + const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64); + const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64); + const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64, + cospi_8_64, cospi_24_64, + -cospi_8_64, -cospi_24_64, + -cospi_8_64, -cospi_24_64); + const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64, + cospi_24_64, -cospi_8_64, + -cospi_24_64, cospi_8_64, + -cospi_24_64, cospi_8_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + // This second rounding constant saves doing some extra adds at the end + const __m128i k__DCT_CONST_ROUNDING2 = _mm_set1_epi32(DCT_CONST_ROUNDING + +(DCT_CONST_ROUNDING << 1)); + const int DCT_CONST_BITS2 = 
DCT_CONST_BITS+2; const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); - const __m128i kOne = _mm_set1_epi16(1); __m128i in0, in1; + // Load inputs. { in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in1 = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *) + (input + 2 * stride))); in0 = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *) - (input + 1 * stride))); - in1 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); - in1 = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *) - (input + 3 * stride)), in1); + (input + 3 * stride))); + // in0 = [i0 i1 i2 i3 iC iD iE iF] + // in1 = [i4 i5 i6 i7 i8 i9 iA iB] + - // x = x << 4 + // multiply by 16 to give some extra precision in0 = _mm_slli_epi16(in0, 4); in1 = _mm_slli_epi16(in1, 4); // if (i == 0 && input[0]) input[0] += 1; + // add 1 to the upper left pixel if it is non-zero, which helps reduce + // the round-trip error { // The mask will only contain whether the first value is zero, all // other comparison will fail as something shifted by 4 (above << 4) @@ -58,57 +99,119 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) { in0 = _mm_add_epi16(in0, k__nonzero_bias_b); } } - // Do the two transform/transpose passes - for (pass = 0; pass < 2; ++pass) { - // Transform 1/2: Add/subtract - const __m128i r0 = _mm_add_epi16(in0, in1); - const __m128i r1 = _mm_sub_epi16(in0, in1); - const __m128i r2 = _mm_unpacklo_epi64(r0, r1); - const __m128i r3 = _mm_unpackhi_epi64(r0, r1); - // Transform 1/2: Interleave to do the multiply by constants which gets us - // into 32 bits. 
- const __m128i t0 = _mm_unpacklo_epi16(r2, r3); - const __m128i t2 = _mm_unpackhi_epi16(r2, r3); - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p08_p24); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_p24_m08); + // There are 4 total stages, alternating between an add/subtract stage + // followed by an multiply-and-add stage. + { + // Stage 1: Add/subtract + + // in0 = [i0 i1 i2 i3 iC iD iE iF] + // in1 = [i4 i5 i6 i7 i8 i9 iA iB] + const __m128i r0 = _mm_unpacklo_epi16(in0, in1); + const __m128i r1 = _mm_unpackhi_epi16(in0, in1); + // r0 = [i0 i4 i1 i5 i2 i6 i3 i7] + // r1 = [iC i8 iD i9 iE iA iF iB] + const __m128i r2 = _mm_shuffle_epi32(r0, 0xB4); + const __m128i r3 = _mm_shuffle_epi32(r1, 0xB4); + // r2 = [i0 i4 i1 i5 i3 i7 i2 i6] + // r3 = [iC i8 iD i9 iF iB iE iA] + + const __m128i t0 = _mm_add_epi16(r2, r3); + const __m128i t1 = _mm_sub_epi16(r2, r3); + // t0 = [a0 a4 a1 a5 a3 a7 a2 a6] + // t1 = [aC a8 aD a9 aF aB aE aA] + + // Stage 2: multiply by constants (which gets us into 32 bits). 
+ // The constants needed here are: + // k__cospi_A = [p16 p16 p16 p16 p16 m16 p16 m16] + // k__cospi_B = [p16 m16 p16 m16 p16 p16 p16 p16] + // k__cospi_C = [p08 p24 p08 p24 p24 m08 p24 m08] + // k__cospi_D = [p24 m08 p24 m08 p08 p24 p08 p24] + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_A); + const __m128i u2 = _mm_madd_epi16(t0, k__cospi_B); + const __m128i u1 = _mm_madd_epi16(t1, k__cospi_C); + const __m128i u3 = _mm_madd_epi16(t1, k__cospi_D); + // Then add and right-shift to get back to 16-bit range const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); + const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); + const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); + const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - // Combine and transpose - const __m128i res0 = _mm_packs_epi32(w0, w2); - const __m128i res1 = _mm_packs_epi32(w4, w6); - // 00 01 02 03 20 21 22 23 - // 10 11 12 13 30 31 32 33 - const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1); - const __m128i tr0_1 = _mm_unpackhi_epi16(res0, res1); - // 00 10 01 11 02 12 03 13 - // 20 30 21 31 22 32 23 33 - in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); - in1 = _mm_shuffle_epi32(in1, 0x4E); - // 00 10 20 30 01 11 21 31 in0 contains 0 followed by 1 - // 02 12 22 32 03 13 23 33 in1 contains 2 followed by 3 + const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); + // w0 = [b0 b1 b7 b6] + // w1 = [b8 b9 bF bE] + // w2 = [b4 b5 b3 b2] + // w3 = [bC bD bB bA] + const __m128i x0 = _mm_packs_epi32(w0, w1); + const __m128i x1 = _mm_packs_epi32(w2, 
w3); + // x0 = [b0 b1 b7 b6 b8 b9 bF bE] + // x1 = [b4 b5 b3 b2 bC bD bB bA] + in0 = _mm_shuffle_epi32(x0, 0xD8); + in1 = _mm_shuffle_epi32(x1, 0x8D); + // in0 = [b0 b1 b8 b9 b7 b6 bF bE] + // in1 = [b3 b2 bB bA b4 b5 bC bD] } - in1 = _mm_shuffle_epi32(in1, 0x4E); - // Post-condition output and store it (v + 1) >> 2, taking advantage - // of the fact 1/3 are stored just after 0/2. { - __m128i out01 = _mm_add_epi16(in0, kOne); - __m128i out23 = _mm_add_epi16(in1, kOne); - out01 = _mm_srai_epi16(out01, 2); - out23 = _mm_srai_epi16(out23, 2); - _mm_storeu_si128((__m128i *)(output + 0 * 4), out01); - _mm_storeu_si128((__m128i *)(output + 2 * 4), out23); + // vertical DCTs finished. Now we do the horizontal DCTs. + // Stage 3: Add/subtract + + const __m128i t0 = _mm_add_epi16(in0, in1); + const __m128i t1 = _mm_sub_epi16(in0, in1); + // t0 = [c0 c1 c8 c9 c4 c5 cC cD] + // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE] + + // Stage 4: multiply by constants (which gets us into 32 bits). + // The constants needed here are: + // k__cospi_E = [p16 p16 p16 p16 p16 p16 p16 p16] + // k__cospi_F = [p16 m16 p16 m16 p16 m16 p16 m16] + // k__cospi_G = [p08 p24 p08 p24 m08 m24 m08 m24] + // k__cospi_H = [p24 m08 p24 m08 m24 p08 m24 p08] + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_E); + const __m128i u1 = _mm_madd_epi16(t0, k__cospi_F); + const __m128i u2 = _mm_madd_epi16(t1, k__cospi_G); + const __m128i u3 = _mm_madd_epi16(t1, k__cospi_H); + // Then add and right-shift to get back to 16-bit range + // but this combines the final right-shift as well to save operations + // This unusual rounding operations is to maintain bit-accurate + // compatibility with the c version of this function which has two + // rounding steps in a row. 
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING2); + const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING2); + const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING2); + const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING2); + const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS2); + const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS2); + const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS2); + const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS2); + // w0 = [o0 o4 o8 oC] + // w1 = [o2 o6 oA oE] + // w2 = [o1 o5 o9 oD] + // w3 = [o3 o7 oB oF] + // remember the o's are numbered according to the correct output location + const __m128i x0 = _mm_packs_epi32(w0, w1); + const __m128i x1 = _mm_packs_epi32(w2, w3); + // x0 = [o0 o4 o8 oC o2 o6 oA oE] + // x1 = [o1 o5 o9 oD o3 o7 oB oF] + const __m128i y0 = _mm_unpacklo_epi16(x0, x1); + const __m128i y1 = _mm_unpackhi_epi16(x0, x1); + // y0 = [o0 o1 o4 o5 o8 o9 oC oD] + // y1 = [o2 o3 o6 o7 oA oB oE oF] + in0 = _mm_unpacklo_epi32(y0, y1); + // in0 = [o0 o1 o2 o3 o4 o5 o6 o7] + in1 = _mm_unpackhi_epi32(y0, y1); + // in1 = [o8 o9 oA oB oC oD oE oF] + } + // Post-condition (v + 1) >> 2 is now incorporated into previous + // add and right-shift commands. Only 2 store instructions needed + // because we are using the fact that 1/3 are stored just after 0/2. + { + _mm_storeu_si128((__m128i *)(output + 0 * 4), in0); + _mm_storeu_si128((__m128i *)(output + 2 * 4), in1); } } + static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in, int stride) { const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); diff --git a/source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c new file mode 100644 index 0000000..f31b176 --- /dev/null +++ b/source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <immintrin.h> // AVX2 +#include "vpx/vpx_integer.h" + +void vp9_sad32x32x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 32 ; i++) { + // load src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. 
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} + +void vp9_sad64x64x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; + __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; + __m256i ref3_reg, ref3next_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 64 ; i++) { + // load 64 bytes from src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 
+ 32)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); + ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); + ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); + ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); + + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. 
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} diff --git a/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c b/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c index a3d0114..c4d17fc 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c +++ b/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c @@ -13,7 +13,6 @@ #include "vp9/common/vp9_pragmas.h" #include "vpx_ports/mem.h" -extern unsigned int vp9_get_mb_ss_mmx(const int16_t *src_ptr); extern unsigned int vp9_get8x8var_mmx ( const unsigned char *src_ptr, diff --git a/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c index 79e42c4..9e65694 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c +++ b/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c @@ -24,10 +24,6 @@ extern unsigned int vp9_get4x4var_mmx int *Sum ); -unsigned int vp9_get_mb_ss_sse2 -( - const int16_t *src_ptr -); unsigned int vp9_get16x16var_sse2 ( const unsigned char *src_ptr, diff --git a/source/libvpx/vp9/vp9_common.mk b/source/libvpx/vp9/vp9_common.mk index 9fb6115..b1ba0b1 100644 --- a/source/libvpx/vp9/vp9_common.mk +++ b/source/libvpx/vp9/vp9_common.mk @@ -12,7 +12,6 @@ VP9_COMMON_SRCS-yes += vp9_common.mk VP9_COMMON_SRCS-yes += vp9_iface_common.h VP9_COMMON_SRCS-yes += 
common/vp9_pragmas.h VP9_COMMON_SRCS-yes += common/vp9_ppflags.h -VP9_COMMON_SRCS-yes += common/vp9_onyx.h VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_blockd.c VP9_COMMON_SRCS-yes += common/vp9_convolve.c @@ -45,7 +44,7 @@ VP9_COMMON_SRCS-yes += common/vp9_quant_common.h VP9_COMMON_SRCS-yes += common/vp9_reconinter.h VP9_COMMON_SRCS-yes += common/vp9_reconintra.h VP9_COMMON_SRCS-yes += common/vp9_rtcd.c -VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh +VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.pl VP9_COMMON_SRCS-yes += common/vp9_scale.h VP9_COMMON_SRCS-yes += common/vp9_scale.c VP9_COMMON_SRCS-yes += common/vp9_seg_common.h @@ -145,4 +144,4 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM) -$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh)) +$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/source/libvpx/vp9/vp9_cx_iface.c b/source/libvpx/vp9/vp9_cx_iface.c index 28c60d1..0623ad1 100644 --- a/source/libvpx/vp9/vp9_cx_iface.c +++ b/source/libvpx/vp9/vp9_cx_iface.c @@ -17,12 +17,11 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vpx/vp8cx.h" #include "vp9/encoder/vp9_firstpass.h" -#include "vp9/common/vp9_onyx.h" #include "vp9/vp9_iface_common.h" struct vp9_extracfg { struct vpx_codec_pkt_list *pkt_list; - int cpu_used; /* available cpu percentage in 1/16 */ + int cpu_used; // available cpu percentage in 1/16 unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; unsigned int sharpness; @@ -33,15 +32,17 @@ struct vp9_extracfg { unsigned int arnr_strength; unsigned int arnr_type; vp8e_tuning tuning; - unsigned int cq_level; /* constrained quality level */ + unsigned int cq_level; // constrained quality level unsigned int rc_max_intra_bitrate_pct; unsigned int lossless; unsigned int 
frame_parallel_decoding_mode; - unsigned int aq_mode; + AQ_MODE aq_mode; + unsigned int frame_periodic_boost; + BIT_DEPTH bit_depth; }; struct extraconfig_map { - int usage; + int usage; struct vp9_extracfg cfg; }; @@ -50,22 +51,24 @@ static const struct extraconfig_map extracfg_map[] = { 0, { // NOLINT NULL, - 0, /* cpu_used */ - 1, /* enable_auto_alt_ref */ - 0, /* noise_sensitivity */ - 0, /* sharpness */ - 0, /* static_thresh */ - 0, /* tile_columns */ - 0, /* tile_rows */ - 7, /* arnr_max_frames */ - 5, /* arnr_strength */ - 3, /* arnr_type*/ - 0, /* tuning*/ - 10, /* cq_level */ - 0, /* rc_max_intra_bitrate_pct */ - 0, /* lossless */ - 0, /* frame_parallel_decoding_mode */ - 0, /* aq_mode */ + 0, // cpu_used + 1, // enable_auto_alt_ref + 0, // noise_sensitivity + 0, // sharpness + 0, // static_thresh + 0, // tile_columns + 0, // tile_rows + 7, // arnr_max_frames + 5, // arnr_strength + 3, // arnr_type + VP8_TUNE_PSNR, // tuning + 10, // cq_level + 0, // rc_max_intra_bitrate_pct + 0, // lossless + 0, // frame_parallel_decoding_mode + NO_AQ, // aq_mode + 0, // frame_periodic_delta_q + BITS_8, // Bit depth } } }; @@ -73,9 +76,9 @@ static const struct extraconfig_map extracfg_map[] = { struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; - struct vp9_extracfg vp8_cfg; + struct vp9_extracfg extra_cfg; VP9_CONFIG oxcf; - VP9_PTR cpi; + VP9_COMP *cpi; unsigned char *cx_data; size_t cx_data_sz; unsigned char *pending_cx_data; @@ -102,15 +105,12 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { return VP9_LAST_FLAG; } -static vpx_codec_err_t -update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) { - vpx_codec_err_t res; +static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) { + const vpx_codec_err_t res = error->error_code; - if ((res = error->error_code)) - ctx->base.err_detail = error->has_detail - ? 
error->detail - : NULL; + if (res != VPX_CODEC_OK) + ctx->base.err_detail = error->has_detail ? error->detail : NULL; return res; } @@ -141,24 +141,20 @@ update_error_state(vpx_codec_alg_priv_t *ctx, if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean");\ } while (0) -static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, - const struct vp9_extracfg *vp8_cfg) { - RANGE_CHECK(cfg, g_w, 1, 65535); /* 16 bits available */ - RANGE_CHECK(cfg, g_h, 1, 65535); /* 16 bits available */ + const struct vp9_extracfg *extra_cfg) { + RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available + RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); - RANGE_CHECK_BOOL(vp8_cfg, lossless); - if (vp8_cfg->lossless) { - RANGE_CHECK_HI(cfg, rc_max_quantizer, 0); - RANGE_CHECK_HI(cfg, rc_min_quantizer, 0); - } - RANGE_CHECK(vp8_cfg, aq_mode, 0, AQ_MODES_COUNT - 1); - + RANGE_CHECK_BOOL(extra_cfg, lossless); + RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 1); + RANGE_CHECK(extra_cfg, frame_periodic_boost, 0, 1); RANGE_CHECK_HI(cfg, g_threads, 64); RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS); RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q); @@ -166,61 +162,53 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); - // RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); RANGE_CHECK_BOOL(cfg, rc_resize_allowed); RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); 
RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); - RANGE_CHECK(cfg, ss_number_layers, 1, - VPX_SS_MAX_LAYERS); /*Spatial layers max */ - + RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); if (cfg->ts_number_layers > 1) { unsigned int i; - for (i = 1; i < cfg->ts_number_layers; ++i) { - if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) { + for (i = 1; i < cfg->ts_number_layers; ++i) + if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1]) ERROR("ts_target_bitrate entries are not increasing"); - } - } - RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1); - for (i = cfg->ts_number_layers-2; i > 0; --i) { - if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) { + + RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); + for (i = cfg->ts_number_layers - 2; i > 0; --i) + if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) ERROR("ts_rate_decimator factors are not powers of 2"); - } - } } - /* VP8 does not support a lower bound on the keyframe interval in - * automatic keyframe placement mode. - */ - if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist - && cfg->kf_min_dist > 0) + // VP8 does not support a lower bound on the keyframe interval in + // automatic keyframe placement mode. 
+ if (cfg->kf_mode != VPX_KF_DISABLED && + cfg->kf_min_dist != cfg->kf_max_dist && + cfg->kf_min_dist > 0) ERROR("kf_min_dist not supported in auto mode, use 0 " "or kf_max_dist instead."); - RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); - RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); - - RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); - - RANGE_CHECK(vp8_cfg, tile_columns, 0, 6); - RANGE_CHECK(vp8_cfg, tile_rows, 0, 2); - RANGE_CHECK_HI(vp8_cfg, sharpness, 7); - RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); - RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); - RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); - RANGE_CHECK(vp8_cfg, cq_level, 0, 63); + RANGE_CHECK_BOOL(extra_cfg, enable_auto_alt_ref); + RANGE_CHECK(extra_cfg, cpu_used, -16, 16); + RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); + RANGE_CHECK(extra_cfg, tile_columns, 0, 6); + RANGE_CHECK(extra_cfg, tile_rows, 0, 2); + RANGE_CHECK_HI(extra_cfg, sharpness, 7); + RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15); + RANGE_CHECK_HI(extra_cfg, arnr_strength, 6); + RANGE_CHECK(extra_cfg, arnr_type, 1, 3); + RANGE_CHECK(extra_cfg, cq_level, 0, 63); // TODO(yaowu): remove this when ssim tuning is implemented for vp9 - if (vp8_cfg->tuning == VP8_TUNE_SSIM) + if (extra_cfg->tuning == VP8_TUNE_SSIM) ERROR("Option --tune=ssim is not currently supported in VP9."); if (cfg->g_pass == VPX_RC_LAST_PASS) { size_t packet_sz = sizeof(FIRSTPASS_STATS); int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz); - FIRSTPASS_STATS *stats; + const FIRSTPASS_STATS *stats; if (cfg->rc_twopass_stats_in.buf == NULL) ERROR("rc_twopass_stats_in.buf not set."); @@ -228,22 +216,57 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); - if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) - ERROR("rc_twopass_stats_in requires at least two packets."); + if (cfg->ss_number_layers > 1) { + int i; + unsigned int 
n_packets_per_layer[VPX_SS_MAX_LAYERS] = {0}; + + stats = cfg->rc_twopass_stats_in.buf; + for (i = 0; i < n_packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < (int)cfg->ss_number_layers) { + ++n_packets_per_layer[layer_id]; + } + } + + for (i = 0; i < (int)cfg->ss_number_layers; ++i) { + unsigned int layer_id; + if (n_packets_per_layer[i] < 2) { + ERROR("rc_twopass_stats_in requires at least two packets for each " + "layer."); + } + + stats = (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + + n_packets - cfg->ss_number_layers + i; + layer_id = (int)stats->spatial_layer_id; + + if (layer_id >= cfg->ss_number_layers + ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1) + ERROR("rc_twopass_stats_in missing EOS stats packet"); + } + } else { + if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) + ERROR("rc_twopass_stats_in requires at least two packets."); - stats = (void *)((char *)cfg->rc_twopass_stats_in.buf - + (n_packets - 1) * packet_sz); + stats = + (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1; - if ((int)(stats->count + 0.5) != n_packets - 1) - ERROR("rc_twopass_stats_in missing EOS stats packet"); + if ((int)(stats->count + 0.5) != n_packets - 1) + ERROR("rc_twopass_stats_in missing EOS stats packet"); + } } + if (cfg->g_profile <= (unsigned int)PROFILE_1 && + extra_cfg->bit_depth > BITS_8) + ERROR("High bit-depth not supported in profile < 2"); + if (cfg->g_profile > (unsigned int)PROFILE_1 && + extra_cfg->bit_depth == BITS_8) + ERROR("Bit-depth 8 not supported in profile > 1"); return VPX_CODEC_OK; } static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img) { + const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: @@ -255,28 +278,27 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, "supported."); } - if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h)) + if (img->d_w != 
ctx->cfg.g_w || img->d_h != ctx->cfg.g_h) ERROR("Image size must match encoder init configuration size"); return VPX_CODEC_OK; } -static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, - vpx_codec_enc_cfg_t cfg, - struct vp9_extracfg vp9_cfg) { - oxcf->version = cfg.g_profile; - oxcf->width = cfg.g_w; - oxcf->height = cfg.g_h; - /* guess a frame rate if out of whack, use 30 */ - oxcf->framerate = (double)(cfg.g_timebase.den) - / (double)(cfg.g_timebase.num); - - if (oxcf->framerate > 180) { +static vpx_codec_err_t set_encoder_config( + VP9_CONFIG *oxcf, + const vpx_codec_enc_cfg_t *cfg, + const struct vp9_extracfg *extra_cfg) { + oxcf->profile = cfg->g_profile; + oxcf->width = cfg->g_w; + oxcf->height = cfg->g_h; + oxcf->bit_depth = extra_cfg->bit_depth; + // guess a frame rate if out of whack, use 30 + oxcf->framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; + if (oxcf->framerate > 180) oxcf->framerate = 30; - } - switch (cfg.g_pass) { + switch (cfg->g_pass) { case VPX_RC_ONE_PASS: oxcf->mode = MODE_GOODQUALITY; break; @@ -288,87 +310,83 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, break; } - if (cfg.g_pass == VPX_RC_FIRST_PASS) { - oxcf->lag_in_frames = 0; - } else { - oxcf->lag_in_frames = cfg.g_lag_in_frames; - } + oxcf->lag_in_frames = cfg->g_pass == VPX_RC_FIRST_PASS ? 
0 + : cfg->g_lag_in_frames; - oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; - if (cfg.rc_end_usage == VPX_CQ) + oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; + if (cfg->rc_end_usage == VPX_CQ) oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; - else if (cfg.rc_end_usage == VPX_Q) + else if (cfg->rc_end_usage == VPX_Q) oxcf->end_usage = USAGE_CONSTANT_QUALITY; - else if (cfg.rc_end_usage == VPX_CBR) + else if (cfg->rc_end_usage == VPX_CBR) oxcf->end_usage = USAGE_STREAM_FROM_SERVER; - oxcf->target_bandwidth = cfg.rc_target_bitrate; - oxcf->rc_max_intra_bitrate_pct = vp9_cfg.rc_max_intra_bitrate_pct; + oxcf->target_bandwidth = cfg->rc_target_bitrate; + oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; - oxcf->best_allowed_q = cfg.rc_min_quantizer; - oxcf->worst_allowed_q = cfg.rc_max_quantizer; - oxcf->cq_level = vp9_cfg.cq_level; + oxcf->best_allowed_q = vp9_quantizer_to_qindex(cfg->rc_min_quantizer); + oxcf->worst_allowed_q = vp9_quantizer_to_qindex(cfg->rc_max_quantizer); + oxcf->cq_level = vp9_quantizer_to_qindex(extra_cfg->cq_level); oxcf->fixed_q = -1; - oxcf->under_shoot_pct = cfg.rc_undershoot_pct; - oxcf->over_shoot_pct = cfg.rc_overshoot_pct; + oxcf->under_shoot_pct = cfg->rc_undershoot_pct; + oxcf->over_shoot_pct = cfg->rc_overshoot_pct; + + oxcf->maximum_buffer_size = cfg->rc_buf_sz; + oxcf->starting_buffer_level = cfg->rc_buf_initial_sz; + oxcf->optimal_buffer_level = cfg->rc_buf_optimal_sz; - oxcf->maximum_buffer_size = cfg.rc_buf_sz; - oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; - oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; + oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh; - oxcf->drop_frames_water_mark = cfg.rc_dropframe_thresh; + oxcf->two_pass_vbrbias = cfg->rc_2pass_vbr_bias_pct; + oxcf->two_pass_vbrmin_section = cfg->rc_2pass_vbr_minsection_pct; + oxcf->two_pass_vbrmax_section = cfg->rc_2pass_vbr_maxsection_pct; - oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct; - oxcf->two_pass_vbrmin_section = 
cfg.rc_2pass_vbr_minsection_pct; - oxcf->two_pass_vbrmax_section = cfg.rc_2pass_vbr_maxsection_pct; + oxcf->auto_key = cfg->kf_mode == VPX_KF_AUTO && + cfg->kf_min_dist != cfg->kf_max_dist; - oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO - && cfg.kf_min_dist != cfg.kf_max_dist; - // oxcf->kf_min_dist = cfg.kf_min_dis; - oxcf->key_freq = cfg.kf_max_dist; + oxcf->key_freq = cfg->kf_max_dist; - oxcf->cpu_used = vp9_cfg.cpu_used; - oxcf->encode_breakout = vp9_cfg.static_thresh; - oxcf->play_alternate = vp9_cfg.enable_auto_alt_ref; - oxcf->noise_sensitivity = vp9_cfg.noise_sensitivity; - oxcf->sharpness = vp9_cfg.sharpness; + oxcf->cpu_used = extra_cfg->cpu_used; + oxcf->encode_breakout = extra_cfg->static_thresh; + oxcf->play_alternate = extra_cfg->enable_auto_alt_ref; + oxcf->noise_sensitivity = extra_cfg->noise_sensitivity; + oxcf->sharpness = extra_cfg->sharpness; - oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in; - oxcf->output_pkt_list = vp9_cfg.pkt_list; + oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in; + oxcf->output_pkt_list = extra_cfg->pkt_list; - oxcf->arnr_max_frames = vp9_cfg.arnr_max_frames; - oxcf->arnr_strength = vp9_cfg.arnr_strength; - oxcf->arnr_type = vp9_cfg.arnr_type; + oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; + oxcf->arnr_strength = extra_cfg->arnr_strength; + oxcf->arnr_type = extra_cfg->arnr_type; - oxcf->tuning = vp9_cfg.tuning; + oxcf->tuning = extra_cfg->tuning; - oxcf->tile_columns = vp9_cfg.tile_columns; - oxcf->tile_rows = vp9_cfg.tile_rows; + oxcf->tile_columns = extra_cfg->tile_columns; + oxcf->tile_rows = extra_cfg->tile_rows; - oxcf->lossless = vp9_cfg.lossless; + oxcf->lossless = extra_cfg->lossless; - oxcf->error_resilient_mode = cfg.g_error_resilient; - oxcf->frame_parallel_decoding_mode = vp9_cfg.frame_parallel_decoding_mode; + oxcf->error_resilient_mode = cfg->g_error_resilient; + oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; - oxcf->aq_mode = vp9_cfg.aq_mode; + oxcf->aq_mode = 
extra_cfg->aq_mode; - oxcf->ss_number_layers = cfg.ss_number_layers; + oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost; + + oxcf->ss_number_layers = cfg->ss_number_layers; if (oxcf->ss_number_layers > 1) { - memcpy(oxcf->ss_target_bitrate, cfg.ss_target_bitrate, - sizeof(cfg.ss_target_bitrate)); + vp9_copy(oxcf->ss_target_bitrate, cfg->ss_target_bitrate); } else if (oxcf->ss_number_layers == 1) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; } - oxcf->ts_number_layers = cfg.ts_number_layers; + oxcf->ts_number_layers = cfg->ts_number_layers; if (oxcf->ts_number_layers > 1) { - memcpy(oxcf->ts_target_bitrate, cfg.ts_target_bitrate, - sizeof(cfg.ts_target_bitrate)); - memcpy(oxcf->ts_rate_decimator, cfg.ts_rate_decimator, - sizeof(cfg.ts_rate_decimator)); + vp9_copy(oxcf->ts_target_bitrate, cfg->ts_target_bitrate); + vp9_copy(oxcf->ts_rate_decimator, cfg->ts_rate_decimator); } else if (oxcf->ts_number_layers == 1) { oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; @@ -406,49 +424,44 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_config(vpx_codec_alg_priv_t *ctx, - const vpx_codec_enc_cfg_t *cfg) { +static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx, + const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; - if ((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) + if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) ERROR("Cannot change width or height after initialization"); - /* Prevent increasing lag_in_frames. This check is stricter than it needs - * to be -- the limit is not increasing past the first lag_in_frames - * value, but we don't track the initial config, only the last successful - * config. - */ - if ((cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames)) + // Prevent increasing lag_in_frames. 
This check is stricter than it needs + // to be -- the limit is not increasing past the first lag_in_frames + // value, but we don't track the initial config, only the last successful + // config. + if (cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames) ERROR("Cannot increase lag_in_frames"); - res = validate_config(ctx, cfg, &ctx->vp8_cfg); + res = validate_config(ctx, cfg, &ctx->extra_cfg); if (res == VPX_CODEC_OK) { ctx->cfg = *cfg; - set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } return res; } - -int vp9_reverse_trans(int q); - - -static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_get_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, + va_list args) { void *arg = va_arg(args, void *); #define MAP(id, var) case id: *(RECAST(id, arg)) = var; break - if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + if (arg == NULL) + return VPX_CODEC_INVALID_PARAM; switch (ctrl_id) { - MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); - MAP(VP8E_GET_LAST_QUANTIZER_64, - vp9_reverse_trans(vp9_get_quantizer(ctx->cpi))); + MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); + MAP(VP8E_GET_LAST_QUANTIZER_64, + vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi))); } return VPX_CODEC_OK; @@ -456,38 +469,39 @@ static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { - vpx_codec_err_t res = VPX_CODEC_OK; - struct vp9_extracfg xcfg = ctx->vp8_cfg; +static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, + va_list args) { + vpx_codec_err_t res = VPX_CODEC_OK; + struct vp9_extracfg extra_cfg = ctx->extra_cfg; #define MAP(id, var) case id: var = CAST(id, args); break; switch (ctrl_id) { - MAP(VP8E_SET_CPUUSED, xcfg.cpu_used); - MAP(VP8E_SET_ENABLEAUTOALTREF, 
xcfg.enable_auto_alt_ref); - MAP(VP8E_SET_NOISE_SENSITIVITY, xcfg.noise_sensitivity); - MAP(VP8E_SET_SHARPNESS, xcfg.sharpness); - MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh); - MAP(VP9E_SET_TILE_COLUMNS, xcfg.tile_columns); - MAP(VP9E_SET_TILE_ROWS, xcfg.tile_rows); - MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames); - MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength); - MAP(VP8E_SET_ARNR_TYPE, xcfg.arnr_type); - MAP(VP8E_SET_TUNING, xcfg.tuning); - MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); - MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct); - MAP(VP9E_SET_LOSSLESS, xcfg.lossless); - MAP(VP9E_SET_FRAME_PARALLEL_DECODING, xcfg.frame_parallel_decoding_mode); - MAP(VP9E_SET_AQ_MODE, xcfg.aq_mode); + MAP(VP8E_SET_CPUUSED, extra_cfg.cpu_used); + MAP(VP8E_SET_ENABLEAUTOALTREF, extra_cfg.enable_auto_alt_ref); + MAP(VP8E_SET_NOISE_SENSITIVITY, extra_cfg.noise_sensitivity); + MAP(VP8E_SET_SHARPNESS, extra_cfg.sharpness); + MAP(VP8E_SET_STATIC_THRESHOLD, extra_cfg.static_thresh); + MAP(VP9E_SET_TILE_COLUMNS, extra_cfg.tile_columns); + MAP(VP9E_SET_TILE_ROWS, extra_cfg.tile_rows); + MAP(VP8E_SET_ARNR_MAXFRAMES, extra_cfg.arnr_max_frames); + MAP(VP8E_SET_ARNR_STRENGTH, extra_cfg.arnr_strength); + MAP(VP8E_SET_ARNR_TYPE, extra_cfg.arnr_type); + MAP(VP8E_SET_TUNING, extra_cfg.tuning); + MAP(VP8E_SET_CQ_LEVEL, extra_cfg.cq_level); + MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, extra_cfg.rc_max_intra_bitrate_pct); + MAP(VP9E_SET_LOSSLESS, extra_cfg.lossless); + MAP(VP9E_SET_FRAME_PARALLEL_DECODING, + extra_cfg.frame_parallel_decoding_mode); + MAP(VP9E_SET_AQ_MODE, extra_cfg.aq_mode); + MAP(VP9E_SET_FRAME_PERIODIC_BOOST, extra_cfg.frame_periodic_boost); } - res = validate_config(ctx, &ctx->cfg, &xcfg); + res = validate_config(ctx, &ctx->cfg, &extra_cfg); if (res == VPX_CODEC_OK) { - ctx->vp8_cfg = xcfg; - set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + ctx->extra_cfg = extra_cfg; + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); 
vp9_change_config(ctx->cpi, &ctx->oxcf); } @@ -495,17 +509,13 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, #undef MAP } - -static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { - vpx_codec_err_t res = VPX_CODEC_OK; - struct vpx_codec_alg_priv *priv; - vpx_codec_enc_cfg_t *cfg; - unsigned int i; - - VP9_PTR optr; +static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { + vpx_codec_err_t res = VPX_CODEC_OK; if (ctx->priv == NULL) { - priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); + int i; + vpx_codec_enc_cfg_t *cfg; + struct vpx_codec_alg_priv *priv = calloc(1, sizeof(*priv)); if (priv == NULL) return VPX_CODEC_MEM_ERROR; @@ -517,49 +527,47 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { ctx->priv->enc.total_encoders = 1; if (ctx->config.enc) { - /* Update the reference to the config structure to an - * internal copy. - */ + // Update the reference to the config structure to an + // internal copy. ctx->priv->alg_priv->cfg = *ctx->config.enc; ctx->config.enc = &ctx->priv->alg_priv->cfg; } - cfg = &ctx->priv->alg_priv->cfg; + cfg = &ctx->priv->alg_priv->cfg; - /* Select the extra vp6 configuration table based on the current - * usage value. If the current usage value isn't found, use the - * values for usage case 0. - */ + // Select the extra vp6 configuration table based on the current + // usage value. If the current usage value isn't found, use the + // values for usage case 0. for (i = 0; extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; - i++) {} + ++i) {} - priv->vp8_cfg = extracfg_map[i].cfg; - priv->vp8_cfg.pkt_list = &priv->pkt_list.head; + priv->extra_cfg = extracfg_map[i].cfg; + priv->extra_cfg.pkt_list = &priv->pkt_list.head; // Maximum buffer size approximated based on having multiple ARF. 
priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 8; if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; - priv->cx_data = malloc(priv->cx_data_sz); - - if (priv->cx_data == NULL) return VPX_CODEC_MEM_ERROR; + priv->cx_data = (unsigned char *)malloc(priv->cx_data_sz); + if (priv->cx_data == NULL) + return VPX_CODEC_MEM_ERROR; vp9_initialize_enc(); - res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); + res = validate_config(priv, &priv->cfg, &priv->extra_cfg); if (res == VPX_CODEC_OK) { - set_vp9e_config(&ctx->priv->alg_priv->oxcf, - ctx->priv->alg_priv->cfg, - ctx->priv->alg_priv->vp8_cfg); - optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); - - if (optr == NULL) + VP9_COMP *cpi; + set_encoder_config(&ctx->priv->alg_priv->oxcf, + &ctx->priv->alg_priv->cfg, + &ctx->priv->alg_priv->extra_cfg); + cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); + if (cpi == NULL) res = VPX_CODEC_MEM_ERROR; else - ctx->priv->alg_priv->cpi = optr; + ctx->priv->alg_priv->cpi = cpi; } } @@ -567,46 +575,40 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { } -static vpx_codec_err_t vp9e_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - return vp9e_common_init(ctx); +static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { + return encoder_common_init(ctx); } -static vpx_codec_err_t vp9e_destroy(vpx_codec_alg_priv_t *ctx) { +static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); - vp9_remove_compressor(&ctx->cpi); + vp9_remove_compressor(ctx->cpi); free(ctx); return VPX_CODEC_OK; } static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, - unsigned long duration, - unsigned long deadline) { - unsigned int new_qc; - - /* Use best quality mode if no deadline is given. */ - new_qc = MODE_BESTQUALITY; + unsigned long duration, + unsigned long deadline) { + // Use best quality mode if no deadline is given. 
+ MODE new_qc = MODE_BESTQUALITY; if (deadline) { - uint64_t duration_us; - - /* Convert duration parameter from stream timebase to microseconds */ - duration_us = (uint64_t)duration * 1000000 - * (uint64_t)ctx->cfg.g_timebase.num - / (uint64_t)ctx->cfg.g_timebase.den; - - /* If the deadline is more that the duration this frame is to be shown, - * use good quality mode. Otherwise use realtime mode. - */ - new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME; + // Convert duration parameter from stream timebase to microseconds + const uint64_t duration_us = (uint64_t)duration * 1000000 * + (uint64_t)ctx->cfg.g_timebase.num / + (uint64_t)ctx->cfg.g_timebase.den; + + // If the deadline is more that the duration this frame is to be shown, + // use good quality mode. Otherwise use realtime mode. + new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME; } if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS) new_qc = MODE_FIRSTPASS; else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS) - new_qc = (new_qc == MODE_BESTQUALITY) - ? MODE_SECONDPASS_BEST - : MODE_SECONDPASS; + new_qc = (new_qc == MODE_BESTQUALITY) ? 
MODE_SECONDPASS_BEST + : MODE_SECONDPASS; if (ctx->oxcf.mode != new_qc) { ctx->oxcf.mode = new_qc; @@ -623,10 +625,10 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { assert(ctx->pending_frame_count); assert(ctx->pending_frame_count <= 8); - /* Add the number of frames to the marker byte */ + // Add the number of frames to the marker byte marker |= ctx->pending_frame_count - 1; - /* Choose the magnitude */ + // Choose the magnitude for (mag = 0, mask = 0xff; mag < 4; mag++) { if (ctx->pending_frame_magnitude < mask) break; @@ -635,7 +637,7 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { } marker |= mag << 3; - /* Write the index */ + // Write the index index_sz = 2 + (mag + 1) * ctx->pending_frame_count; if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) { uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz; @@ -656,12 +658,12 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { return index_sz; } -static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline) { +static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, + const vpx_image_t *img, + vpx_codec_pts_t pts, + unsigned long duration, + vpx_enc_frame_flags_t flags, + unsigned long deadline) { vpx_codec_err_t res = VPX_CODEC_OK; if (img) @@ -670,15 +672,15 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, pick_quickcompress_mode(ctx, duration, deadline); vpx_codec_pkt_list_init(&ctx->pkt_list); - /* Handle Flags */ - if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) - || ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { + // Handle Flags + if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) || + ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { ctx->base.err_detail = "Conflicting flags."; return VPX_CODEC_INVALID_PARAM; } - 
if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF - | VP8_EFLAG_NO_REF_ARF)) { + if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF)) { int ref = 7; if (flags & VP8_EFLAG_NO_REF_LAST) @@ -693,9 +695,9 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, vp9_use_as_reference(ctx->cpi, ref); } - if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF - | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF - | VP8_EFLAG_FORCE_ARF)) { + if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | + VP8_EFLAG_FORCE_ARF)) { int upd = 7; if (flags & VP8_EFLAG_NO_UPD_LAST) @@ -714,16 +716,16 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, vp9_update_entropy(ctx->cpi, 0); } - /* Handle fixed keyframe intervals */ - if (ctx->cfg.kf_mode == VPX_KF_AUTO - && ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { + // Handle fixed keyframe intervals + if (ctx->cfg.kf_mode == VPX_KF_AUTO && + ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) { flags |= VPX_EFLAG_FORCE_KF; ctx->fixed_kf_cntr = 1; } } - /* Initialize the encoder instance on the first frame. */ + // Initialize the encoder instance on the first frame. if (res == VPX_CODEC_OK && ctx->cpi != NULL) { unsigned int lib_flags; YV12_BUFFER_CONFIG sd; @@ -731,15 +733,15 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, size_t size, cx_data_sz; unsigned char *cx_data; - /* Set up internal flags */ + // Set up internal flags if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1; - /* Convert API flags to internal codec lib flags */ + // Convert API flags to internal codec lib flags lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? 
FRAMEFLAGS_KEY : 0; - /* vp8 use 10,000,000 ticks/second as time stamp */ - dst_time_stamp = pts * 10000000 * ctx->cfg.g_timebase.num + /* vp9 use 10,000,000 ticks/second as time stamp */ + dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num) / ctx->cfg.g_timebase.den; dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den; @@ -779,11 +781,11 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, cx_data, &dst_time_stamp, &dst_end_time_stamp, !img)) { if (size) { - vpx_codec_pts_t round, delta; + vpx_codec_pts_t round, delta; vpx_codec_cx_pkt_t pkt; - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; + VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; - /* Pack invisible frames with the next visible frame */ + // Pack invisible frames with the next visible frame if (cpi->common.show_frame == 0) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; @@ -795,7 +797,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, continue; } - /* Add the frame packet to the list of returned packets. */ + // Add the frame packet to the list of returned packets. 
round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; @@ -826,48 +828,25 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, if (cpi->droppable) pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE; - /*if (cpi->output_partition) - { - int i; - const int num_partitions = 1; - - pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT; - - for (i = 0; i < num_partitions; ++i) - { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = cpi->partition_sz[i]; - pkt.data.frame.partition_id = i; - // don't set the fragment bit for the last partition - if (i == (num_partitions - 1)) - pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT; - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - cx_data += cpi->partition_sz[i]; - cx_data_sz -= cpi->partition_sz[i]; - } - } - else*/ - { - if (ctx->pending_cx_data) { - ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - ctx->pending_cx_data_sz += size; - size += write_superframe_index(ctx); - pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = ctx->pending_cx_data_sz; - ctx->pending_cx_data = NULL; - ctx->pending_cx_data_sz = 0; - ctx->pending_frame_count = 0; - ctx->pending_frame_magnitude = 0; - } else { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = size; - } - pkt.data.frame.partition_id = -1; - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - cx_data += size; - cx_data_sz -= size; + if (ctx->pending_cx_data) { + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; + ctx->pending_cx_data_sz += size; + size += write_superframe_index(ctx); + pkt.data.frame.buf = ctx->pending_cx_data; + pkt.data.frame.sz = ctx->pending_cx_data_sz; + ctx->pending_cx_data = NULL; + ctx->pending_cx_data_sz = 0; + ctx->pending_frame_count = 0; + ctx->pending_frame_magnitude = 0; + } else { + pkt.data.frame.buf = cx_data; + pkt.data.frame.sz = 
size; } + pkt.data.frame.partition_id = -1; + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); + cx_data += size; + cx_data_sz -= size; } } } @@ -876,15 +855,14 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, } -static const vpx_codec_cx_pkt_t *vp9e_get_cxdata(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { +static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); } -static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -898,10 +876,9 @@ static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -915,9 +892,8 @@ static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vp9_ref_frame_t *frame = va_arg(args, vp9_ref_frame_t *); if (frame != NULL) { @@ -931,9 +907,8 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list 
args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); (void)ctr_id; @@ -953,17 +928,17 @@ static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, } -static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { +static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags = {0}; if (ctx->preview_ppcfg.post_proc_flag) { - flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; - flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; - flags.noise_level = ctx->preview_ppcfg.noise_level; + flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; + flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; + flags.noise_level = ctx->preview_ppcfg.noise_level; } - if (0 == vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags)) { + if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; } else { @@ -971,100 +946,107 @@ static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { } } -static vpx_codec_err_t vp9e_update_entropy(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - int update = va_arg(args, int); +static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + const int update = va_arg(args, int); vp9_update_entropy(ctx->cpi, update); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_update_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - int update = va_arg(args, int); - vp9_update_reference(ctx->cpi, update); +static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + const int ref_frame_flags = va_arg(args, int); + vp9_update_reference(ctx->cpi, ref_frame_flags); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_use_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - int reference_flag = va_arg(args, int); +static 
vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + const int reference_flag = va_arg(args, int); vp9_use_as_reference(ctx->cpi, reference_flag); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_roi_map(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { // TODO(yaowu): Need to re-implement and test for VP9. return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - // TODO(yaowu): Need to re-implement and test for VP9. - return VPX_CODEC_INVALID_PARAM; +static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); + + if (map) { + if (!vp9_set_active_map(ctx->cpi, map->active_map, map->rows, map->cols)) + return VPX_CODEC_OK; + else + return VPX_CODEC_INVALID_PARAM; + } else { + return VPX_CODEC_INVALID_PARAM; + } } -static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_scaling_mode_t *scalemode = va_arg(args, vpx_scaling_mode_t *); +static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); - if (scalemode != NULL) { - int res; - res = vp9_set_internal_size(ctx->cpi, - (VPX_SCALING)scalemode->h_scaling_mode, - (VPX_SCALING)scalemode->v_scaling_mode); + if (mode) { + const int res = vp9_set_internal_size(ctx->cpi, + (VPX_SCALING)mode->h_scaling_mode, + (VPX_SCALING)mode->v_scaling_mode); return (res == 0) ? 
VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM; } else { return VPX_CODEC_INVALID_PARAM; } } -static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, +static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int data = va_arg(args, int); + const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; vp9_set_svc(ctx->cpi, data); - // CBR mode for SVC with both temporal and spatial layers not yet supported. + // CBR or two pass mode for SVC with both temporal and spatial layers + // not yet supported. if (data == 1 && - ctx->cfg.rc_end_usage == VPX_CBR && - ctx->cfg.ss_number_layers > 1 && - ctx->cfg.ts_number_layers > 1) { + (cfg->rc_end_usage == VPX_CBR || + cfg->g_pass == VPX_RC_FIRST_PASS || + cfg->g_pass == VPX_RC_LAST_PASS) && + cfg->ss_number_layers > 1 && + cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *); - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; - cpi->svc.spatial_layer_id = data->spatial_layer_id; - cpi->svc.temporal_layer_id = data->temporal_layer_id; + vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); + VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; + SVC *const svc = &cpi->svc; + svc->spatial_layer_id = data->spatial_layer_id; + svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. 
- if (cpi->svc.temporal_layer_id < 0 || - cpi->svc.temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { + if (svc->temporal_layer_id < 0 || + svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } - if (cpi->svc.spatial_layer_id < 0 || - cpi->svc.spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { + if (svc->spatial_layer_id < 0 || + svc->spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; - vpx_svc_parameters_t *params = va_arg(args, vpx_svc_parameters_t *); + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *); - if (params == NULL) return VPX_CODEC_INVALID_PARAM; + if (params == NULL) + return VPX_CODEC_INVALID_PARAM; cpi->svc.spatial_layer_id = params->spatial_layer; cpi->svc.temporal_layer_id = params->temporal_layer; @@ -1079,135 +1061,139 @@ static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, ctx->cfg.rc_max_quantizer = params->max_quantizer; ctx->cfg.rc_min_quantizer = params->min_quantizer; - set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); return VPX_CODEC_OK; } -static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { - {VP8_SET_REFERENCE, vp9e_set_reference}, - {VP8_COPY_REFERENCE, vp9e_copy_reference}, - {VP8_SET_POSTPROC, vp9e_set_previewpp}, - {VP8E_UPD_ENTROPY, vp9e_update_entropy}, - {VP8E_UPD_REFERENCE, vp9e_update_reference}, - {VP8E_USE_REFERENCE, vp9e_use_reference}, - {VP8E_SET_ROI_MAP, vp9e_set_roi_map}, - {VP8E_SET_ACTIVEMAP, vp9e_set_activemap}, - {VP8E_SET_SCALEMODE, vp9e_set_scalemode}, - {VP8E_SET_CPUUSED, set_param}, - {VP8E_SET_NOISE_SENSITIVITY, 
set_param}, - {VP8E_SET_ENABLEAUTOALTREF, set_param}, - {VP8E_SET_SHARPNESS, set_param}, - {VP8E_SET_STATIC_THRESHOLD, set_param}, - {VP9E_SET_TILE_COLUMNS, set_param}, - {VP9E_SET_TILE_ROWS, set_param}, - {VP8E_GET_LAST_QUANTIZER, get_param}, - {VP8E_GET_LAST_QUANTIZER_64, get_param}, - {VP8E_SET_ARNR_MAXFRAMES, set_param}, - {VP8E_SET_ARNR_STRENGTH, set_param}, - {VP8E_SET_ARNR_TYPE, set_param}, - {VP8E_SET_TUNING, set_param}, - {VP8E_SET_CQ_LEVEL, set_param}, - {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, - {VP9E_SET_LOSSLESS, set_param}, - {VP9E_SET_FRAME_PARALLEL_DECODING, set_param}, - {VP9E_SET_AQ_MODE, set_param}, - {VP9_GET_REFERENCE, get_reference}, - {VP9E_SET_SVC, vp9e_set_svc}, - {VP9E_SET_SVC_PARAMETERS, vp9e_set_svc_parameters}, - {VP9E_SET_SVC_LAYER_ID, vp9e_set_svc_layer_id}, +static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + {VP8E_UPD_ENTROPY, ctrl_update_entropy}, + {VP8E_UPD_REFERENCE, ctrl_update_reference}, + {VP8E_USE_REFERENCE, ctrl_use_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_previewpp}, + {VP8E_SET_ROI_MAP, ctrl_set_roi_map}, + {VP8E_SET_ACTIVEMAP, ctrl_set_active_map}, + {VP8E_SET_SCALEMODE, ctrl_set_scale_mode}, + {VP8E_SET_CPUUSED, ctrl_set_param}, + {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_param}, + {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_param}, + {VP8E_SET_SHARPNESS, ctrl_set_param}, + {VP8E_SET_STATIC_THRESHOLD, ctrl_set_param}, + {VP9E_SET_TILE_COLUMNS, ctrl_set_param}, + {VP9E_SET_TILE_ROWS, ctrl_set_param}, + {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param}, + {VP8E_SET_ARNR_STRENGTH, ctrl_set_param}, + {VP8E_SET_ARNR_TYPE, ctrl_set_param}, + {VP8E_SET_TUNING, ctrl_set_param}, + {VP8E_SET_CQ_LEVEL, ctrl_set_param}, + {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param}, + {VP9E_SET_LOSSLESS, ctrl_set_param}, + {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_param}, + {VP9E_SET_AQ_MODE, ctrl_set_param}, + {VP9E_SET_FRAME_PERIODIC_BOOST, 
ctrl_set_param}, + {VP9E_SET_SVC, ctrl_set_svc}, + {VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters}, + {VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, + + // Getters + {VP8E_GET_LAST_QUANTIZER, ctrl_get_param}, + {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_param}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + { -1, NULL}, }; -static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = { +static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { { 0, { // NOLINT - 0, /* g_usage */ - 0, /* g_threads */ - 0, /* g_profile */ + 0, // g_usage + 0, // g_threads + 0, // g_profile - 320, /* g_width */ - 240, /* g_height */ - {1, 30}, /* g_timebase */ + 320, // g_width + 240, // g_height + {1, 30}, // g_timebase - 0, /* g_error_resilient */ + 0, // g_error_resilient - VPX_RC_ONE_PASS, /* g_pass */ + VPX_RC_ONE_PASS, // g_pass - 25, /* g_lag_in_frames */ + 25, // g_lag_in_frames - 0, /* rc_dropframe_thresh */ - 0, /* rc_resize_allowed */ - 60, /* rc_resize_down_thresold */ - 30, /* rc_resize_up_thresold */ + 0, // rc_dropframe_thresh + 0, // rc_resize_allowed + 60, // rc_resize_down_thresold + 30, // rc_resize_up_thresold - VPX_VBR, /* rc_end_usage */ + VPX_VBR, // rc_end_usage #if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION) - {0}, /* rc_twopass_stats_in */ + {0}, // rc_twopass_stats_in #endif - 256, /* rc_target_bandwidth */ - 0, /* rc_min_quantizer */ - 63, /* rc_max_quantizer */ - 100, /* rc_undershoot_pct */ - 100, /* rc_overshoot_pct */ - - 6000, /* rc_max_buffer_size */ - 4000, /* rc_buffer_initial_size; */ - 5000, /* rc_buffer_optimal_size; */ - - 50, /* rc_two_pass_vbrbias */ - 0, /* rc_two_pass_vbrmin_section */ - 2000, /* rc_two_pass_vbrmax_section */ - - /* keyframing settings (kf) */ - VPX_KF_AUTO, /* g_kfmode*/ - 0, /* kf_min_dist */ - 9999, /* kf_max_dist */ - - VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ - {0}, /* ss_target_bitrate */ - 1, /* ts_number_layers */ - {0}, /* ts_target_bitrate */ - {0}, /* ts_rate_decimator */ - 0, /* ts_periodicity */ - {0}, /* 
ts_layer_id */ + 256, // rc_target_bandwidth + 0, // rc_min_quantizer + 63, // rc_max_quantizer + 100, // rc_undershoot_pct + 100, // rc_overshoot_pct + + 6000, // rc_max_buffer_size + 4000, // rc_buffer_initial_size + 5000, // rc_buffer_optimal_size + + 50, // rc_two_pass_vbrbias + 0, // rc_two_pass_vbrmin_section + 2000, // rc_two_pass_vbrmax_section + + // keyframing settings (kf) + VPX_KF_AUTO, // g_kfmode + 0, // kf_min_dist + 9999, // kf_max_dist + + VPX_SS_DEFAULT_LAYERS, // ss_number_layers + {0}, // ss_target_bitrate + 1, // ts_number_layers + {0}, // ts_target_bitrate + {0}, // ts_rate_decimator + 0, // ts_periodicity + {0}, // ts_layer_id #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION) - "vp8.fpf" /* first pass filename */ + "vp8.fpf" // first pass filename #endif } }, { -1, {NOT_IMPLEMENTED}} }; - #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp9_cx) = { "WebM Project VP9 Encoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR | - VPX_CODEC_CAP_OUTPUT_PARTITION, - /* vpx_codec_caps_t caps; */ - vp9e_init, /* vpx_codec_init_fn_t init; */ - vp9e_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp9e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */ - NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */ + VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, // vpx_codec_caps_t + encoder_init, // vpx_codec_init_fn_t + encoder_destroy, // vpx_codec_destroy_fn_t + encoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + NOT_IMPLEMENTED, // vpx_codec_get_mmap_fn_t + NOT_IMPLEMENTED, // vpx_codec_set_mmap_fn_t { // NOLINT - NOT_IMPLEMENTED, /* vpx_codec_peek_si_fn_t peek_si; */ - NOT_IMPLEMENTED, /* vpx_codec_get_si_fn_t get_si; */ - NOT_IMPLEMENTED, /* vpx_codec_decode_fn_t decode; */ - NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */ + NOT_IMPLEMENTED, // vpx_codec_peek_si_fn_t + NOT_IMPLEMENTED, // 
vpx_codec_get_si_fn_t + NOT_IMPLEMENTED, // vpx_codec_decode_fn_t + NOT_IMPLEMENTED, // vpx_codec_frame_get_fn_t }, { // NOLINT - vp9e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ - vp9e_encode, /* vpx_codec_encode_fn_t encode; */ - vp9e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ - vp9e_set_config, - NOT_IMPLEMENTED, - vp9e_get_preview, - } /* encoder functions */ + encoder_usage_cfg_map, // vpx_codec_enc_cfg_map_t + encoder_encode, // vpx_codec_encode_fn_t + encoder_get_cxdata, // vpx_codec_get_cx_data_fn_t + encoder_set_config, // vpx_codec_enc_config_set_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t + encoder_get_preview, // vpx_codec_get_preview_frame_fn_t + NOT_IMPLEMENTED , // vpx_codec_enc_mr_get_mem_loc_fn_t + } }; diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c index 83d64b8..5ed7484 100644 --- a/source/libvpx/vp9/vp9_dx_iface.c +++ b/source/libvpx/vp9/vp9_dx_iface.c @@ -8,45 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include <stdlib.h> #include <string.h> -#include "vpx/vpx_decoder.h" -#include "vpx/vp8dx.h" -#include "vpx/internal/vpx_codec_internal.h" + #include "./vpx_version.h" + +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + #include "vp9/common/vp9_frame_buffers.h" -#include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_onyxd_int.h" + +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_read_bit_buffer.h" + #include "vp9/vp9_iface_common.h" #define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? 
VPX_CODEC_CAP_POSTPROC : 0) -typedef vpx_codec_stream_info_t vp9_stream_info_t; -/* Structures for handling memory allocations */ -typedef enum { - VP9_SEG_ALG_PRIV = 256, - VP9_SEG_MAX -} mem_seg_id_t; -#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) - -static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si, - vpx_codec_flags_t flags); - -static const mem_req_t vp9_mem_req_segs[] = { - {VP9_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, priv_sz}, - {VP9_SEG_MAX, 0, 0, 0, NULL} -}; +typedef vpx_codec_stream_info_t vp9_stream_info_t; struct vpx_codec_alg_priv { vpx_codec_priv_t base; - vpx_codec_mmap_t mmaps[NELEMENTS(vp9_mem_req_segs) - 1]; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; - int defer_alloc; int decoder_init; - struct VP9Decompressor *pbi; + struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; #if CONFIG_POSTPROC_VISUALIZER @@ -67,86 +54,54 @@ struct vpx_codec_alg_priv { vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; }; -static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si, - vpx_codec_flags_t flags) { - /* Although this declaration is constant, we can't use it in the requested - * segments list because we want to define the requested segments list - * before defining the private type (so that the number of memory maps is - * known) - */ - (void)si; - return sizeof(vpx_codec_alg_priv_t); -} - -static void vp9_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap) { - int i; - - ctx->priv = mmap->base; - ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; - ctx->priv->alg_priv = mmap->base; - - for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) - ctx->priv->alg_priv->mmaps[i].id = vp9_mem_req_segs[i].id; - - ctx->priv->alg_priv->mmaps[0] = *mmap; - ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); - ctx->priv->init_flags = ctx->init_flags; - - if (ctx->config.dec) { - /* Update the reference to the config structure to an internal copy. 
*/ - ctx->priv->alg_priv->cfg = *ctx->config.dec; - ctx->config.dec = &ctx->priv->alg_priv->cfg; - } -} - -static void vp9_finalize_mmaps(vpx_codec_alg_priv_t *ctx) { - /* nothing to clean up */ -} - -static vpx_codec_err_t vp9_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - vpx_codec_err_t res = VPX_CODEC_OK; - +static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { // This function only allocates space for the vpx_codec_alg_priv_t // structure. More memory may be required at the time the stream // information becomes known. if (!ctx->priv) { - vpx_codec_mmap_t mmap; - - mmap.id = vp9_mem_req_segs[0].id; - mmap.sz = sizeof(vpx_codec_alg_priv_t); - mmap.align = vp9_mem_req_segs[0].align; - mmap.flags = vp9_mem_req_segs[0].flags; - - res = vpx_mmap_alloc(&mmap); - if (!res) { - vp9_init_ctx(ctx, &mmap); - - ctx->priv->alg_priv->defer_alloc = 1; + vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv)); + if (alg_priv == NULL) + return VPX_CODEC_MEM_ERROR; + + vp9_zero(*alg_priv); + + ctx->priv = (vpx_codec_priv_t *)alg_priv; + ctx->priv->sz = sizeof(*ctx->priv); + ctx->priv->iface = ctx->iface; + ctx->priv->alg_priv = alg_priv; + ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); + ctx->priv->init_flags = ctx->init_flags; + + if (ctx->config.dec) { + // Update the reference to the config structure to an internal copy. 
+ ctx->priv->alg_priv->cfg = *ctx->config.dec; + ctx->config.dec = &ctx->priv->alg_priv->cfg; } } - return res; + return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_destroy(vpx_codec_alg_priv_t *ctx) { - int i; - - vp9_remove_decompressor(ctx->pbi); - - for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--) { - if (ctx->mmaps[i].dtor) - ctx->mmaps[i].dtor(&ctx->mmaps[i]); +static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { + if (ctx->pbi) { + vp9_decoder_remove(ctx->pbi); + ctx->pbi = NULL; } + vpx_free(ctx); + return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, - vpx_codec_stream_info_t *si) { - if (data_sz <= 8) return VPX_CODEC_UNSUP_BITSTREAM; - if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; +static vpx_codec_err_t decoder_peek_si(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + if (data_sz <= 8) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (data + data_sz <= data) + return VPX_CODEC_INVALID_PARAM; si->is_kf = 0; si->w = si->h = 0; @@ -204,8 +159,8 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) { +static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si) { const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) ? 
sizeof(vp9_stream_info_t) : sizeof(vpx_codec_stream_info_t); @@ -215,7 +170,6 @@ static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } - static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { if (error->error_code) @@ -224,149 +178,122 @@ static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, return error->error_code; } -static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, - const uint8_t **data, unsigned int data_sz, - void *user_priv, int64_t deadline) { - vpx_codec_err_t res = VPX_CODEC_OK; +static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { + VP9_COMMON *const cm = &ctx->pbi->common; - ctx->img_avail = 0; + cm->new_fb_idx = -1; - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. - */ - if (!ctx->si.h) - res = ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si); + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + cm->get_fb_cb = ctx->get_ext_fb_cb; + cm->release_fb_cb = ctx->release_ext_fb_cb; + cm->cb_priv = ctx->ext_priv; + } else { + cm->get_fb_cb = vp9_get_frame_buffer; + cm->release_fb_cb = vp9_release_frame_buffer; + if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); - /* Perform deferred allocations, if required */ - if (!res && ctx->defer_alloc) { - int i; + cm->cb_priv = &cm->int_frame_buffers; + } +} - for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++) { - vpx_codec_dec_cfg_t cfg; +static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { + cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; + cfg->deblocking_level = 4; + cfg->noise_level = 0; +} - cfg.w = ctx->si.w; - cfg.h = ctx->si.h; - ctx->mmaps[i].id = vp9_mem_req_segs[i].id; - ctx->mmaps[i].sz = vp9_mem_req_segs[i].sz; - 
ctx->mmaps[i].align = vp9_mem_req_segs[i].align; - ctx->mmaps[i].flags = vp9_mem_req_segs[i].flags; +static void set_ppflags(const vpx_codec_alg_priv_t *ctx, + vp9_ppflags_t *flags) { + flags->post_proc_flag = +#if CONFIG_POSTPROC_VISUALIZER + (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | + (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | +#endif + ctx->postproc_cfg.post_proc_flag; - if (!ctx->mmaps[i].sz) - ctx->mmaps[i].sz = vp9_mem_req_segs[i].calc_sz(&cfg, - ctx->base.init_flags); + flags->deblocking_level = ctx->postproc_cfg.deblocking_level; + flags->noise_level = ctx->postproc_cfg.noise_level; +#if CONFIG_POSTPROC_VISUALIZER + flags->display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; + flags->display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; + flags->display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags->display_mv_flag = ctx->dbg_display_mv_flag; +#endif +} - res = vpx_mmap_alloc(&ctx->mmaps[i]); - } +static void init_decoder(vpx_codec_alg_priv_t *ctx) { + VP9D_CONFIG oxcf; + oxcf.width = ctx->si.w; + oxcf.height = ctx->si.h; + oxcf.version = 9; + oxcf.max_threads = ctx->cfg.threads; + oxcf.inv_tile_order = ctx->invert_tile_order; - if (!res) - vp9_finalize_mmaps(ctx); + ctx->pbi = vp9_decoder_create(&oxcf); + if (ctx->pbi == NULL) + return; - ctx->defer_alloc = 0; - } + vp9_initialize_dec(); - /* Initialize the decoder instance on the first frame*/ - if (!res && !ctx->decoder_init) { - res = vpx_validate_mmaps(&ctx->si, ctx->mmaps, - vp9_mem_req_segs, NELEMENTS(vp9_mem_req_segs), - ctx->base.init_flags); - - if (!res) { - VP9D_CONFIG oxcf; - struct VP9Decompressor *optr; - - vp9_initialize_dec(); - - oxcf.width = ctx->si.w; - oxcf.height = ctx->si.h; - oxcf.version = 9; - oxcf.postprocess = 0; - oxcf.max_threads = ctx->cfg.threads; - oxcf.inv_tile_order = ctx->invert_tile_order; - optr 
= vp9_create_decompressor(&oxcf); - - // If postprocessing was enabled by the application and a - // configuration has not been provided, default it. - if (!ctx->postproc_cfg_set && - (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { - ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; - ctx->postproc_cfg.deblocking_level = 4; - ctx->postproc_cfg.noise_level = 0; - } + // If postprocessing was enabled by the application and a + // configuration has not been provided, default it. + if (!ctx->postproc_cfg_set && + (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) + set_default_ppflags(&ctx->postproc_cfg); - if (!optr) { - res = VPX_CODEC_ERROR; - } else { - VP9D_COMP *const pbi = (VP9D_COMP*)optr; - VP9_COMMON *const cm = &pbi->common; + init_buffer_callbacks(ctx); +} - // Set index to not initialized. - cm->new_fb_idx = -1; +static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, + const uint8_t **data, unsigned int data_sz, + void *user_priv, int64_t deadline) { + YV12_BUFFER_CONFIG sd = { 0 }; + int64_t time_stamp = 0, time_end_stamp = 0; + vp9_ppflags_t flags = {0}; + VP9_COMMON *cm = NULL; - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - cm->get_fb_cb = ctx->get_ext_fb_cb; - cm->release_fb_cb = ctx->release_ext_fb_cb; - cm->cb_priv = ctx->ext_priv; - } else { - cm->get_fb_cb = vp9_get_frame_buffer; - cm->release_fb_cb = vp9_release_frame_buffer; + ctx->img_avail = 0; - if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); - cm->cb_priv = &cm->int_frame_buffers; - } + // Determine the stream parameters. Note that we rely on peek_si to + // validate that we have a buffer that does not wrap around the top + // of the heap. 
+ if (!ctx->si.h) { + const vpx_codec_err_t res = + ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si); + if (res != VPX_CODEC_OK) + return res; + } - ctx->pbi = optr; - } - } + // Initialize the decoder instance on the first frame + if (!ctx->decoder_init) { + init_decoder(ctx); + if (ctx->pbi == NULL) + return VPX_CODEC_ERROR; ctx->decoder_init = 1; } - if (!res && ctx->pbi) { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp9_ppflags_t flags = {0}; - - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { - flags.post_proc_flag = -#if CONFIG_POSTPROC_VISUALIZER - (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | - (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | -#endif - ctx->postproc_cfg.post_proc_flag; + cm = &ctx->pbi->common; - flags.deblocking_level = ctx->postproc_cfg.deblocking_level; - flags.noise_level = ctx->postproc_cfg.noise_level; -#if CONFIG_POSTPROC_VISUALIZER - flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; - flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags.display_mv_flag = ctx->dbg_display_mv_flag; -#endif - } + if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) + return update_error_state(ctx, &cm->error); - if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) { - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; - res = update_error_state(ctx, &pbi->common.error); - } + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); - if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp, - &time_end_stamp, &flags)) { - VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi; - VP9_COMMON *const cm = &pbi->common; - yuvconfig2image(&ctx->img, &sd, user_priv); + if (vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) + 
return update_error_state(ctx, &cm->error); - ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->img_avail = 1; - } - } + yuvconfig2image(&ctx->img, &sd, user_priv); + ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->img_avail = 1; - return res; + return VPX_CODEC_OK; } static void parse_superframe_index(const uint8_t *data, size_t data_sz, @@ -385,7 +312,7 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, if (data_sz >= index_sz && data[data_sz - index_sz] == marker) { // found a valid superframe index uint32_t i, j; - const uint8_t *x = data + data_sz - index_sz + 1; + const uint8_t *x = &data[data_sz - index_sz + 1]; for (i = 0; i < frames; i++) { uint32_t this_sz = 0; @@ -400,18 +327,17 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, } } -static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) { +static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, unsigned int data_sz, + void *user_priv, long deadline) { const uint8_t *data_start = data; const uint8_t *data_end = data + data_sz; - vpx_codec_err_t res = 0; + vpx_codec_err_t res = VPX_CODEC_OK; uint32_t sizes[8]; int frames_this_pts, frame_count = 0; - if (data == NULL || data_sz == 0) return VPX_CODEC_INVALID_PARAM; + if (data == NULL || data_sz == 0) + return VPX_CODEC_INVALID_PARAM; parse_superframe_index(data, data_sz, sizes, &frames_this_pts); @@ -450,27 +376,27 @@ static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, assert(data_start >= data); assert(data_start <= data_end); - /* Early exit if there was a decode error */ + // Early exit if there was a decode error if (res) break; - /* Account for suboptimal termination by the encoder. */ + // Account for suboptimal termination by the encoder. 
while (data_start < data_end && *data_start == 0) data_start++; data_sz = (unsigned int)(data_end - data_start); } while (data_start < data_end); + return res; } -static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { +static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; if (ctx->img_avail) { - /* iter acts as a flip flop, so an image is only returned on the first - * call to get_frame. - */ + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. if (!(*iter)) { img = &ctx->img; *iter = img; @@ -481,7 +407,7 @@ static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx, return img; } -static vpx_codec_err_t vp9_set_fb_fn( +static vpx_codec_err_t decoder_set_fb_fn( vpx_codec_alg_priv_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { @@ -499,93 +425,24 @@ static vpx_codec_err_t vp9_set_fb_fn( return VPX_CODEC_ERROR; } -static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, - vpx_codec_mmap_t *mmap, - vpx_codec_iter_t *iter) { - vpx_codec_err_t res; - const mem_req_t *seg_iter = *iter; - - /* Get address of next segment request */ - do { - if (!seg_iter) - seg_iter = vp9_mem_req_segs; - else if (seg_iter->id != VP9_SEG_MAX) - seg_iter++; - - *iter = (vpx_codec_iter_t)seg_iter; - - if (seg_iter->id != VP9_SEG_MAX) { - mmap->id = seg_iter->id; - mmap->sz = seg_iter->sz; - mmap->align = seg_iter->align; - mmap->flags = seg_iter->flags; - - if (!seg_iter->sz) - mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags); - - res = VPX_CODEC_OK; - } else { - res = VPX_CODEC_LIST_END; - } - } while (!mmap->sz && res != VPX_CODEC_LIST_END); - - return res; -} - -static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, - const vpx_codec_mmap_t *mmap) { - vpx_codec_err_t res = VPX_CODEC_MEM_ERROR; - int i, done; - - if (!ctx->priv) { - if (mmap->id == 
VP9_SEG_ALG_PRIV) { - if (!ctx->priv) { - vp9_init_ctx(ctx, mmap); - res = VPX_CODEC_OK; - } - } - } - - done = 1; - - if (!res && ctx->priv->alg_priv) { - for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) { - if (ctx->priv->alg_priv->mmaps[i].id == mmap->id) - if (!ctx->priv->alg_priv->mmaps[i].base) { - ctx->priv->alg_priv->mmaps[i] = *mmap; - res = VPX_CODEC_OK; - } - - done &= (ctx->priv->alg_priv->mmaps[i].base != NULL); - } - } - - if (done && !res) { - vp9_finalize_mmaps(ctx->priv->alg_priv); - res = ctx->iface->init(ctx, NULL); - } - - return res; -} - -static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(ctx->pbi, + return vp9_set_reference_dec(&ctx->pbi->common, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; } } -static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -601,8 +458,8 @@ static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); if (data) { @@ -616,8 +473,8 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, } } -static 
vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); @@ -633,8 +490,8 @@ static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, #endif } -static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { #if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC int data = va_arg(args, int); @@ -653,14 +510,15 @@ static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, #endif } -static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { - int *update_info = va_arg(args, int *); - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; +static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { + int *const update_info = va_arg(args, int *); if (update_info) { - *update_info = pbi->refresh_frame_flags; - + if (ctx->pbi) + *update_info = ctx->pbi->refresh_frame_flags; + else + return VPX_CODEC_ERROR; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -668,14 +526,13 @@ static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { +static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { int *corrupted = va_arg(args, int *); if (corrupted) { - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; - if (pbi) - *corrupted = pbi->common.frame_to_show->corrupted; + if (ctx->pbi) + *corrupted = ctx->pbi->common.frame_to_show->corrupted; else return VPX_CODEC_ERROR; return VPX_CODEC_OK; @@ -684,15 +541,15 @@ static vpx_codec_err_t 
get_frame_corrupted(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { +static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { int *const display_size = va_arg(args, int *); if (display_size) { - const VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi; - if (pbi) { - display_size[0] = pbi->common.display_width; - display_size[1] = pbi->common.display_height; + if (ctx->pbi) { + const VP9_COMMON *const cm = &ctx->pbi->common; + display_size[0] = cm->display_width; + display_size[1] = cm->display_height; } else { return VPX_CODEC_ERROR; } @@ -702,30 +559,33 @@ static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { ctx->invert_tile_order = va_arg(args, int); return VPX_CODEC_OK; } -static vpx_codec_ctrl_fn_map_t ctf_maps[] = { - {VP8_SET_REFERENCE, set_reference}, - {VP8_COPY_REFERENCE, copy_reference}, - {VP8_SET_POSTPROC, set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, set_dbg_options}, - {VP8_SET_DBG_COLOR_MB_MODES, set_dbg_options}, - {VP8_SET_DBG_COLOR_B_MODES, set_dbg_options}, - {VP8_SET_DBG_DISPLAY_MV, set_dbg_options}, - {VP8D_GET_LAST_REF_UPDATES, get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, get_frame_corrupted}, - {VP9_GET_REFERENCE, get_reference}, - {VP9D_GET_DISPLAY_SIZE, get_display_size}, - {VP9_INVERT_TILE_DECODE_ORDER, set_invert_tile_order}, +static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, + 
{VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, + {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, + + // Getters + {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size}, + { -1, NULL}, }; - #ifndef VERSION_STRING #define VERSION_STRING #endif @@ -733,22 +593,20 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = { "WebM Project VP9 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | - VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, - /* vpx_codec_caps_t caps; */ - vp9_init, /* vpx_codec_init_fn_t init; */ - vp9_destroy, /* vpx_codec_destroy_fn_t destroy; */ - ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - vp9_xma_get_mmap, /* vpx_codec_get_mmap_fn_t get_mmap; */ - vp9_xma_set_mmap, /* vpx_codec_set_mmap_fn_t set_mmap; */ + VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t + decoder_init, // vpx_codec_init_fn_t + decoder_destroy, // vpx_codec_destroy_fn_t + decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + NOT_IMPLEMENTED, // vpx_codec_get_mmap_fn_t + NOT_IMPLEMENTED, // vpx_codec_set_mmap_fn_t { // NOLINT - vp9_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ - vp9_get_si, /* vpx_codec_get_si_fn_t get_si; */ - vp9_decode, /* vpx_codec_decode_fn_t decode; */ - vp9_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ - vp9_set_fb_fn, /* vpx_codec_set_fb_fn_t set_fb_fn; */ + decoder_peek_si, // vpx_codec_peek_si_fn_t + decoder_get_si, // vpx_codec_get_si_fn_t + decoder_decode, // vpx_codec_decode_fn_t + decoder_get_frame, // vpx_codec_frame_get_fn_t + decoder_set_fb_fn, // vpx_codec_set_fb_fn_t }, { // NOLINT - /* encoder functions */ NOT_IMPLEMENTED, NOT_IMPLEMENTED, NOT_IMPLEMENTED, diff --git a/source/libvpx/vp9/vp9cx.mk b/source/libvpx/vp9/vp9cx.mk index 6679f89..da6c0f8 100644 --- a/source/libvpx/vp9/vp9cx.mk +++ b/source/libvpx/vp9/vp9cx.mk @@ -18,6 +18,8 @@ 
VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c +VP9_CX_SRCS-yes += encoder/vp9_cost.h +VP9_CX_SRCS-yes += encoder/vp9_cost.c VP9_CX_SRCS-yes += encoder/vp9_dct.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h @@ -28,6 +30,7 @@ VP9_CX_SRCS-yes += encoder/vp9_firstpass.c VP9_CX_SRCS-yes += encoder/vp9_block.h VP9_CX_SRCS-yes += encoder/vp9_writer.h VP9_CX_SRCS-yes += encoder/vp9_writer.c +VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.c VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.h @@ -42,6 +45,7 @@ VP9_CX_SRCS-yes += encoder/vp9_quantize.h VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h VP9_CX_SRCS-yes += encoder/vp9_rdopt.h VP9_CX_SRCS-yes += encoder/vp9_pickmode.h +VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.h VP9_CX_SRCS-yes += encoder/vp9_treewriter.h VP9_CX_SRCS-yes += encoder/vp9_variance.h @@ -56,16 +60,24 @@ VP9_CX_SRCS-yes += encoder/vp9_pickmode.c VP9_CX_SRCS-yes += encoder/vp9_sad.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.h +VP9_CX_SRCS-yes += encoder/vp9_speed_features.c +VP9_CX_SRCS-yes += encoder/vp9_speed_features.h VP9_CX_SRCS-yes += encoder/vp9_subexp.c VP9_CX_SRCS-yes += encoder/vp9_subexp.h +VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_variance.c -VP9_CX_SRCS-yes += encoder/vp9_vaq.c -VP9_CX_SRCS-yes += encoder/vp9_vaq.h +VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c +VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h +VP9_CX_SRCS-yes += 
encoder/vp9_aq_cyclicrefresh.c +VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h +VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c +VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c @@ -83,6 +95,7 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm diff --git a/source/libvpx/vp9/vp9dx.mk b/source/libvpx/vp9/vp9dx.mk index de210f4..92ec6fd 100644 --- a/source/libvpx/vp9/vp9dx.mk +++ b/source/libvpx/vp9/vp9dx.mk @@ -25,14 +25,14 @@ VP9_DX_SRCS-yes += decoder/vp9_dthread.c VP9_DX_SRCS-yes += decoder/vp9_dthread.h VP9_DX_SRCS-yes += decoder/vp9_reader.h VP9_DX_SRCS-yes += decoder/vp9_reader.c +VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.c VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h VP9_DX_SRCS-yes += decoder/vp9_decodemv.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h -VP9_DX_SRCS-yes += decoder/vp9_onyxd.h -VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h +VP9_DX_SRCS-yes += decoder/vp9_decoder.c +VP9_DX_SRCS-yes += decoder/vp9_decoder.h VP9_DX_SRCS-yes += decoder/vp9_thread.c VP9_DX_SRCS-yes += decoder/vp9_thread.h -VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h diff --git a/source/libvpx/vpx/exports_enc b/source/libvpx/vpx/exports_enc index 99b1bfa..155faf6 100644 --- a/source/libvpx/vpx/exports_enc +++ b/source/libvpx/vpx/exports_enc @@ -21,3 +21,5 @@ 
text vpx_svc_set_options text vpx_svc_set_quantizers text vpx_svc_set_scale_factors text vpx_svc_get_layer_resolution +text vpx_svc_get_rc_stats_buffer_size +text vpx_svc_get_rc_stats_buffer
\ No newline at end of file diff --git a/source/libvpx/vpx/src/svc_encodeframe.c b/source/libvpx/vpx/src/svc_encodeframe.c index c783724..76aacd2 100644 --- a/source/libvpx/vpx/src/svc_encodeframe.c +++ b/source/libvpx/vpx/src/svc_encodeframe.c @@ -13,6 +13,7 @@ * VP9 SVC encoding support via libvpx */ +#include <assert.h> #include <math.h> #include <stdarg.h> #include <stdio.h> @@ -81,6 +82,10 @@ typedef struct SvcInternal { size_t buffer_size; void *buffer; + char *rc_stats_buf; + size_t rc_stats_buf_size; + size_t rc_stats_buf_used; + char message_buffer[2048]; vpx_codec_ctx_t *codec_ctx; } SvcInternal; @@ -519,9 +524,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_ctx->spatial_layers); return VPX_CODEC_INVALID_PARAM; } - // use SvcInternal value for number of layers to enable forcing single layer - // for first frame - si->layers = svc_ctx->spatial_layers; res = parse_quantizer_values(svc_ctx, si->quantizers, 0); if (res != VPX_CODEC_OK) return res; @@ -533,10 +535,13 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, res = parse_scale_factors(svc_ctx, si->scale_factors); if (res != VPX_CODEC_OK) return res; - // parse aggregate command line options + // Parse aggregate command line options. Options must start with + // "layers=xx" then followed by other options res = parse_options(svc_ctx, si->options); if (res != VPX_CODEC_OK) return res; + si->layers = svc_ctx->spatial_layers; + // Assign target bitrate for each layer. We calculate the ratio // from the resolution for now. 
// TODO(Minghai): Optimize the mechanism of allocating bits after @@ -546,6 +551,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, float total = 0; float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; + assert(si->layers <= VPX_SS_MAX_LAYERS); for (i = 0; i < si->layers; ++i) { int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers; if (pos < VPX_SS_MAX_LAYERS && si->scaling_factor_den[pos] > 0) { @@ -569,7 +575,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->ss_number_layers = si->layers; enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. enc_cfg->kf_mode = VPX_KF_DISABLED; - enc_cfg->g_pass = VPX_RC_ONE_PASS; // Lag in frames not currently supported enc_cfg->g_lag_in_frames = 0; @@ -578,8 +583,12 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->rc_dropframe_thresh = 0; enc_cfg->rc_end_usage = VPX_CBR; enc_cfg->rc_resize_allowed = 0; - enc_cfg->rc_min_quantizer = 33; - enc_cfg->rc_max_quantizer = 33; + + if (enc_cfg->g_pass == VPX_RC_ONE_PASS) { + enc_cfg->rc_min_quantizer = 33; + enc_cfg->rc_max_quantizer = 33; + } + enc_cfg->rc_undershoot_pct = 100; enc_cfg->rc_overshoot_pct = 15; enc_cfg->rc_buf_initial_sz = 500; @@ -779,12 +788,17 @@ static void set_svc_parameters(SvcContext *svc_ctx, } layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; - if (vpx_svc_is_keyframe(svc_ctx)) { - svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; - svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) { + if (vpx_svc_is_keyframe(svc_ctx)) { + svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; + svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + } else { + svc_params.min_quantizer = si->quantizer[layer_index]; + svc_params.max_quantizer = si->quantizer[layer_index]; + } } else { - svc_params.min_quantizer = si->quantizer[layer_index]; - 
svc_params.max_quantizer = si->quantizer[layer_index]; + svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer; + svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer; } svc_params.distance_from_i_frame = si->frame_within_gop; @@ -845,12 +859,13 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, struct LayerData *layer_data; struct Superframe superframe; SvcInternal *const si = get_svc_internal(svc_ctx); - if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) { + if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } memset(&superframe, 0, sizeof(superframe)); svc_log_reset(svc_ctx); + si->rc_stats_buf_used = 0; si->layers = svc_ctx->spatial_layers; if (si->frame_within_gop >= si->kf_dist || @@ -860,9 +875,12 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->is_keyframe = (si->frame_within_gop == 0); si->frame_size = 0; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "vpx_svc_encode layers: %d, frame_count: %d, frame_within_gop: %d\n", - si->layers, si->encode_frame_count, si->frame_within_gop); + if (rawimg != NULL) { + svc_log(svc_ctx, SVC_LOG_DEBUG, + "vpx_svc_encode layers: %d, frame_count: %d, " + "frame_within_gop: %d\n", si->layers, si->encode_frame_count, + si->frame_within_gop); + } // encode each layer for (si->layer = 0; si->layer < si->layers; ++si->layer) { @@ -871,9 +889,11 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); continue; } - calculate_enc_frame_flags(svc_ctx); - set_svc_parameters(svc_ctx, codec_ctx); + if (rawimg != NULL) { + calculate_enc_frame_flags(svc_ctx); + set_svc_parameters(svc_ctx, codec_ctx); + } res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, si->enc_frame_flags, deadline); @@ -923,39 +943,63 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t 
*codec_ctx, } break; } + case VPX_CODEC_STATS_PKT: { + size_t new_size = si->rc_stats_buf_used + + cx_pkt->data.twopass_stats.sz; + + if (new_size > si->rc_stats_buf_size) { + char *p = (char*)realloc(si->rc_stats_buf, new_size); + if (p == NULL) { + svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n"); + break; + } + si->rc_stats_buf = p; + si->rc_stats_buf_size = new_size; + } + + memcpy(si->rc_stats_buf + si->rc_stats_buf_used, + cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); + si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; + break; + } default: { break; } } } + if (rawimg == NULL) { + break; + } } - // add superframe index to layer data list - sf_create_index(&superframe); - layer_data = ld_create(superframe.buffer, superframe.index_size); - ld_list_add(&cx_layer_list, layer_data); - - // get accumulated size of layer data - si->frame_size = ld_list_get_buffer_size(cx_layer_list); - if (si->frame_size == 0) return VPX_CODEC_ERROR; + if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) { + // add superframe index to layer data list + sf_create_index(&superframe); + layer_data = ld_create(superframe.buffer, superframe.index_size); + ld_list_add(&cx_layer_list, layer_data); + + // get accumulated size of layer data + si->frame_size = ld_list_get_buffer_size(cx_layer_list); + if (si->frame_size > 0) { + // all layers encoded, create single buffer with concatenated layers + if (si->frame_size > si->buffer_size) { + free(si->buffer); + si->buffer = malloc(si->frame_size); + if (si->buffer == NULL) { + ld_list_free(cx_layer_list); + return VPX_CODEC_MEM_ERROR; + } + si->buffer_size = si->frame_size; + } + // copy layer data into packet + ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - // all layers encoded, create single buffer with concatenated layers - if (si->frame_size > si->buffer_size) { - free(si->buffer); - si->buffer = malloc(si->frame_size); - if (si->buffer == NULL) { ld_list_free(cx_layer_list); - 
return VPX_CODEC_MEM_ERROR; + + svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " + "pts: %d\n", si->encode_frame_count, si->is_keyframe, + (int)si->frame_size, (int)pts); } - si->buffer_size = si->frame_size; } - // copy layer data into packet - ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - - ld_list_free(cx_layer_list); - - svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n", - si->encode_frame_count, si->is_keyframe, (int)si->frame_size, - (int)pts); ++si->frame_within_gop; ++si->encode_frame_count; @@ -1077,7 +1121,24 @@ void vpx_svc_release(SvcContext *svc_ctx) { si = (SvcInternal *)svc_ctx->internal; if (si != NULL) { free(si->buffer); + if (si->rc_stats_buf) { + free(si->rc_stats_buf); + } free(si); svc_ctx->internal = NULL; } } + +size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return 0; + return si->rc_stats_buf_used; +} + +char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return NULL; + return si->rc_stats_buf; +} + + diff --git a/source/libvpx/vpx/src/vpx_encoder.c b/source/libvpx/vpx/src/vpx_encoder.c index e69d96e..ece2d0b 100644 --- a/source/libvpx/vpx/src/vpx_encoder.c +++ b/source/libvpx/vpx/src/vpx_encoder.c @@ -394,7 +394,7 @@ const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list *iter = list->pkts; } - pkt = (const void *) * iter; + pkt = (const vpx_codec_cx_pkt_t *)*iter; if ((size_t)(pkt - list->pkts) < list->cnt) *iter = pkt + 1; diff --git a/source/libvpx/vpx/svc_context.h b/source/libvpx/vpx/svc_context.h index 98474ca..5d0fbbd 100644 --- a/source/libvpx/vpx/svc_context.h +++ b/source/libvpx/vpx/svc_context.h @@ -114,6 +114,17 @@ size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx); void *vpx_svc_get_buffer(const 
SvcContext *svc_ctx); /** + * return size of two pass rate control stats data to be returned by + * vpx_svc_get_rc_stats_buffer + */ +size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx); + +/** + * return buffer two pass of rate control stats data + */ +char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx); + +/** * return spatial resolution of the specified layer */ vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, diff --git a/source/libvpx/vpx/vp8cx.h b/source/libvpx/vpx/vp8cx.h index 0b637d4..8944a26 100644 --- a/source/libvpx/vpx/vp8cx.h +++ b/source/libvpx/vpx/vp8cx.h @@ -192,6 +192,7 @@ enum vp8e_enc_control_id { VP9E_SET_TILE_ROWS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, + VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_SVC, VP9E_SET_SVC_PARAMETERS, @@ -364,6 +365,8 @@ VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_AQ_MODE, unsigned int) +VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int) + /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vpx/vpx_encoder.h b/source/libvpx/vpx/vpx_encoder.h index 851ff1a..2c882c1 100644 --- a/source/libvpx/vpx/vpx_encoder.h +++ b/source/libvpx/vpx/vpx_encoder.h @@ -49,7 +49,7 @@ extern "C" { #define VPX_SS_MAX_LAYERS 5 /*! Spatial Scalability: Default number of coding layers */ -#define VPX_SS_DEFAULT_LAYERS 3 +#define VPX_SS_DEFAULT_LAYERS 1 /*!\brief Current ABI version number * diff --git a/source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm b/source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm deleted file mode 100644 index d070a47..0000000 --- a/source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm +++ /dev/null @@ -1,123 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vpx_yv12_copy_y_neon| - - ARM - REQUIRE8 - PRESERVE8 - - INCLUDE vpx_scale_asm_offsets.asm - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vpx_yv12_copy_y_neon(const YV12_BUFFER_CONFIG *src_ybc, -; YV12_BUFFER_CONFIG *dst_ybc) -|vpx_yv12_copy_y_neon| PROC - push {r4 - r11, lr} - vpush {d8-d15} - - ldr r4, [r0, #yv12_buffer_config_y_height] - ldr r5, [r0, #yv12_buffer_config_y_width] - ldr r6, [r0, #yv12_buffer_config_y_stride] - ldr r7, [r1, #yv12_buffer_config_y_stride] - ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 - ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1 - - ; copy two rows at one time - mov lr, r4, lsr #1 - -cp_src_to_dst_height_loop1 - mov r8, r2 - mov r9, r3 - add r10, r2, r6 - add r11, r3, r7 - movs r12, r5, lsr #7 - ble extra_copy_needed ; y_width < 128 - -cp_src_to_dst_width_loop1 - vld1.8 {q0, q1}, [r8]! - vld1.8 {q8, q9}, [r10]! - vld1.8 {q2, q3}, [r8]! - vld1.8 {q10, q11}, [r10]! - vld1.8 {q4, q5}, [r8]! - vld1.8 {q12, q13}, [r10]! - vld1.8 {q6, q7}, [r8]! - vld1.8 {q14, q15}, [r10]! - - subs r12, r12, #1 - - vst1.8 {q0, q1}, [r9]! - vst1.8 {q8, q9}, [r11]! - vst1.8 {q2, q3}, [r9]! - vst1.8 {q10, q11}, [r11]! - vst1.8 {q4, q5}, [r9]! - vst1.8 {q12, q13}, [r11]! - vst1.8 {q6, q7}, [r9]! - vst1.8 {q14, q15}, [r11]! 
- - bne cp_src_to_dst_width_loop1 - - subs lr, lr, #1 - add r2, r2, r6, lsl #1 - add r3, r3, r7, lsl #1 - - bne cp_src_to_dst_height_loop1 - -extra_copy_needed - ands r10, r5, #0x7f ;check to see if extra copy is needed - sub r11, r5, r10 - ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 - ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1 - bne extra_cp_src_to_dst_width1 -end_of_cp_src_to_dst1 - - vpop {d8 - d15} - pop {r4-r11, pc} - -;============================= -extra_cp_src_to_dst_width1 - add r2, r2, r11 - add r3, r3, r11 - add r0, r8, r6 - add r11, r9, r7 - - mov lr, r4, lsr #1 -extra_cp_src_to_dst_height_loop1 - mov r8, r2 - mov r9, r3 - add r0, r8, r6 - add r11, r9, r7 - - mov r12, r10 - -extra_cp_src_to_dst_width_loop1 - vld1.8 {q0}, [r8]! - vld1.8 {q1}, [r0]! - - subs r12, r12, #16 - - vst1.8 {q0}, [r9]! - vst1.8 {q1}, [r11]! - bne extra_cp_src_to_dst_width_loop1 - - subs lr, lr, #1 - - add r2, r2, r6, lsl #1 - add r3, r3, r7, lsl #1 - - bne extra_cp_src_to_dst_height_loop1 - - b end_of_cp_src_to_dst1 - - ENDP - - END diff --git a/source/libvpx/vpx_scale/vpx_scale.mk b/source/libvpx/vpx_scale/vpx_scale.mk index 50d3e9d..ded8e0b 100644 --- a/source/libvpx/vpx_scale/vpx_scale.mk +++ b/source/libvpx/vpx_scale/vpx_scale.mk @@ -7,11 +7,10 @@ SCALE_SRCS-yes += generic/yv12extend.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c SCALE_SRCS-yes += vpx_scale_asm_offsets.c SCALE_SRCS-yes += vpx_scale_rtcd.c -SCALE_SRCS-yes += vpx_scale_rtcd.sh +SCALE_SRCS-yes += vpx_scale_rtcd.pl #neon SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) -SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copy_y_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c @@ -24,4 +23,4 @@ SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) $(eval $(call asm_offsets_template,\ 
vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c)) -$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh)) +$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl)) diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.pl b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl new file mode 100644 index 0000000..8c92570 --- /dev/null +++ b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl @@ -0,0 +1,34 @@ +sub vpx_scale_forward_decls() { +print <<EOF +struct yv12_buffer_config; +EOF +} +forward_decls qw/vpx_scale_forward_decls/; + +# Scaler functions +if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") { + add_proto qw/void vp8_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_5_4_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_5_3_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_2_1_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_2_1_scale_i/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; +} + +add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf"; +specialize qw/vp8_yv12_extend_frame_borders neon/; + +add_proto qw/void vp8_yv12_copy_frame/, "const struct 
yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize qw/vp8_yv12_copy_frame neon/; + +add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; + +if (vpx_config("CONFIG_VP9") eq "yes") { + add_proto qw/void vp9_extend_frame_borders/, "struct yv12_buffer_config *ybf"; + specialize qw/vp9_extend_frame_borders dspr2/; + + add_proto qw/void vp9_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf"; + specialize qw/vp9_extend_frame_inner_borders dspr2/; +} +1; diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.sh b/source/libvpx/vpx_scale/vpx_scale_rtcd.sh deleted file mode 100755 index c26208c..0000000 --- a/source/libvpx/vpx_scale/vpx_scale_rtcd.sh +++ /dev/null @@ -1,34 +0,0 @@ -vpx_scale_forward_decls() { -cat <<EOF -struct yv12_buffer_config; -EOF -} -forward_decls vpx_scale_forward_decls - -# Scaler functions -if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then - prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int 
src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" -fi - -prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf" -specialize vp8_yv12_extend_frame_borders neon - -prototype void vp8_yv12_copy_frame "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_frame neon - -prototype void vpx_yv12_copy_y "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vpx_yv12_copy_y neon - -if [ "$CONFIG_VP9" = "yes" ]; then - prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf" - specialize vp9_extend_frame_borders dspr2 - - prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf" - specialize vp9_extend_frame_inner_borders dspr2 -fi diff --git a/source/libvpx/vpxdec.c b/source/libvpx/vpxdec.c index b69e55e..4c37234 100644 --- a/source/libvpx/vpxdec.c +++ b/source/libvpx/vpxdec.c @@ -218,9 +218,11 @@ static int raw_read_frame(FILE *infile, uint8_t **buffer, static int read_frame(struct VpxDecInputContext *input, uint8_t **buf, size_t *bytes_in_buffer, size_t *buffer_size) { switch (input->vpx_input_ctx->file_type) { +#if CONFIG_WEBM_IO case FILE_TYPE_WEBM: return webm_read_frame(input->webm_ctx, buf, bytes_in_buffer, buffer_size); +#endif case FILE_TYPE_RAW: return raw_read_frame(input->vpx_input_ctx->file, buf, bytes_in_buffer, buffer_size); @@ -663,12 +665,17 @@ int main_loop(int argc, const char **argv_) { input.vpx_input_ctx->file = infile; if (file_is_ivf(input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_IVF; +#if CONFIG_WEBM_IO else if (file_is_webm(input.webm_ctx, input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_WEBM; +#endif else if (file_is_raw(input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_RAW; else { fprintf(stderr, "Unrecognized input file type.\n"); +#if !CONFIG_WEBM_IO + fprintf(stderr, "vpxdec was built without WebM container support.\n"); 
+#endif return EXIT_FAILURE; } @@ -691,6 +698,7 @@ int main_loop(int argc, const char **argv_) { return EXIT_FAILURE; } +#if CONFIG_WEBM_IO if (vpx_input_ctx.file_type == FILE_TYPE_WEBM) { if (webm_guess_framerate(input.webm_ctx, input.vpx_input_ctx)) { fprintf(stderr, "Failed to guess framerate -- error parsing " @@ -698,6 +706,7 @@ int main_loop(int argc, const char **argv_) { return EXIT_FAILURE; } } +#endif } fourcc_interface = get_vpx_decoder_by_fourcc(vpx_input_ctx.fourcc); @@ -941,9 +950,12 @@ fail: } } +#if CONFIG_WEBM_IO if (input.vpx_input_ctx->file_type == FILE_TYPE_WEBM) webm_free(input.webm_ctx); - else +#endif + + if (input.vpx_input_ctx->file_type != FILE_TYPE_WEBM) free(buf); if (scaled_img) vpx_img_free(scaled_img); diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c index c61d83e..00d3e3e 100644 --- a/source/libvpx/vpxenc.c +++ b/source/libvpx/vpxenc.c @@ -123,6 +123,7 @@ int fourcc_is_ivf(const char detect[4]) { return 0; } +#if CONFIG_WEBM_IO /* Murmur hash derived from public domain reference implementation at * http:// sites.google.com/site/murmurhash/ */ @@ -169,7 +170,7 @@ static unsigned int murmur(const void *key, int len, unsigned int seed) { return h; } - +#endif // CONFIG_WEBM_IO static const arg_def_t debugmode = ARG_DEF("D", "debug", 0, "Debug mode (makes output deterministic)"); @@ -218,7 +219,7 @@ static const arg_def_t recontest = ARG_DEF_ENUM(NULL, "test-decode", 1, static const arg_def_t framerate = ARG_DEF(NULL, "fps", 1, "Stream frame rate (rate/scale)"); static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0, - "Output IVF (default is WebM)"); + "Output IVF (default is WebM if WebM IO is enabled)"); static const arg_def_t out_part = ARG_DEF("P", "output-partitions", 0, "Makes encoder output partitions. 
Requires IVF output!"); static const arg_def_t q_hist_n = ARG_DEF(NULL, "q-hist", 1, @@ -399,13 +400,17 @@ static const arg_def_t frame_parallel_decoding = ARG_DEF( NULL, "frame-parallel", 1, "Enable frame parallel decodability features"); static const arg_def_t aq_mode = ARG_DEF( NULL, "aq-mode", 1, - "Adaptive q mode (0: off (by default), 1: variance 2: complexity)"); + "Adaptive q mode (0: off (by default), 1: variance 2: complexity, " + "3: cyclic refresh)"); +static const arg_def_t frame_periodic_boost = ARG_DEF( + NULL, "frame_boost", 1, + "Enable frame periodic boost (0: off (by default), 1: on)"); static const arg_def_t *vp9_args[] = { &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless, - &frame_parallel_decoding, &aq_mode, + &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, NULL }; static const int vp9_arg_ctrl_map[] = { @@ -415,6 +420,7 @@ static const int vp9_arg_ctrl_map[] = { VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, + VP9E_SET_FRAME_PERIODIC_BOOST, 0 }; #endif @@ -834,7 +840,9 @@ static struct stream_state *new_stream(struct VpxEncoderConfig *global, /* Initialize remaining stream parameters */ stream->config.stereo_fmt = STEREO_FORMAT_MONO; stream->config.write_webm = 1; +#if CONFIG_WEBM_IO stream->ebml.last_pts_ms = -1; +#endif /* Allows removal of the application version from the EBML tags */ stream->ebml.debug = global->debug; @@ -1143,13 +1151,17 @@ static void open_output_file(struct stream_state *stream, if (stream->config.write_webm && fseek(stream->file, 0, SEEK_CUR)) fatal("WebM output to pipes not supported."); +#if CONFIG_WEBM_IO if (stream->config.write_webm) { stream->ebml.stream = stream->file; 
write_webm_file_header(&stream->ebml, cfg, &global->framerate, stream->config.stereo_fmt, global->codec->fourcc); - } else { + } +#endif + + if (!stream->config.write_webm) { ivf_write_file_header(stream->file, cfg, global->codec->fourcc, 0); } } @@ -1162,11 +1174,15 @@ static void close_output_file(struct stream_state *stream, if (cfg->g_pass == VPX_RC_FIRST_PASS) return; +#if CONFIG_WEBM_IO if (stream->config.write_webm) { write_webm_file_footer(&stream->ebml, stream->hash); free(stream->ebml.cue_list); stream->ebml.cue_list = NULL; - } else { + } +#endif + + if (!stream->config.write_webm) { if (!fseek(stream->file, 0, SEEK_SET)) ivf_write_file_header(stream->file, &stream->config.cfg, fourcc, @@ -1316,6 +1332,7 @@ static void get_cx_data(struct stream_state *stream, fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz); update_rate_histogram(stream->rate_hist, cfg, pkt); +#if CONFIG_WEBM_IO if (stream->config.write_webm) { /* Update the hash */ if (!stream->ebml.debug) @@ -1324,7 +1341,9 @@ static void get_cx_data(struct stream_state *stream, stream->hash); write_webm_block(&stream->ebml, cfg, pkt); - } else { + } +#endif + if (!stream->config.write_webm) { if (pkt->data.frame.partition_id <= 0) { ivf_header_pos = ftello(stream->file); fsize = pkt->data.frame.sz; @@ -1484,7 +1503,7 @@ static void print_time(const char *label, int64_t etl) { etl -= mins * 60; secs = etl; - fprintf(stderr, "[%3s %2"PRId64":%02"PRId64": % 02"PRId64"] ", + fprintf(stderr, "[%3s %2"PRId64":%02"PRId64":%02"PRId64"] ", label, hours, mins, secs); } else { fprintf(stderr, "[%3s unknown] ", label); @@ -1594,6 +1613,14 @@ int main(int argc, const char **argv_) { " and --passes=2\n", stream->index, global.pass); }); +#if !CONFIG_WEBM_IO + FOREACH_STREAM({ + stream->config.write_webm = 0; + warn("vpxenc was compiled without WebM container support." + "Producing IVF output"); + }); +#endif + /* Use the frame rate from the file only if none was specified * on the command-line. 
*/ diff --git a/source/libvpx/y4minput.c b/source/libvpx/y4minput.c index 47f005a..90c5310a 100644 --- a/source/libvpx/y4minput.c +++ b/source/libvpx/y4minput.c @@ -10,10 +10,45 @@ * Based on code from the OggTheora software codec source code, * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors. */ +#include <errno.h> #include <stdlib.h> #include <string.h> + +#include "vpx/vpx_integer.h" #include "y4minput.h" +// Reads 'size' bytes from 'file' into 'buf' with some fault tolerance. +// Returns true on success. +static int file_read(void *buf, size_t size, FILE *file) { + const int kMaxRetries = 5; + int retry_count = 0; + int file_error; + size_t len = 0; + do { + const size_t n = fread((uint8_t*)buf + len, 1, size - len, file); + len += n; + file_error = ferror(file); + if (file_error) { + if (errno == EINTR || errno == EAGAIN) { + clearerr(file); + continue; + } else { + fprintf(stderr, "Error reading file: %u of %u bytes read, %d: %s\n", + (uint32_t)len, (uint32_t)size, errno, strerror(errno)); + return 0; + } + } + } while (!feof(file) && len < size && ++retry_count < kMaxRetries); + + if (!feof(file) && len != size) { + fprintf(stderr, "Error reading file: %u of %u bytes read," + " error: %d, retries: %d, %d: %s\n", + (uint32_t)len, (uint32_t)size, file_error, retry_count, + errno, strerror(errno)); + } + return len == size; +} + static int y4m_parse_tags(y4m_input *_y4m, char *_tags) { int got_w; int got_h; @@ -670,8 +705,7 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip, buffer[i] = *_skip++; _nskip--; } else { - ret = (int)fread(buffer + i, 1, 1, _fin); - if (ret < 1)return -1; + if (!file_read(buffer + i, 1, _fin)) return -1; } if (buffer[i] == '\n')break; } @@ -853,10 +887,8 @@ int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) { int c_w; int c_h; int c_sz; - int ret; /*Read and skip the frame header.*/ - ret = (int)fread(frame, 1, 6, _fin); - if (ret < 6)return 0; + if (!file_read(frame, 6, 
_fin)) return 0; if (memcmp(frame, "FRAME", 5)) { fprintf(stderr, "Loss of framing in Y4M input data\n"); return -1; @@ -864,19 +896,19 @@ int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) { if (frame[5] != '\n') { char c; int j; - for (j = 0; j < 79 && fread(&c, 1, 1, _fin) && c != '\n'; j++); + for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) {} if (j == 79) { fprintf(stderr, "Error parsing Y4M frame header\n"); return -1; } } /*Read the frame data that needs no conversion.*/ - if (fread(_y4m->dst_buf, 1, _y4m->dst_buf_read_sz, _fin) != _y4m->dst_buf_read_sz) { + if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) { fprintf(stderr, "Error reading Y4M frame data.\n"); return -1; } /*Read the frame data that does need conversion.*/ - if (fread(_y4m->aux_buf, 1, _y4m->aux_buf_read_sz, _fin) != _y4m->aux_buf_read_sz) { + if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) { fprintf(stderr, "Error reading Y4M frame data.\n"); return -1; } |