diff options
author | johannkoenig@chromium.org <johannkoenig@chromium.org@4ff67af0-8c30-449e-8e8b-ad334ec8d88c> | 2014-04-10 17:14:25 +0000 |
---|---|---|
committer | johannkoenig@chromium.org <johannkoenig@chromium.org@4ff67af0-8c30-449e-8e8b-ad334ec8d88c> | 2014-04-10 17:14:25 +0000 |
commit | 93a74791c8e808ea76001ee07693aa2a5fdd3500 (patch) | |
tree | 88c3a21369388876dccedda352d254b16007ba22 /source | |
parent | ef98d99073c8ddc400dac9bd4a1b31fb7240d861 (diff) | |
download | libvpx-93a74791c8e808ea76001ee07693aa2a5fdd3500.tar.gz |
libvpx: Pull from upstream
Update grep pattern in update_libvpx.sh to account for
alternative locales.
Update generate_gypi.sh to use the new perl rtcd scripts.
Current HEAD: 23ccf7192434399e5db3a981fbfde40e1712ed5f
git log from upstream:
23ccf71 Merge "Fix encoder uninitialized read errors reported by drmemory"
4b8ad4a Merge "Fix coding format in vp9_rc_regulate_q"
675d95f Merge "Prevent the usage of invalid best_mode_index"
9034094 Merge "Remove duplicate code"
81056e2 Merge "Minor code cleanup"
65e650e Merge "Revert "Converting set_prev_mi() to get_prev_mi().""
3dff8aa Merge "Moving q_trans[] table to vp9_quantize.{c, h}."
f10c173 Merge "Removing unused code from vp9_onyx_if.c."
1dcc1aa Prevent the usage of invalid best_mode_index
41ea9ef Merge "Removing redundant assignments."
71ffc7d Merge "Remove unused tile arguments from vp_rdopt."
1eee13c Merge "Cleanup vp9_rd_pick_inter_mode_sub8x8()."
2255085 Fix coding format in vp9_rc_regulate_q
60def47 Revert "Converting set_prev_mi() to get_prev_mi()."
2dc9248 Merge "Fix the setting of mode_skip_mask"
b60d23f Removing unused code from vp9_onyx_if.c.
d1a396d Moving q_trans[] table to vp9_quantize.{c, h}.
4fffefe Merge "Fix avx builds on macosx with clang 5.0."
585e01b Remove duplicate code
7cc78c0 Merge "Adding vp9_inc_frame_in_layer() function."
7c891ed Minor code cleanup
3ab4d57 Remove unused tile arguments from vp_rdopt.
<...>
TBR=tomfinegan@chromium.org
Review URL: https://codereview.chromium.org/232133009
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/libvpx@263021 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
Diffstat (limited to 'source')
257 files changed, 28426 insertions, 11112 deletions
diff --git a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h index d1f0da4..77cc0b7 100644 --- a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h @@ -422,6 +422,7 @@ void vp8_yv12_copy_partial_frame_neon(struct yv12_buffer_config *src_ybc, struct RTCD_EXTERN void (*vp8_yv12_copy_partial_frame)(struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -435,253 +436,155 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c; if (flags & HAS_MEDIA) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_armv6; if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MEDIA) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_armv6; if (flags & HAS_NEON) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_neon; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MEDIA) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_armv6; if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MEDIA) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_armv6; if (flags & HAS_NEON) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon; - - - - - - - - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MEDIA) vp8_copy_mem16x16 = vp8_copy_mem16x16_v6; if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MEDIA) vp8_copy_mem8x4 = vp8_copy_mem8x4_v6; if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MEDIA) vp8_copy_mem8x8 = vp8_copy_mem8x8_v6; if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon; - 
vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MEDIA) vp8_dc_only_idct_add = vp8_dc_only_idct_add_v6; if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_NEON) vp8_denoiser_filter = vp8_denoiser_filter_neon; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MEDIA) vp8_dequant_idct_add = vp8_dequant_idct_add_v6; if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MEDIA) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MEDIA) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_v6; if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MEDIA) vp8_dequantize_b = vp8_dequantize_b_v6; if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon; - - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_MEDIA) vp8_fast_quantize_b = vp8_fast_quantize_b_armv6; if (flags & HAS_NEON) vp8_fast_quantize_b = vp8_fast_quantize_b_neon; - vp8_fast_quantize_b_pair = vp8_fast_quantize_b_pair_c; if (flags & HAS_NEON) vp8_fast_quantize_b_pair = vp8_fast_quantize_b_pair_neon; - - - - - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_NEON) vp8_get4x4sse_cs = vp8_get4x4sse_cs_neon; - - vp8_intra4x4_predict = vp8_intra4x4_predict_c; if (flags & HAS_MEDIA) vp8_intra4x4_predict = vp8_intra4x4_predict_armv6; - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MEDIA) vp8_loop_filter_bh = vp8_loop_filter_bh_armv6; if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MEDIA) vp8_loop_filter_bv = 
vp8_loop_filter_bv_armv6; if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MEDIA) vp8_loop_filter_mbh = vp8_loop_filter_mbh_armv6; if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MEDIA) vp8_loop_filter_mbv = vp8_loop_filter_mbv_armv6; if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MEDIA) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_armv6; if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon; - - - - - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MEDIA) vp8_mse16x16 = vp8_mse16x16_armv6; if (flags & HAS_NEON) vp8_mse16x16 = vp8_mse16x16_neon; - - - vp8_quantize_mb = vp8_quantize_mb_c; if (flags & HAS_NEON) vp8_quantize_mb = vp8_quantize_mb_neon; - vp8_quantize_mbuv = vp8_quantize_mbuv_c; if (flags & HAS_NEON) vp8_quantize_mbuv = vp8_quantize_mbuv_neon; - vp8_quantize_mby = vp8_quantize_mby_c; if (flags & HAS_NEON) vp8_quantize_mby = vp8_quantize_mby_neon; - - - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MEDIA) vp8_sad16x16 = vp8_sad16x16_armv6; if (flags & HAS_NEON) vp8_sad16x16 = 
vp8_sad16x16_neon; - - - - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_NEON) vp8_sad16x8 = vp8_sad16x8_neon; - - - - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_NEON) vp8_sad4x4 = vp8_sad4x4_neon; - - - - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_NEON) vp8_sad8x16 = vp8_sad8x16_neon; - - - - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_NEON) vp8_sad8x8 = vp8_sad8x8_neon; - - - - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MEDIA) vp8_short_fdct4x4 = vp8_short_fdct4x4_armv6; if (flags & HAS_NEON) vp8_short_fdct4x4 = vp8_short_fdct4x4_neon; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MEDIA) vp8_short_fdct8x4 = vp8_short_fdct8x4_armv6; if (flags & HAS_NEON) vp8_short_fdct8x4 = vp8_short_fdct8x4_neon; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MEDIA) vp8_short_idct4x4llm = vp8_short_idct4x4llm_v6_dual; if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MEDIA) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_v6; if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_MEDIA) vp8_short_walsh4x4 = vp8_short_walsh4x4_armv6; if (flags & HAS_NEON) vp8_short_walsh4x4 = vp8_short_walsh4x4_neon; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MEDIA) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_armv6; if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MEDIA) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_armv6; if (flags & HAS_NEON) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_neon; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MEDIA) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_armv6; if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & 
HAS_MEDIA) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_armv6; if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MEDIA) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_armv6; if (flags & HAS_NEON) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_neon; - - - - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MEDIA) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_armv6; if (flags & HAS_NEON) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_neon; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MEDIA) vp8_subtract_b = vp8_subtract_b_armv6; if (flags & HAS_NEON) vp8_subtract_b = vp8_subtract_b_neon; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MEDIA) vp8_subtract_mbuv = vp8_subtract_mbuv_armv6; if (flags & HAS_NEON) vp8_subtract_mbuv = vp8_subtract_mbuv_neon; - vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MEDIA) vp8_subtract_mby = vp8_subtract_mby_armv6; if (flags & HAS_NEON) vp8_subtract_mby = vp8_subtract_mby_neon; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MEDIA) vp8_variance16x16 = vp8_variance16x16_armv6; if (flags & HAS_NEON) vp8_variance16x16 = vp8_variance16x16_neon; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_NEON) vp8_variance16x8 = vp8_variance16x8_neon; - - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_NEON) vp8_variance8x16 = vp8_variance8x16_neon; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MEDIA) vp8_variance8x8 = vp8_variance8x8_armv6; if (flags & HAS_NEON) vp8_variance8x8 = vp8_variance8x8_neon; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MEDIA) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if 
(flags & HAS_MEDIA) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MEDIA) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6; if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon; - vp8_yv12_copy_partial_frame = vp8_yv12_copy_partial_frame_c; if (flags & HAS_NEON) vp8_yv12_copy_partial_frame = vp8_yv12_copy_partial_frame_neon; } diff --git a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h index e6eb470..127c325 100644 --- a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h @@ -232,6 +232,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -731,6 +734,7 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -741,288 +745,92 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_NEON) vp9_convolve8 = vp9_convolve8_neon; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_NEON) vp9_convolve8_avg = vp9_convolve8_avg_neon; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if 
(flags & HAS_NEON) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_neon; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_NEON) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_neon; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_NEON) vp9_convolve8_horiz = vp9_convolve8_horiz_neon; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_NEON) vp9_convolve8_vert = vp9_convolve8_vert_neon; - vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_NEON) vp9_convolve_avg = vp9_convolve_avg_neon; - vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_NEON) vp9_convolve_copy = vp9_convolve_copy_neon; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_NEON) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_neon; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_NEON) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_neon; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_NEON) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_neon; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_NEON) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_neon; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_NEON) vp9_idct16x16_10_add = vp9_idct16x16_10_add_neon; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_NEON) vp9_idct16x16_1_add = vp9_idct16x16_1_add_neon; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_NEON) vp9_idct16x16_256_add = vp9_idct16x16_256_add_neon; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_NEON) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_neon; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_NEON) vp9_idct32x32_1_add = vp9_idct32x32_1_add_neon; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_NEON) vp9_idct32x32_34_add = vp9_idct32x32_1024_add_neon; - vp9_idct4x4_16_add = 
vp9_idct4x4_16_add_c; if (flags & HAS_NEON) vp9_idct4x4_16_add = vp9_idct4x4_16_add_neon; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_NEON) vp9_idct4x4_1_add = vp9_idct4x4_1_add_neon; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_NEON) vp9_idct8x8_10_add = vp9_idct8x8_10_add_neon; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_NEON) vp9_idct8x8_1_add = vp9_idct8x8_1_add_neon; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_NEON) vp9_idct8x8_64_add = vp9_idct8x8_64_add_neon; - - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_NEON) vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_NEON) vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_NEON) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_neon; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_NEON) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_neon; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_NEON) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_neon; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_NEON) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_neon; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_NEON) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_neon; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_NEON) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_neon; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_NEON) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_neon; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if (flags & HAS_NEON) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_neon; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_NEON) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_neon; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_NEON) vp9_lpf_vertical_8 = 
vp9_lpf_vertical_8_neon; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_NEON) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_neon; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_c; if (flags & HAS_NEON) vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_neon; - vp9_tm_predictor_32x32 = vp9_tm_predictor_32x32_c; if (flags & HAS_NEON) vp9_tm_predictor_32x32 = vp9_tm_predictor_32x32_neon; - vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_c; if (flags & HAS_NEON) vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_neon; - vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_c; if (flags & HAS_NEON) vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_neon; - vp9_v_predictor_16x16 = vp9_v_predictor_16x16_c; if (flags & HAS_NEON) vp9_v_predictor_16x16 = vp9_v_predictor_16x16_neon; - vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c; if (flags & HAS_NEON) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_neon; - vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c; if (flags & HAS_NEON) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_neon; - vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_NEON) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_neon; } diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index 83869bc..7b66017 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section 
.note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/source/config/linux/arm-neon-cpu-detect/vpx_config.h index a6a6152..609fcab 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h index 7ec35db..33bed21 100644 --- a/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h +++ b/source/config/linux/arm-neon-cpu-detect/vpx_scale_rtcd.h @@ -49,10 +49,10 @@ void vp9_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); #define vp9_extend_frame_inner_borders vp9_extend_frame_inner_borders_c void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -void vpx_yv12_copy_y_neon(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -RTCD_EXTERN void (*vpx_yv12_copy_y)(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); +#define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -63,23 +63,10 @@ static void setup_rtcd_internal(void) (void)flags; - - - - - - - vp8_yv12_copy_frame = vp8_yv12_copy_frame_c; if (flags & HAS_NEON) vp8_yv12_copy_frame = vp8_yv12_copy_frame_neon; - vp8_yv12_extend_frame_borders = vp8_yv12_extend_frame_borders_c; if (flags & HAS_NEON) vp8_yv12_extend_frame_borders = vp8_yv12_extend_frame_borders_neon; - - - - vpx_yv12_copy_y = vpx_yv12_copy_y_c; - if (flags 
& HAS_NEON) vpx_yv12_copy_y = vpx_yv12_copy_y_neon; } #endif diff --git a/source/config/linux/arm-neon/vp8_rtcd.h b/source/config/linux/arm-neon/vp8_rtcd.h index 34661bd..a52d575 100644 --- a/source/config/linux/arm-neon/vp8_rtcd.h +++ b/source/config/linux/arm-neon/vp8_rtcd.h @@ -422,6 +422,7 @@ void vp8_yv12_copy_partial_frame_neon(struct yv12_buffer_config *src_ybc, struct #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_neon void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -432,7 +433,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm-neon/vp9_rtcd.h b/source/config/linux/arm-neon/vp9_rtcd.h index dbb9fc3..b757556 100644 --- a/source/config/linux/arm-neon/vp9_rtcd.h +++ b/source/config/linux/arm-neon/vp9_rtcd.h @@ -232,6 +232,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -731,6 +734,7 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -741,7 +745,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm-neon/vpx_config.asm b/source/config/linux/arm-neon/vpx_config.asm index 007f243..c42b4ed 100644 --- a/source/config/linux/arm-neon/vpx_config.asm +++ b/source/config/linux/arm-neon/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ 
CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm-neon/vpx_config.h b/source/config/linux/arm-neon/vpx_config.h index f9da536..7d95fee 100644 --- a/source/config/linux/arm-neon/vpx_config.h +++ b/source/config/linux/arm-neon/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm-neon/vpx_scale_rtcd.h b/source/config/linux/arm-neon/vpx_scale_rtcd.h index dc9ab6f..58a946b 100644 --- a/source/config/linux/arm-neon/vpx_scale_rtcd.h +++ b/source/config/linux/arm-neon/vpx_scale_rtcd.h @@ -49,10 +49,10 @@ void vp9_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf); #define vp9_extend_frame_inner_borders vp9_extend_frame_inner_borders_c void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -void vpx_yv12_copy_y_neon(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc); -#define vpx_yv12_copy_y vpx_yv12_copy_y_neon +#define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -63,7 +63,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm/vp8_rtcd.h b/source/config/linux/arm/vp8_rtcd.h index 5a48adc..ab5fa41 100644 --- a/source/config/linux/arm/vp8_rtcd.h +++ 
b/source/config/linux/arm/vp8_rtcd.h @@ -367,6 +367,7 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -377,7 +378,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm/vp9_rtcd.h b/source/config/linux/arm/vp9_rtcd.h index 9c7e204..4d3884c 100644 --- a/source/config/linux/arm/vp9_rtcd.h +++ b/source/config/linux/arm/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -687,6 +690,7 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -697,7 +701,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/arm/vpx_config.asm b/source/config/linux/arm/vpx_config.asm index 08822e2..4e652ce 100644 --- a/source/config/linux/arm/vpx_config.asm +++ b/source/config/linux/arm/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ 
CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/arm/vpx_config.h b/source/config/linux/arm/vpx_config.h index f7d694b..1b7367b 100644 --- a/source/config/linux/arm/vpx_config.h +++ b/source/config/linux/arm/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/arm/vpx_scale_rtcd.h b/source/config/linux/arm/vpx_scale_rtcd.h index 4fb918a..0a6d790 100644 --- a/source/config/linux/arm/vpx_scale_rtcd.h +++ b/source/config/linux/arm/vpx_scale_rtcd.h @@ -50,6 +50,7 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C @@ -60,7 +61,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/linux/generic/vp8_rtcd.h b/source/config/linux/generic/vp8_rtcd.h index 9564cfc..d6de728 100644 --- a/source/config/linux/generic/vp8_rtcd.h +++ b/source/config/linux/generic/vp8_rtcd.h @@ -324,12 +324,12 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/generic/vp9_rtcd.h b/source/config/linux/generic/vp9_rtcd.h index 652aa08..03e7181 100644 --- a/source/config/linux/generic/vp9_rtcd.h +++ b/source/config/linux/generic/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int 
vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -687,12 +690,12 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/generic/vpx_config.asm b/source/config/linux/generic/vpx_config.asm index c612876..1d1039e 100644 --- a/source/config/linux/generic/vpx_config.asm +++ b/source/config/linux/generic/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/linux/generic/vpx_config.h b/source/config/linux/generic/vpx_config.h index e910000..2d5e208 100644 --- a/source/config/linux/generic/vpx_config.h +++ b/source/config/linux/generic/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* 
VPX_CONFIG_H */ diff --git a/source/config/linux/generic/vpx_scale_rtcd.h b/source/config/linux/generic/vpx_scale_rtcd.h index 4b0a213..f5e6caa 100644 --- a/source/config/linux/generic/vpx_scale_rtcd.h +++ b/source/config/linux/generic/vpx_scale_rtcd.h @@ -50,12 +50,12 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/ia32/vp8_rtcd.h b/source/config/linux/ia32/vp8_rtcd.h index 7a3e0f4..7e90462 100644 --- a/source/config/linux/ia32/vp8_rtcd.h +++ b/source/config/linux/ia32/vp8_rtcd.h @@ -492,337 +492,239 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - vp8_block_error = vp8_block_error_c; if (flags & HAS_MMX) vp8_block_error = vp8_block_error_mmx; if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_xmm; - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = 
vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy32xn = vp8_copy32xn_c; if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = 
vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_c; if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2; - - vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c; if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2; - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; - - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; if (flags & 
HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; - vp8_mbblock_error = vp8_mbblock_error_c; if (flags & HAS_MMX) vp8_mbblock_error = vp8_mbblock_error_mmx; if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_xmm; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; if (flags & HAS_SSE2) vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_xmm; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; if (flags & HAS_MMX) vp8_mbpost_proc_down = vp8_mbpost_proc_down_mmx; if (flags & HAS_SSE2) vp8_mbpost_proc_down = vp8_mbpost_proc_down_xmm; - vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; - vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; if (flags & HAS_SSE2) vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_sse2; - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = 
vp8_regular_quantize_b_sse2; - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; if (flags & HAS_SSE2) 
vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MMX) vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) 
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; if (flags & HAS_MMX) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MMX) vp8_subtract_b = vp8_subtract_b_mmx; if (flags & HAS_SSE2) vp8_subtract_b = vp8_subtract_b_sse2; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MMX) vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; if (flags & HAS_SSE2) vp8_subtract_mbuv = vp8_subtract_mbuv_sse2; - vp8_subtract_mby = vp8_subtract_mby_c; if 
(flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; diff --git a/source/config/linux/ia32/vp9_rtcd.h b/source/config/linux/ia32/vp9_rtcd.h index b4674d2..33c6064 100644 --- a/source/config/linux/ia32/vp9_rtcd.h +++ b/source/config/linux/ia32/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int 
vp9_get_mb_ss_sse2(const int16_t *); RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vp9_get_sse_sum_16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vp9_get_sse_sum_8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); @@ -909,595 +913,385 @@ static void setup_rtcd_internal(void) (void)flags; - - - vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = 
vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2; - vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - 
vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_sse2; - vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; if (flags & HAS_SSE2) vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_sse2; - vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c; if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; - vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; - vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; - vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; - vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; - vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2; - vp9_fht16x16 = vp9_fht16x16_c; if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2; - vp9_fht4x4 = vp9_fht4x4_c; if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2; - vp9_fht8x8 = vp9_fht8x8_c; if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2; - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if 
(flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; - + vp9_get_sse_sum_16x16 = vp9_get_sse_sum_16x16_c; + if (flags & HAS_SSE2) vp9_get_sse_sum_16x16 = vp9_get16x16var_sse2; vp9_get_sse_sum_8x8 = vp9_get_sse_sum_8x8_c; if (flags & HAS_SSE2) vp9_get_sse_sum_8x8 = vp9_get8x8var_sse2; - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1_add = vp9_idct32x32_1_add_sse2; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_SSE2) vp9_idct32x32_34_add = vp9_idct32x32_34_add_sse2; - vp9_idct4x4_16_add = vp9_idct4x4_16_add_c; if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_SSE2) vp9_idct8x8_10_add = 
vp9_idct8x8_10_add_sse2; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_SSE2) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_sse2; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_sse2; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if (flags & HAS_MMX) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_mmx; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = 
vp9_mse16x16_c; if (flags & HAS_MMX) vp9_mse16x16 = vp9_mse16x16_mmx; if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - - - - - - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - vp9_sad16x16 = vp9_sad16x16_c; if (flags & HAS_MMX) vp9_sad16x16 = vp9_sad16x16_mmx; if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - vp9_sad16x16_avg = vp9_sad16x16_avg_c; if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - - vp9_sad16x32 = vp9_sad16x32_c; if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; if (flags & HAS_MMX) vp9_sad16x8 = vp9_sad16x8_mmx; if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - - vp9_sad32x16 = vp9_sad32x16_c; if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2; - vp9_sad32x32_avg = vp9_sad32x32_avg_c; if (flags & 
HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - - vp9_sad32x32x4d = vp9_sad32x32x4d_c; if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - - vp9_sad32x64 = vp9_sad32x64_c; if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; if (flags & HAS_MMX) vp9_sad4x4 = vp9_sad4x4_mmx; if (flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - - vp9_sad4x8 = vp9_sad4x8_c; if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - - vp9_sad64x32 = vp9_sad64x32_c; if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - - vp9_sad64x64x4d = vp9_sad64x64x4d_c; if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - - vp9_sad8x16 = vp9_sad8x16_c; if (flags & HAS_MMX) vp9_sad8x16 = vp9_sad8x16_mmx; if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; if 
(flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - - vp9_sad8x4 = vp9_sad8x4_c; if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - - vp9_sad8x8 = vp9_sad8x8_c; if (flags & HAS_MMX) vp9_sad8x8 = vp9_sad8x8_mmx; if (flags & HAS_SSE2) vp9_sad8x8 = vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; if (flags & HAS_SSE2) 
vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = 
vp9_sub_pixel_avg_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if 
(flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_subtract_block = vp9_subtract_block_c; if (flags & HAS_SSE2) vp9_subtract_block = vp9_subtract_block_sse2; - vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; - vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_c; if (flags & HAS_SSE2) vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_sse2; - - vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_c; if (flags & HAS_SSE) vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_sse; - vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_c; if (flags & HAS_SSE2) vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_sse2; - vp9_v_predictor_16x16 = vp9_v_predictor_16x16_c; if (flags & HAS_SSE2) vp9_v_predictor_16x16 = vp9_v_predictor_16x16_sse2; - 
vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c; if (flags & HAS_SSE2) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_sse2; - vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c; if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; - vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; if (flags & HAS_MMX) vp9_variance16x16 = vp9_variance16x16_mmx; if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - vp9_variance16x32 = vp9_variance16x32_c; if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; if (flags & HAS_MMX) vp9_variance16x8 = vp9_variance16x8_mmx; if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - vp9_variance32x32 = vp9_variance32x32_c; if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - vp9_variance32x64 = vp9_variance32x64_c; if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; if (flags & HAS_MMX) vp9_variance4x4 = vp9_variance4x4_mmx; if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - vp9_variance64x64 = vp9_variance64x64_c; if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - vp9_variance8x16 = vp9_variance8x16_c; if (flags & HAS_MMX) vp9_variance8x16 = vp9_variance8x16_mmx; if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx; if (flags & HAS_SSE2) 
vp9_variance8x8 = vp9_variance8x8_sse2; - vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_sse2; - vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_sse2; - vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_sse2; } diff --git a/source/config/linux/ia32/vpx_config.asm b/source/config/linux/ia32/vpx_config.asm index 9446c60..6f5cff0 100644 --- a/source/config/linux/ia32/vpx_config.asm +++ b/source/config/linux/ia32/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/linux/ia32/vpx_config.h b/source/config/linux/ia32/vpx_config.h index b1da19e..2e170eb 100644 --- a/source/config/linux/ia32/vpx_config.h +++ b/source/config/linux/ia32/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/ia32/vpx_scale_rtcd.h b/source/config/linux/ia32/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/linux/ia32/vpx_scale_rtcd.h +++ b/source/config/linux/ia32/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) 
(void)flags; - } #endif diff --git a/source/config/linux/mipsel/vp8_rtcd.h b/source/config/linux/mipsel/vp8_rtcd.h index e46242f..72a7d9e 100644 --- a/source/config/linux/mipsel/vp8_rtcd.h +++ b/source/config/linux/mipsel/vp8_rtcd.h @@ -327,12 +327,12 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/mipsel/vp9_rtcd.h b/source/config/linux/mipsel/vp9_rtcd.h index 652aa08..03e7181 100644 --- a/source/config/linux/mipsel/vp9_rtcd.h +++ b/source/config/linux/mipsel/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get_sse_sum_8x8_c @@ -687,12 +690,12 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/mipsel/vpx_config.h b/source/config/linux/mipsel/vpx_config.h index 21353ea..32bd922 100644 --- a/source/config/linux/mipsel/vpx_config.h +++ b/source/config/linux/mipsel/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define 
CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/linux/mipsel/vpx_scale_rtcd.h b/source/config/linux/mipsel/vpx_scale_rtcd.h index 4b0a213..f5e6caa 100644 --- a/source/config/linux/mipsel/vpx_scale_rtcd.h +++ b/source/config/linux/mipsel/vpx_scale_rtcd.h @@ -50,12 +50,12 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/linux/x64/vp8_rtcd.h b/source/config/linux/x64/vp8_rtcd.h index f7b58ac..9653130 100644 --- a/source/config/linux/x64/vp8_rtcd.h +++ b/source/config/linux/x64/vp8_rtcd.h @@ -490,151 +490,67 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - - - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - - vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - - - - - - - - - - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - - - - 
vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - - - - - - - - - - - - - - - - - - - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - - - vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - - - - 
- - - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; } diff --git a/source/config/linux/x64/vp9_rtcd.h b/source/config/linux/x64/vp9_rtcd.h index 08003f5..e6a0520 100644 --- a/source/config/linux/x64/vp9_rtcd.h +++ b/source/config/linux/x64/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_sse2 +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get16x16var_sse2 + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get8x8var_sse2 @@ -912,315 +916,129 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) 
vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = 
vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - - - - - - - - - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; - vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3; - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if 
(flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - - - - - - - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - - - - - - - - - - - - - - - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - - - - - - - - - - - - - - - - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - - - - - - - - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - 
vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) 
vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; } diff --git a/source/config/linux/x64/vpx_config.asm b/source/config/linux/x64/vpx_config.asm index 7a9cf0b..c045d4d 100644 --- a/source/config/linux/x64/vpx_config.asm +++ b/source/config/linux/x64/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/linux/x64/vpx_config.h b/source/config/linux/x64/vpx_config.h index c34ce89..494d1f4 100644 --- a/source/config/linux/x64/vpx_config.h +++ b/source/config/linux/x64/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git 
a/source/config/linux/x64/vpx_scale_rtcd.h b/source/config/linux/x64/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/linux/x64/vpx_scale_rtcd.h +++ b/source/config/linux/x64/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/mac/ia32/vp8_rtcd.h b/source/config/mac/ia32/vp8_rtcd.h index 7a3e0f4..7e90462 100644 --- a/source/config/mac/ia32/vp8_rtcd.h +++ b/source/config/mac/ia32/vp8_rtcd.h @@ -492,337 +492,239 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - vp8_block_error = vp8_block_error_c; if (flags & HAS_MMX) vp8_block_error = vp8_block_error_mmx; if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_xmm; - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) 
vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy32xn = vp8_copy32xn_c; if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - vp8_filter_by_weight16x16 = 
vp8_filter_by_weight16x16_c; if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2; - - vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c; if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2; - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; - - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; if (flags & 
HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2; - vp8_mbblock_error = vp8_mbblock_error_c; if (flags & HAS_MMX) vp8_mbblock_error = vp8_mbblock_error_mmx; if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_xmm; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; if (flags & HAS_SSE2) vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_xmm; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; if (flags & HAS_MMX) vp8_mbpost_proc_down = vp8_mbpost_proc_down_mmx; if (flags & HAS_SSE2) vp8_mbpost_proc_down = vp8_mbpost_proc_down_xmm; - vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; - vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; if (flags & HAS_SSE2) vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_sse2; - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = 
vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags 
& HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MMX) vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; if 
(flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; if (flags & HAS_MMX) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MMX) vp8_subtract_b = vp8_subtract_b_mmx; if (flags & HAS_SSE2) vp8_subtract_b = vp8_subtract_b_sse2; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MMX) vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; if (flags & HAS_SSE2) vp8_subtract_mbuv = vp8_subtract_mbuv_sse2; - vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; if (flags & HAS_SSE2) 
vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx; if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; diff --git a/source/config/mac/ia32/vp9_rtcd.h b/source/config/mac/ia32/vp9_rtcd.h index 4f25ab3..1489a7e 100644 --- a/source/config/mac/ia32/vp9_rtcd.h +++ b/source/config/mac/ia32/vp9_rtcd.h @@ -249,6 +249,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vp9_get_sse_sum_16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vp9_get_sse_sum_8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); @@ -780,371 +784,164 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; - vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; - vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; - vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; - vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2; - vp9_fht16x16 = vp9_fht16x16_c; if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2; - vp9_fht4x4 = vp9_fht4x4_c; if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2; - vp9_fht8x8 = vp9_fht8x8_c; if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2; - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; - + vp9_get_sse_sum_16x16 = vp9_get_sse_sum_16x16_c; + if (flags & HAS_SSE2) vp9_get_sse_sum_16x16 = vp9_get16x16var_sse2; vp9_get_sse_sum_8x8 = vp9_get_sse_sum_8x8_c; if (flags & HAS_SSE2) vp9_get_sse_sum_8x8 = vp9_get8x8var_sse2; - - - - - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1_add = 
vp9_idct32x32_1_add_sse2; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_SSE2) vp9_idct32x32_34_add = vp9_idct32x32_34_add_sse2; - vp9_idct4x4_16_add = vp9_idct4x4_16_add_c; if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_SSE2) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_sse2; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_sse2; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if 
(flags & HAS_MMX) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_mmx; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; if (flags & HAS_MMX) vp9_mse16x16 = vp9_mse16x16_mmx; - - - - - - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - vp9_sad16x16 = vp9_sad16x16_c; if (flags & HAS_MMX) vp9_sad16x16 = vp9_sad16x16_mmx; - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - - - - vp9_sad16x32x4d = vp9_sad16x32x4d_c; if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; if (flags & HAS_MMX) vp9_sad16x8 = vp9_sad16x8_mmx; - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - - - - vp9_sad32x16x4d = vp9_sad32x16x4d_c; if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - - - - vp9_sad32x32x4d = vp9_sad32x32x4d_c; if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - - - - vp9_sad32x64x4d = vp9_sad32x64x4d_c; if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; if (flags & HAS_MMX) vp9_sad4x4 = vp9_sad4x4_mmx; - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; if (flags & 
HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - - - - vp9_sad4x8x4d = vp9_sad4x8x4d_c; if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - - - - vp9_sad64x32x4d = vp9_sad64x32x4d_c; if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - - - - vp9_sad64x64x4d = vp9_sad64x64x4d_c; if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - - vp9_sad8x16 = vp9_sad8x16_c; if (flags & HAS_MMX) vp9_sad8x16 = vp9_sad8x16_mmx; - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - - - - vp9_sad8x4x4d = vp9_sad8x4x4d_c; if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - - vp9_sad8x8 = vp9_sad8x8_c; if (flags & HAS_MMX) vp9_sad8x8 = vp9_sad8x8_mmx; - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; - - - - - - - - - vp9_variance16x16 = vp9_variance16x16_c; if (flags & HAS_MMX) vp9_variance16x16 = vp9_variance16x16_mmx; - - vp9_variance16x8 = vp9_variance16x8_c; if (flags & HAS_MMX) vp9_variance16x8 = vp9_variance16x8_mmx; - - - - vp9_variance4x4 = vp9_variance4x4_c; if (flags & HAS_MMX) vp9_variance4x4 = vp9_variance4x4_mmx; - - - - vp9_variance8x16 = vp9_variance8x16_c; if (flags & HAS_MMX) vp9_variance8x16 = vp9_variance8x16_mmx; - - vp9_variance8x8 = vp9_variance8x8_c; if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx; } diff --git a/source/config/mac/ia32/vpx_config.asm b/source/config/mac/ia32/vpx_config.asm index 33eed91..f296bc3 100644 --- a/source/config/mac/ia32/vpx_config.asm +++ b/source/config/mac/ia32/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 
CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/mac/ia32/vpx_config.h b/source/config/mac/ia32/vpx_config.h index 3e80541..7c2bcce 100644 --- a/source/config/mac/ia32/vpx_config.h +++ b/source/config/mac/ia32/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/mac/ia32/vpx_scale_rtcd.h b/source/config/mac/ia32/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/mac/ia32/vpx_scale_rtcd.h +++ b/source/config/mac/ia32/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/mac/x64/vp8_rtcd.h b/source/config/mac/x64/vp8_rtcd.h index f7b58ac..9653130 100644 --- a/source/config/mac/x64/vp8_rtcd.h +++ b/source/config/mac/x64/vp8_rtcd.h @@ -490,151 +490,67 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - - - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = 
vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - - vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - - - - - - - - - - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - - - - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - - - - - - - - - - - - - - - - - - - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - - - vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) 
vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - - - - - - - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; } diff --git a/source/config/mac/x64/vp9_rtcd.h b/source/config/mac/x64/vp9_rtcd.h index 08003f5..e6a0520 100644 --- a/source/config/mac/x64/vp9_rtcd.h +++ b/source/config/mac/x64/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_sse2 +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define 
vp9_get_sse_sum_16x16 vp9_get16x16var_sse2 + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get8x8var_sse2 @@ -912,315 +916,129 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = 
vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - - - - - - - - - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if 
(flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; - vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3; - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - - - - - - - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - - - - - - - - - - - - - - - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - - - - - - - - - - - - - - - - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - - - - - - - - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = 
vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - 
vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; } diff --git a/source/config/mac/x64/vpx_config.asm b/source/config/mac/x64/vpx_config.asm index 7a9cf0b..c045d4d 100644 --- a/source/config/mac/x64/vpx_config.asm +++ b/source/config/mac/x64/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/mac/x64/vpx_config.h b/source/config/mac/x64/vpx_config.h index 
c34ce89..494d1f4 100644 --- a/source/config/mac/x64/vpx_config.h +++ b/source/config/mac/x64/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/mac/x64/vpx_scale_rtcd.h b/source/config/mac/x64/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/mac/x64/vpx_scale_rtcd.h +++ b/source/config/mac/x64/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/nacl/vp8_rtcd.h b/source/config/nacl/vp8_rtcd.h index 9564cfc..d6de728 100644 --- a/source/config/nacl/vp8_rtcd.h +++ b/source/config/nacl/vp8_rtcd.h @@ -324,12 +324,12 @@ void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv #define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c void vp8_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/nacl/vp9_rtcd.h b/source/config/nacl/vp9_rtcd.h index 652aa08..03e7181 100644 --- a/source/config/nacl/vp9_rtcd.h +++ b/source/config/nacl/vp9_rtcd.h @@ -224,6 +224,9 @@ void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride); unsigned int vp9_get_mb_ss_c(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_c +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get_sse_sum_16x16_c + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 
vp9_get_sse_sum_8x8_c @@ -687,12 +690,12 @@ unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source #define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c void vp9_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git a/source/config/nacl/vpx_config.asm b/source/config/nacl/vpx_config.asm index c612876..1d1039e 100644 --- a/source/config/nacl/vpx_config.asm +++ b/source/config/nacl/vpx_config.asm @@ -73,12 +73,12 @@ .equ CONFIG_POSTPROC_VISUALIZER , 0 .equ CONFIG_OS_SUPPORT , 1 .equ CONFIG_UNIT_TESTS , 0 +.equ CONFIG_WEBM_IO , 1 .equ CONFIG_DECODE_PERF_TESTS , 0 .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_EXPERIMENTAL , 0 .equ CONFIG_DECRYPT , 0 .equ CONFIG_MULTIPLE_ARF , 0 -.equ CONFIG_NON420 , 0 .equ CONFIG_ALPHA , 0 .section .note.GNU-stack,"",%progbits diff --git a/source/config/nacl/vpx_config.h b/source/config/nacl/vpx_config.h index e910000..2d5e208 100644 --- a/source/config/nacl/vpx_config.h +++ b/source/config/nacl/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/nacl/vpx_scale_rtcd.h b/source/config/nacl/vpx_scale_rtcd.h index 4b0a213..f5e6caa 100644 --- a/source/config/nacl/vpx_scale_rtcd.h +++ b/source/config/nacl/vpx_scale_rtcd.h @@ -50,12 +50,12 @@ void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buf #define vpx_yv12_copy_y vpx_yv12_copy_y_c void vpx_scale_rtcd(void); + #include "vpx_config.h" #ifdef RTCD_C static void setup_rtcd_internal(void) { - } #endif diff --git 
a/source/config/win/ia32/vp8_rtcd.h b/source/config/win/ia32/vp8_rtcd.h index 7a3e0f4..7e90462 100644 --- a/source/config/win/ia32/vp8_rtcd.h +++ b/source/config/win/ia32/vp8_rtcd.h @@ -492,337 +492,239 @@ static void setup_rtcd_internal(void) if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c; if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx; - vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c; if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx; - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c; if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx; if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - - - - vp8_block_error = vp8_block_error_c; if (flags & HAS_MMX) vp8_block_error = vp8_block_error_mmx; if (flags & HAS_SSE2) vp8_block_error = vp8_block_error_xmm; - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_c; if (flags & HAS_SSE2) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - vp8_clear_system_state = vp8_clear_system_state_c; if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state; - vp8_copy32xn = vp8_copy32xn_c; if (flags & HAS_SSE2) vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = 
vp8_copy32xn_sse3; - vp8_copy_mem16x16 = vp8_copy_mem16x16_c; if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx; if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2; - vp8_copy_mem8x4 = vp8_copy_mem8x4_c; if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx; - vp8_copy_mem8x8 = vp8_copy_mem8x8_c; if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx; - vp8_dc_only_idct_add = vp8_dc_only_idct_add_c; if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx; - vp8_denoiser_filter = vp8_denoiser_filter_c; if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2; - vp8_dequant_idct_add = vp8_dequant_idct_add_c; if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx; - vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; - vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c; if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; - vp8_dequantize_b = vp8_dequantize_b_c; if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx; - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; if (flags & HAS_SSE2) vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_c; if (flags & HAS_SSE2) vp8_filter_by_weight16x16 = vp8_filter_by_weight16x16_sse2; - - vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_c; if (flags & HAS_SSE2) vp8_filter_by_weight8x8 = vp8_filter_by_weight8x8_sse2; - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) 
vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_c; if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx; if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2; - - vp8_loop_filter_bh = vp8_loop_filter_bh_c; if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2; - vp8_loop_filter_bv = vp8_loop_filter_bv_c; if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2; - vp8_loop_filter_mbh = vp8_loop_filter_mbh_c; if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2; - vp8_loop_filter_mbv = vp8_loop_filter_mbv_c; if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx; if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2; - vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2; - vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c; if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2; - vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2; - vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c; if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx; if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = 
vp8_loop_filter_simple_vertical_edge_sse2; - vp8_mbblock_error = vp8_mbblock_error_c; if (flags & HAS_MMX) vp8_mbblock_error = vp8_mbblock_error_mmx; if (flags & HAS_SSE2) vp8_mbblock_error = vp8_mbblock_error_xmm; - vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_c; if (flags & HAS_SSE2) vp8_mbpost_proc_across_ip = vp8_mbpost_proc_across_ip_xmm; - vp8_mbpost_proc_down = vp8_mbpost_proc_down_c; if (flags & HAS_MMX) vp8_mbpost_proc_down = vp8_mbpost_proc_down_mmx; if (flags & HAS_SSE2) vp8_mbpost_proc_down = vp8_mbpost_proc_down_xmm; - vp8_mbuverror = vp8_mbuverror_c; if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx; if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm; - vp8_mse16x16 = vp8_mse16x16_c; if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx; if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_plane_add_noise = vp8_plane_add_noise_c; if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx; if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt; - vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_c; if (flags & HAS_SSE2) vp8_post_proc_down_and_across_mb_row = vp8_post_proc_down_and_across_mb_row_sse2; - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - vp8_regular_quantize_b = vp8_regular_quantize_b_c; if (flags & HAS_SSE2) vp8_regular_quantize_b = vp8_regular_quantize_b_sse2; - - vp8_sad16x16 = vp8_sad16x16_c; if (flags & HAS_MMX) vp8_sad16x16 = vp8_sad16x16_mmx; if (flags & HAS_SSE2) vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) 
vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - vp8_sad16x8 = vp8_sad16x8_c; if (flags & HAS_MMX) vp8_sad16x8 = vp8_sad16x8_mmx; if (flags & HAS_SSE2) vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - vp8_sad4x4 = vp8_sad4x4_c; if (flags & HAS_MMX) vp8_sad4x4 = vp8_sad4x4_mmx; if (flags & HAS_SSE2) vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - vp8_sad8x16 = vp8_sad8x16_c; if (flags & HAS_MMX) vp8_sad8x16 = vp8_sad8x16_mmx; if (flags & HAS_SSE2) vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - vp8_sad8x8 = vp8_sad8x8_c; if (flags & HAS_MMX) vp8_sad8x8 = vp8_sad8x8_mmx; if (flags & HAS_SSE2) vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; if (flags & HAS_MMX) vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct4x4 = vp8_short_fdct4x4_sse2; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; if (flags & HAS_MMX) 
vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; if (flags & HAS_SSE2) vp8_short_fdct8x4 = vp8_short_fdct8x4_sse2; - vp8_short_idct4x4llm = vp8_short_idct4x4llm_c; if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx; - vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c; if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx; if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2; - - vp8_short_walsh4x4 = vp8_short_walsh4x4_c; if (flags & HAS_SSE2) vp8_short_walsh4x4 = vp8_short_walsh4x4_sse2; - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c; if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c; if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c; if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c; if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx; if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = 
vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; if (flags & HAS_MMX) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; if (flags & HAS_MMX) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; if (flags & HAS_SSE2) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_subtract_b = vp8_subtract_b_c; if (flags & HAS_MMX) vp8_subtract_b = vp8_subtract_b_mmx; if (flags & HAS_SSE2) vp8_subtract_b = vp8_subtract_b_sse2; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; if (flags & HAS_MMX) vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; if (flags & HAS_SSE2) vp8_subtract_mbuv = vp8_subtract_mbuv_sse2; - vp8_subtract_mby = vp8_subtract_mby_c; if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx; if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2; - vp8_variance16x16 = vp8_variance16x16_c; if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx; if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_variance16x8 = vp8_variance16x8_c; if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx; if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance4x4 = vp8_variance4x4_c; if (flags & HAS_MMX) 
vp8_variance4x4 = vp8_variance4x4_mmx; if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt; - vp8_variance8x16 = vp8_variance8x16_c; if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx; if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance8x8 = vp8_variance8x8_c; if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx; if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt; - vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; diff --git a/source/config/win/ia32/vp9_rtcd.h b/source/config/win/ia32/vp9_rtcd.h index b4674d2..33c6064 100644 --- a/source/config/win/ia32/vp9_rtcd.h +++ b/source/config/win/ia32/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *); +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +RTCD_EXTERN void (*vp9_get_sse_sum_16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); + 
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); RTCD_EXTERN void (*vp9_get_sse_sum_8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); @@ -909,595 +913,385 @@ static void setup_rtcd_internal(void) (void)flags; - - - vp9_block_error = vp9_block_error_c; if (flags & HAS_SSE2) vp9_block_error = vp9_block_error_sse2; - vp9_convolve8 = vp9_convolve8_c; if (flags & HAS_SSE2) vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_c; if (flags & HAS_SSE2) vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_c; if (flags & HAS_SSE2) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_c; if (flags & HAS_SSE2) vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_c; if (flags & HAS_SSE2) vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - vp9_convolve_avg = vp9_convolve_avg_c; if (flags & HAS_SSE2) vp9_convolve_avg = vp9_convolve_avg_sse2; - vp9_convolve_copy = vp9_convolve_copy_c; if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2; - - - - - - - - - vp9_d153_predictor_16x16 = 
vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - 
vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c; if (flags & HAS_SSE2) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_sse2; - vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c; if (flags & HAS_SSE2) vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_sse2; - vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c; if (flags & HAS_SSE) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_sse; - vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c; if (flags & HAS_SSE) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_sse; - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - vp9_fdct16x16 = vp9_fdct16x16_c; if (flags & HAS_SSE2) vp9_fdct16x16 = vp9_fdct16x16_sse2; - vp9_fdct32x32 = vp9_fdct32x32_c; if (flags & HAS_SSE2) vp9_fdct32x32 = vp9_fdct32x32_sse2; - vp9_fdct32x32_rd = vp9_fdct32x32_rd_c; if (flags & HAS_SSE2) vp9_fdct32x32_rd = vp9_fdct32x32_rd_sse2; - vp9_fdct4x4 = vp9_fdct4x4_c; if (flags & HAS_SSE2) vp9_fdct4x4 = vp9_fdct4x4_sse2; - vp9_fdct8x8 = vp9_fdct8x8_c; if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2; - vp9_fht16x16 = vp9_fht16x16_c; if (flags & HAS_SSE2) vp9_fht16x16 = vp9_fht16x16_sse2; - vp9_fht4x4 = vp9_fht4x4_c; if (flags & HAS_SSE2) vp9_fht4x4 = vp9_fht4x4_sse2; - vp9_fht8x8 = vp9_fht8x8_c; if (flags & HAS_SSE2) vp9_fht8x8 = vp9_fht8x8_sse2; - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - vp9_get_mb_ss = vp9_get_mb_ss_c; if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx; if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2; - + vp9_get_sse_sum_16x16 = vp9_get_sse_sum_16x16_c; + if (flags & HAS_SSE2) vp9_get_sse_sum_16x16 = vp9_get16x16var_sse2; vp9_get_sse_sum_8x8 = vp9_get_sse_sum_8x8_c; if (flags & HAS_SSE2) vp9_get_sse_sum_8x8 = vp9_get8x8var_sse2; - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) 
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - vp9_idct16x16_10_add = vp9_idct16x16_10_add_c; if (flags & HAS_SSE2) vp9_idct16x16_10_add = vp9_idct16x16_10_add_sse2; - vp9_idct16x16_1_add = vp9_idct16x16_1_add_c; if (flags & HAS_SSE2) vp9_idct16x16_1_add = vp9_idct16x16_1_add_sse2; - vp9_idct16x16_256_add = vp9_idct16x16_256_add_c; if (flags & HAS_SSE2) vp9_idct16x16_256_add = vp9_idct16x16_256_add_sse2; - vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1024_add = vp9_idct32x32_1024_add_sse2; - vp9_idct32x32_1_add = vp9_idct32x32_1_add_c; if (flags & HAS_SSE2) vp9_idct32x32_1_add = vp9_idct32x32_1_add_sse2; - vp9_idct32x32_34_add = vp9_idct32x32_34_add_c; if (flags & HAS_SSE2) vp9_idct32x32_34_add = vp9_idct32x32_34_add_sse2; - vp9_idct4x4_16_add = vp9_idct4x4_16_add_c; if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2; - vp9_idct4x4_1_add = vp9_idct4x4_1_add_c; if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2; - vp9_idct8x8_10_add = vp9_idct8x8_10_add_c; if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2; - vp9_idct8x8_1_add = vp9_idct8x8_1_add_c; if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2; - vp9_idct8x8_64_add = vp9_idct8x8_64_add_c; if (flags & HAS_SSE2) vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2; - vp9_iht16x16_256_add = vp9_iht16x16_256_add_c; if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2; - vp9_iht4x4_16_add = vp9_iht4x4_16_add_c; if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2; - vp9_iht8x8_64_add = vp9_iht8x8_64_add_c; if (flags & HAS_SSE2) 
vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2; - - - vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2; - vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_c; if (flags & HAS_MMX) vp9_lpf_horizontal_4 = vp9_lpf_horizontal_4_mmx; - vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_4_dual = vp9_lpf_horizontal_4_dual_sse2; - vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8 = vp9_lpf_horizontal_8_sse2; - vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_horizontal_8_dual = vp9_lpf_horizontal_8_dual_sse2; - vp9_lpf_vertical_16 = vp9_lpf_vertical_16_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16 = vp9_lpf_vertical_16_sse2; - vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_16_dual = vp9_lpf_vertical_16_dual_sse2; - vp9_lpf_vertical_4 = vp9_lpf_vertical_4_c; if (flags & HAS_MMX) vp9_lpf_vertical_4 = vp9_lpf_vertical_4_mmx; - vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_4_dual = vp9_lpf_vertical_4_dual_sse2; - vp9_lpf_vertical_8 = vp9_lpf_vertical_8_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8 = vp9_lpf_vertical_8_sse2; - vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_c; if (flags & HAS_SSE2) vp9_lpf_vertical_8_dual = vp9_lpf_vertical_8_dual_sse2; - vp9_mse16x16 = vp9_mse16x16_c; if (flags & HAS_MMX) vp9_mse16x16 = vp9_mse16x16_mmx; if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2; - - - - - - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - vp9_sad16x16 = vp9_sad16x16_c; if (flags & HAS_MMX) vp9_sad16x16 = vp9_sad16x16_mmx; if (flags & HAS_SSE2) vp9_sad16x16 = vp9_sad16x16_sse2; - vp9_sad16x16_avg = vp9_sad16x16_avg_c; if (flags & HAS_SSE2) vp9_sad16x16_avg = vp9_sad16x16_avg_sse2; - vp9_sad16x16x3 = 
vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - vp9_sad16x16x4d = vp9_sad16x16x4d_c; if (flags & HAS_SSE2) vp9_sad16x16x4d = vp9_sad16x16x4d_sse2; - - vp9_sad16x32 = vp9_sad16x32_c; if (flags & HAS_SSE2) vp9_sad16x32 = vp9_sad16x32_sse2; - vp9_sad16x32_avg = vp9_sad16x32_avg_c; if (flags & HAS_SSE2) vp9_sad16x32_avg = vp9_sad16x32_avg_sse2; - vp9_sad16x32x4d = vp9_sad16x32x4d_c; if (flags & HAS_SSE2) vp9_sad16x32x4d = vp9_sad16x32x4d_sse2; - vp9_sad16x8 = vp9_sad16x8_c; if (flags & HAS_MMX) vp9_sad16x8 = vp9_sad16x8_mmx; if (flags & HAS_SSE2) vp9_sad16x8 = vp9_sad16x8_sse2; - vp9_sad16x8_avg = vp9_sad16x8_avg_c; if (flags & HAS_SSE2) vp9_sad16x8_avg = vp9_sad16x8_avg_sse2; - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - vp9_sad16x8x4d = vp9_sad16x8x4d_c; if (flags & HAS_SSE2) vp9_sad16x8x4d = vp9_sad16x8x4d_sse2; - - vp9_sad32x16 = vp9_sad32x16_c; if (flags & HAS_SSE2) vp9_sad32x16 = vp9_sad32x16_sse2; - vp9_sad32x16_avg = vp9_sad32x16_avg_c; if (flags & HAS_SSE2) vp9_sad32x16_avg = vp9_sad32x16_avg_sse2; - vp9_sad32x16x4d = vp9_sad32x16x4d_c; if (flags & HAS_SSE2) vp9_sad32x16x4d = vp9_sad32x16x4d_sse2; - vp9_sad32x32 = vp9_sad32x32_c; if (flags & HAS_SSE2) vp9_sad32x32 = vp9_sad32x32_sse2; - vp9_sad32x32_avg = vp9_sad32x32_avg_c; if (flags & HAS_SSE2) vp9_sad32x32_avg = vp9_sad32x32_avg_sse2; - - vp9_sad32x32x4d = vp9_sad32x32x4d_c; if (flags & HAS_SSE2) vp9_sad32x32x4d = vp9_sad32x32x4d_sse2; - - vp9_sad32x64 = vp9_sad32x64_c; if (flags & HAS_SSE2) vp9_sad32x64 = vp9_sad32x64_sse2; - vp9_sad32x64_avg = vp9_sad32x64_avg_c; if (flags & HAS_SSE2) vp9_sad32x64_avg = vp9_sad32x64_avg_sse2; - vp9_sad32x64x4d = vp9_sad32x64x4d_c; if (flags & HAS_SSE2) vp9_sad32x64x4d = vp9_sad32x64x4d_sse2; - vp9_sad4x4 = vp9_sad4x4_c; if (flags & HAS_MMX) vp9_sad4x4 = vp9_sad4x4_mmx; if 
(flags & HAS_SSE) vp9_sad4x4 = vp9_sad4x4_sse; - vp9_sad4x4_avg = vp9_sad4x4_avg_c; if (flags & HAS_SSE) vp9_sad4x4_avg = vp9_sad4x4_avg_sse; - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - vp9_sad4x4x4d = vp9_sad4x4x4d_c; if (flags & HAS_SSE) vp9_sad4x4x4d = vp9_sad4x4x4d_sse; - - vp9_sad4x8 = vp9_sad4x8_c; if (flags & HAS_SSE) vp9_sad4x8 = vp9_sad4x8_sse; - vp9_sad4x8_avg = vp9_sad4x8_avg_c; if (flags & HAS_SSE) vp9_sad4x8_avg = vp9_sad4x8_avg_sse; - vp9_sad4x8x4d = vp9_sad4x8x4d_c; if (flags & HAS_SSE) vp9_sad4x8x4d = vp9_sad4x8x4d_sse; - - vp9_sad64x32 = vp9_sad64x32_c; if (flags & HAS_SSE2) vp9_sad64x32 = vp9_sad64x32_sse2; - vp9_sad64x32_avg = vp9_sad64x32_avg_c; if (flags & HAS_SSE2) vp9_sad64x32_avg = vp9_sad64x32_avg_sse2; - vp9_sad64x32x4d = vp9_sad64x32x4d_c; if (flags & HAS_SSE2) vp9_sad64x32x4d = vp9_sad64x32x4d_sse2; - vp9_sad64x64 = vp9_sad64x64_c; if (flags & HAS_SSE2) vp9_sad64x64 = vp9_sad64x64_sse2; - vp9_sad64x64_avg = vp9_sad64x64_avg_c; if (flags & HAS_SSE2) vp9_sad64x64_avg = vp9_sad64x64_avg_sse2; - - vp9_sad64x64x4d = vp9_sad64x64x4d_c; if (flags & HAS_SSE2) vp9_sad64x64x4d = vp9_sad64x64x4d_sse2; - - vp9_sad8x16 = vp9_sad8x16_c; if (flags & HAS_MMX) vp9_sad8x16 = vp9_sad8x16_mmx; if (flags & HAS_SSE2) vp9_sad8x16 = vp9_sad8x16_sse2; - vp9_sad8x16_avg = vp9_sad8x16_avg_c; if (flags & HAS_SSE2) vp9_sad8x16_avg = vp9_sad8x16_avg_sse2; - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - vp9_sad8x16x4d = vp9_sad8x16x4d_c; if (flags & HAS_SSE2) vp9_sad8x16x4d = vp9_sad8x16x4d_sse2; - - vp9_sad8x4 = vp9_sad8x4_c; if (flags & HAS_SSE2) vp9_sad8x4 = vp9_sad8x4_sse2; - vp9_sad8x4_avg = vp9_sad8x4_avg_c; if (flags & HAS_SSE2) vp9_sad8x4_avg = vp9_sad8x4_avg_sse2; - vp9_sad8x4x4d = vp9_sad8x4x4d_c; if (flags & HAS_SSE2) vp9_sad8x4x4d = vp9_sad8x4x4d_sse2; - - vp9_sad8x8 = vp9_sad8x8_c; if (flags & HAS_MMX) vp9_sad8x8 = vp9_sad8x8_mmx; if (flags & HAS_SSE2) vp9_sad8x8 
= vp9_sad8x8_sse2; - vp9_sad8x8_avg = vp9_sad8x8_avg_c; if (flags & HAS_SSE2) vp9_sad8x8_avg = vp9_sad8x8_avg_sse2; - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - vp9_sad8x8x4d = vp9_sad8x8x4d_c; if (flags & HAS_SSE2) vp9_sad8x8x4d = vp9_sad8x8x4d_sse2; - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x4 = 
vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_c; if (flags & HAS_SSE2) 
vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_c; if (flags & HAS_SSE) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = 
vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_c; if (flags & HAS_SSE2) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; - vp9_subtract_block = vp9_subtract_block_c; if (flags & HAS_SSE2) vp9_subtract_block = vp9_subtract_block_sse2; - vp9_temporal_filter_apply = vp9_temporal_filter_apply_c; if (flags & HAS_SSE2) vp9_temporal_filter_apply = vp9_temporal_filter_apply_sse2; - vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_c; if (flags & HAS_SSE2) vp9_tm_predictor_16x16 = vp9_tm_predictor_16x16_sse2; - - vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_c; if (flags & HAS_SSE) vp9_tm_predictor_4x4 = vp9_tm_predictor_4x4_sse; - vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_c; if (flags & HAS_SSE2) vp9_tm_predictor_8x8 = vp9_tm_predictor_8x8_sse2; - vp9_v_predictor_16x16 = vp9_v_predictor_16x16_c; if (flags & HAS_SSE2) vp9_v_predictor_16x16 = vp9_v_predictor_16x16_sse2; - vp9_v_predictor_32x32 = vp9_v_predictor_32x32_c; if (flags & HAS_SSE2) vp9_v_predictor_32x32 = vp9_v_predictor_32x32_sse2; - vp9_v_predictor_4x4 = vp9_v_predictor_4x4_c; if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse; - vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c; if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse; - vp9_variance16x16 = vp9_variance16x16_c; if (flags & HAS_MMX) vp9_variance16x16 = vp9_variance16x16_mmx; if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2; - 
vp9_variance16x32 = vp9_variance16x32_c; if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2; - vp9_variance16x8 = vp9_variance16x8_c; if (flags & HAS_MMX) vp9_variance16x8 = vp9_variance16x8_mmx; if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2; - vp9_variance32x16 = vp9_variance32x16_c; if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2; - vp9_variance32x32 = vp9_variance32x32_c; if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2; - vp9_variance32x64 = vp9_variance32x64_c; if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2; - vp9_variance4x4 = vp9_variance4x4_c; if (flags & HAS_MMX) vp9_variance4x4 = vp9_variance4x4_mmx; if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2; - vp9_variance4x8 = vp9_variance4x8_c; if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2; - vp9_variance64x32 = vp9_variance64x32_c; if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2; - vp9_variance64x64 = vp9_variance64x64_c; if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2; - vp9_variance8x16 = vp9_variance8x16_c; if (flags & HAS_MMX) vp9_variance8x16 = vp9_variance8x16_mmx; if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2; - vp9_variance8x4 = vp9_variance8x4_c; if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2; - vp9_variance8x8 = vp9_variance8x8_c; if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx; if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2; - vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_sse2; - vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_sse2; - vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_c; if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_sse2; } 
diff --git a/source/config/win/ia32/vpx_config.asm b/source/config/win/ia32/vpx_config.asm index 7907235..cc0de0d 100644 --- a/source/config/win/ia32/vpx_config.asm +++ b/source/config/win/ia32/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/win/ia32/vpx_config.h b/source/config/win/ia32/vpx_config.h index a1e25bd..7aaa14d 100644 --- a/source/config/win/ia32/vpx_config.h +++ b/source/config/win/ia32/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/win/ia32/vpx_scale_rtcd.h b/source/config/win/ia32/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/win/ia32/vpx_scale_rtcd.h +++ b/source/config/win/ia32/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/config/win/x64/vp8_rtcd.h b/source/config/win/x64/vp8_rtcd.h index f7b58ac..9653130 100644 --- a/source/config/win/x64/vp8_rtcd.h +++ b/source/config/win/x64/vp8_rtcd.h @@ -490,151 +490,67 @@ static void setup_rtcd_internal(void) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3; - - - vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3; - 
- - - - vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mbuv_s = vp8_build_intra_predictors_mbuv_s_ssse3; - vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_sse2; if (flags & HAS_SSSE3) vp8_build_intra_predictors_mby_s = vp8_build_intra_predictors_mby_s_ssse3; - - vp8_copy32xn = vp8_copy32xn_sse2; if (flags & HAS_SSE3) vp8_copy32xn = vp8_copy32xn_sse3; - - - - - - - - - - vp8_diamond_search_sad = vp8_diamond_search_sad_c; if (flags & HAS_SSE3) vp8_diamond_search_sad = vp8_diamond_search_sadx4; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse2; if (flags & HAS_SSSE3) vp8_fast_quantize_b = vp8_fast_quantize_b_ssse3; - - - - - vp8_full_search_sad = vp8_full_search_sad_c; if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3; if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8; - - - - - - - - - - - - - - - - - - - - - - vp8_refining_search_sad = vp8_refining_search_sad_c; if (flags & HAS_SSE3) vp8_refining_search_sad = vp8_refining_search_sadx4; - - - vp8_sad16x16 = vp8_sad16x16_wmt; if (flags & HAS_SSE3) vp8_sad16x16 = vp8_sad16x16_sse3; - vp8_sad16x16x3 = vp8_sad16x16x3_c; if (flags & HAS_SSE3) vp8_sad16x16x3 = vp8_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x16x3 = vp8_sad16x16x3_ssse3; - vp8_sad16x16x4d = vp8_sad16x16x4d_c; if (flags & HAS_SSE3) vp8_sad16x16x4d = vp8_sad16x16x4d_sse3; - vp8_sad16x16x8 = vp8_sad16x16x8_c; if (flags & HAS_SSE4_1) vp8_sad16x16x8 = vp8_sad16x16x8_sse4; - - vp8_sad16x8x3 = vp8_sad16x8x3_c; if (flags & HAS_SSE3) vp8_sad16x8x3 = vp8_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp8_sad16x8x3 = vp8_sad16x8x3_ssse3; - vp8_sad16x8x4d = vp8_sad16x8x4d_c; if (flags & HAS_SSE3) vp8_sad16x8x4d = vp8_sad16x8x4d_sse3; - vp8_sad16x8x8 = vp8_sad16x8x8_c; if (flags & HAS_SSE4_1) vp8_sad16x8x8 = vp8_sad16x8x8_sse4; - - vp8_sad4x4x3 = vp8_sad4x4x3_c; if (flags & HAS_SSE3) vp8_sad4x4x3 = vp8_sad4x4x3_sse3; - vp8_sad4x4x4d = 
vp8_sad4x4x4d_c; if (flags & HAS_SSE3) vp8_sad4x4x4d = vp8_sad4x4x4d_sse3; - vp8_sad4x4x8 = vp8_sad4x4x8_c; if (flags & HAS_SSE4_1) vp8_sad4x4x8 = vp8_sad4x4x8_sse4; - - vp8_sad8x16x3 = vp8_sad8x16x3_c; if (flags & HAS_SSE3) vp8_sad8x16x3 = vp8_sad8x16x3_sse3; - vp8_sad8x16x4d = vp8_sad8x16x4d_c; if (flags & HAS_SSE3) vp8_sad8x16x4d = vp8_sad8x16x4d_sse3; - vp8_sad8x16x8 = vp8_sad8x16x8_c; if (flags & HAS_SSE4_1) vp8_sad8x16x8 = vp8_sad8x16x8_sse4; - - vp8_sad8x8x3 = vp8_sad8x8x3_c; if (flags & HAS_SSE3) vp8_sad8x8x3 = vp8_sad8x8x3_sse3; - vp8_sad8x8x4d = vp8_sad8x8x4d_c; if (flags & HAS_SSE3) vp8_sad8x8x4d = vp8_sad8x8x4d_sse3; - vp8_sad8x8x8 = vp8_sad8x8x8_c; if (flags & HAS_SSE4_1) vp8_sad8x8x8 = vp8_sad8x8x8_sse4; - - - - - - - vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3; - vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx; if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3; - vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3; - vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2; if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3; - - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ssse3; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; if (flags & HAS_SSSE3) vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_ssse3; } diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h index 08003f5..e6a0520 100644 --- a/source/config/win/x64/vp9_rtcd.h +++ b/source/config/win/x64/vp9_rtcd.h @@ -271,6 +271,10 @@ unsigned int vp9_get_mb_ss_mmx(const int16_t *); unsigned int vp9_get_mb_ss_sse2(const int16_t *); #define vp9_get_mb_ss vp9_get_mb_ss_sse2 +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, 
const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); +#define vp9_get_sse_sum_16x16 vp9_get16x16var_sse2 + void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); #define vp9_get_sse_sum_8x8 vp9_get8x8var_sse2 @@ -912,315 +916,129 @@ static void setup_rtcd_internal(void) (void)flags; - - - - vp9_convolve8 = vp9_convolve8_sse2; if (flags & HAS_SSSE3) vp9_convolve8 = vp9_convolve8_ssse3; - vp9_convolve8_avg = vp9_convolve8_avg_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg = vp9_convolve8_avg_ssse3; - vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_horiz = vp9_convolve8_avg_horiz_ssse3; - vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_avg_vert = vp9_convolve8_avg_vert_ssse3; - vp9_convolve8_horiz = vp9_convolve8_horiz_sse2; if (flags & HAS_SSSE3) vp9_convolve8_horiz = vp9_convolve8_horiz_ssse3; - vp9_convolve8_vert = vp9_convolve8_vert_sse2; if (flags & HAS_SSSE3) vp9_convolve8_vert = vp9_convolve8_vert_ssse3; - - - - - - - - - - - vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3; - - vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3; - vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_ssse3; - vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d207_predictor_16x16 = vp9_d207_predictor_16x16_ssse3; - vp9_d207_predictor_32x32 = 
vp9_d207_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d207_predictor_32x32 = vp9_d207_predictor_32x32_ssse3; - vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d207_predictor_4x4 = vp9_d207_predictor_4x4_ssse3; - vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d207_predictor_8x8 = vp9_d207_predictor_8x8_ssse3; - vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_ssse3; - vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d45_predictor_32x32 = vp9_d45_predictor_32x32_ssse3; - vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_ssse3; - vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_ssse3; - vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_d63_predictor_16x16 = vp9_d63_predictor_16x16_ssse3; - vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_c; if (flags & HAS_SSSE3) vp9_d63_predictor_32x32 = vp9_d63_predictor_32x32_ssse3; - vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_d63_predictor_4x4 = vp9_d63_predictor_4x4_ssse3; - vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - vp9_diamond_search_sad = vp9_diamond_search_sad_c; if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4; - - - - - - - - - - vp9_full_search_sad = vp9_full_search_sad_c; if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3; if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8; - - - - vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c; if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3; - vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c; if (flags & HAS_SSSE3) 
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_ssse3; - vp9_h_predictor_4x4 = vp9_h_predictor_4x4_c; if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3; - vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c; if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - vp9_quantize_b = vp9_quantize_b_c; if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3; - vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c; if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3; - vp9_refining_search_sad = vp9_refining_search_sad_c; if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4; - - - vp9_sad16x16x3 = vp9_sad16x16x3_c; if (flags & HAS_SSE3) vp9_sad16x16x3 = vp9_sad16x16x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x16x3 = vp9_sad16x16x3_ssse3; - - - - - - - - vp9_sad16x8x3 = vp9_sad16x8x3_c; if (flags & HAS_SSE3) vp9_sad16x8x3 = vp9_sad16x8x3_sse3; if (flags & HAS_SSSE3) vp9_sad16x8x3 = vp9_sad16x8x3_ssse3; - - - - - - - - - - - - - - - - vp9_sad4x4x3 = vp9_sad4x4x3_c; if (flags & HAS_SSE3) vp9_sad4x4x3 = vp9_sad4x4x3_sse3; - - - - - - - - - - - - - - - - - vp9_sad8x16x3 = vp9_sad8x16x3_c; if (flags & HAS_SSE3) vp9_sad8x16x3 = vp9_sad8x16x3_sse3; - - - - - - - - - vp9_sad8x8x3 = vp9_sad8x8x3_c; if (flags & HAS_SSE3) vp9_sad8x8x3 = vp9_sad8x8x3_sse3; - - - vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x16 = vp9_sub_pixel_avg_variance16x16_ssse3; - vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x32 = vp9_sub_pixel_avg_variance16x32_ssse3; - vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance16x8 = vp9_sub_pixel_avg_variance16x8_ssse3; - vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_sse2; if (flags & HAS_SSSE3) 
vp9_sub_pixel_avg_variance32x16 = vp9_sub_pixel_avg_variance32x16_ssse3; - vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x32 = vp9_sub_pixel_avg_variance32x32_ssse3; - vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance32x64 = vp9_sub_pixel_avg_variance32x64_ssse3; - vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x4 = vp9_sub_pixel_avg_variance4x4_ssse3; - vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance4x8 = vp9_sub_pixel_avg_variance4x8_ssse3; - vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x32 = vp9_sub_pixel_avg_variance64x32_ssse3; - vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance64x64 = vp9_sub_pixel_avg_variance64x64_ssse3; - vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x16 = vp9_sub_pixel_avg_variance8x16_ssse3; - vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x4 = vp9_sub_pixel_avg_variance8x4_ssse3; - vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_avg_variance8x8 = vp9_sub_pixel_avg_variance8x8_ssse3; - - - vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x16 = vp9_sub_pixel_variance16x16_ssse3; - vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x32 = vp9_sub_pixel_variance16x32_ssse3; - vp9_sub_pixel_variance16x8 = vp9_sub_pixel_variance16x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance16x8 = 
vp9_sub_pixel_variance16x8_ssse3; - vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x16 = vp9_sub_pixel_variance32x16_ssse3; - vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x32 = vp9_sub_pixel_variance32x32_ssse3; - vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance32x64 = vp9_sub_pixel_variance32x64_ssse3; - vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x4 = vp9_sub_pixel_variance4x4_ssse3; - vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_sse; if (flags & HAS_SSSE3) vp9_sub_pixel_variance4x8 = vp9_sub_pixel_variance4x8_ssse3; - vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x32 = vp9_sub_pixel_variance64x32_ssse3; - vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance64x64 = vp9_sub_pixel_variance64x64_ssse3; - vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x16 = vp9_sub_pixel_variance8x16_ssse3; - vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3; - vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2; if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3; } diff --git a/source/config/win/x64/vpx_config.asm b/source/config/win/x64/vpx_config.asm index f502251..2bfd490 100644 --- a/source/config/win/x64/vpx_config.asm +++ b/source/config/win/x64/vpx_config.asm @@ -70,11 +70,11 @@ CONFIG_SMALL equ 0 CONFIG_POSTPROC_VISUALIZER equ 0 CONFIG_OS_SUPPORT equ 1 CONFIG_UNIT_TESTS equ 0 +CONFIG_WEBM_IO equ 1 CONFIG_DECODE_PERF_TESTS equ 0 CONFIG_MULTI_RES_ENCODING equ 1 CONFIG_TEMPORAL_DENOISING equ 1 CONFIG_EXPERIMENTAL 
equ 0 CONFIG_DECRYPT equ 0 CONFIG_MULTIPLE_ARF equ 0 -CONFIG_NON420 equ 0 CONFIG_ALPHA equ 0 diff --git a/source/config/win/x64/vpx_config.h b/source/config/win/x64/vpx_config.h index c8874e1..e7570ce 100644 --- a/source/config/win/x64/vpx_config.h +++ b/source/config/win/x64/vpx_config.h @@ -82,12 +82,12 @@ #define CONFIG_POSTPROC_VISUALIZER 0 #define CONFIG_OS_SUPPORT 1 #define CONFIG_UNIT_TESTS 0 +#define CONFIG_WEBM_IO 1 #define CONFIG_DECODE_PERF_TESTS 0 #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_EXPERIMENTAL 0 #define CONFIG_DECRYPT 0 #define CONFIG_MULTIPLE_ARF 0 -#define CONFIG_NON420 0 #define CONFIG_ALPHA 0 #endif /* VPX_CONFIG_H */ diff --git a/source/config/win/x64/vpx_scale_rtcd.h b/source/config/win/x64/vpx_scale_rtcd.h index 6eadf0f..7487e5f 100644 --- a/source/config/win/x64/vpx_scale_rtcd.h +++ b/source/config/win/x64/vpx_scale_rtcd.h @@ -59,7 +59,6 @@ static void setup_rtcd_internal(void) (void)flags; - } #endif diff --git a/source/libvpx/.gitignore b/source/libvpx/.gitignore index aa95d57..bb9e518 100644 --- a/source/libvpx/.gitignore +++ b/source/libvpx/.gitignore @@ -28,15 +28,13 @@ /examples/decode_to_md5 /examples/decode_with_drops /examples/decode_with_partial_drops -/examples/error_resilient /examples/example_xma -/examples/force_keyframe /examples/postproc +/examples/set_maps /examples/simple_decoder /examples/simple_encoder /examples/twopass_encoder /examples/vp8_multi_resolution_encoder -/examples/vp8_set_maps /examples/vp8cx_set_ref /examples/vp9_spatial_scalable_encoder /examples/vpx_temporal_scalable_patterns diff --git a/source/libvpx/build/make/Makefile b/source/libvpx/build/make/Makefile index 6894d6d..dd7fb4a 100644 --- a/source/libvpx/build/make/Makefile +++ b/source/libvpx/build/make/Makefile @@ -94,6 +94,16 @@ clean:: rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s) rm -f $(CLEAN-OBJS) +.PHONY: clean +distclean: clean + if [ -z "$(target)" ]; then \ + rm -f Makefile; \ 
+ rm -f config.log config.mk; \ + rm -f vpx_config.[hc] vpx_config.asm; \ + else \ + rm -f $(target)-$(TOOLCHAIN).mk; \ + fi + .PHONY: dist dist: .PHONY: install @@ -307,7 +317,7 @@ endef ifneq ($(target),) include $(SRC_PATH_BARE)/$(target:-$(TOOLCHAIN)=).mk endif -ifeq ($(filter clean,$(MAKECMDGOALS)),) +ifeq ($(filter %clean,$(MAKECMDGOALS)),) # Older versions of make don't like -include directives with no arguments ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),) -include $(filter %.d,$(OBJS-yes:.o=.d)) diff --git a/source/libvpx/build/make/ads2armasm_ms.pl b/source/libvpx/build/make/ads2armasm_ms.pl index 95c8084..2a2c470 100755 --- a/source/libvpx/build/make/ads2armasm_ms.pl +++ b/source/libvpx/build/make/ads2armasm_ms.pl @@ -32,6 +32,7 @@ while (<STDIN>) s/ldrneb/ldrbne/i; s/ldrneh/ldrhne/i; + s/^(\s*)ENDP.*/$&\n$1ALIGN 4/; print; } diff --git a/source/libvpx/build/make/configure.sh b/source/libvpx/build/make/configure.sh index c379c74..514c442 100755 --- a/source/libvpx/build/make/configure.sh +++ b/source/libvpx/build/make/configure.sh @@ -41,7 +41,7 @@ log(){ log_file(){ log BEGIN $1 - pr -n -t $1 >>$logfile + cat -n $1 >>$logfile log END $1 } @@ -274,6 +274,7 @@ TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm" clean_temp_files() { rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM} + enabled gcov && rm -f ${TMP_C%.c}.gcno ${TMP_CC%.cc}.gcno } # @@ -327,7 +328,7 @@ EOF check_cflags() { log check_cflags "$@" - check_cc "$@" <<EOF + check_cc -Werror "$@" <<EOF int x; EOF } @@ -341,7 +342,7 @@ check_cxxflags() { int x; EOF ;; - *) check_cxx "$@" <<EOF + *) check_cxx -Werror "$@" <<EOF int x; EOF ;; @@ -378,6 +379,19 @@ EOF fi } +# tests for -m$1 toggling the feature given in $2. If $2 is empty $1 is used. +check_gcc_machine_option() { + local opt="$1" + local feature="$2" + [ -n "$feature" ] || feature="$opt" + + if enabled gcc && ! disabled "$feature" && ! 
check_cflags "-m$opt"; then + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-$feature " + else + soft_enable "$feature" + fi +} + write_common_config_banner() { print_webm_license config.mk "##" "" echo '# This file automatically generated by configure. Do not edit!' >> config.mk @@ -405,8 +419,8 @@ true } write_common_target_config_mk() { - local CC=${CC} - local CXX=${CXX} + local CC="${CC}" + local CXX="${CXX}" enabled ccache && CC="ccache ${CC}" enabled ccache && CXX="ccache ${CXX}" print_webm_license $1 "##" "" @@ -1089,30 +1103,16 @@ EOF esac soft_enable runtime_cpu_detect - soft_enable mmx - soft_enable sse - soft_enable sse2 - soft_enable sse3 - soft_enable ssse3 # We can't use 'check_cflags' until the compiler is configured and CC is # populated. - if enabled gcc && ! disabled sse4_1 && ! check_cflags -msse4; then - RTCD_OPTIONS="${RTCD_OPTIONS}--disable-sse4_1 " - else - soft_enable sse4_1 - fi - - if enabled gcc && ! disabled avx && ! check_cflags -mavx; then - RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx " - else - soft_enable avx - fi - - if enabled gcc && ! disabled avx2 && ! 
check_cflags -mavx2; then - RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx2 " - else - soft_enable avx2 - fi + check_gcc_machine_option mmx + check_gcc_machine_option sse + check_gcc_machine_option sse2 + check_gcc_machine_option sse3 + check_gcc_machine_option ssse3 + check_gcc_machine_option sse4 sse4_1 + check_gcc_machine_option avx + check_gcc_machine_option avx2 case "${AS}" in auto|"") diff --git a/source/libvpx/build/make/gen_msvs_proj.sh b/source/libvpx/build/make/gen_msvs_proj.sh index 5936370..df91435 100755 --- a/source/libvpx/build/make/gen_msvs_proj.sh +++ b/source/libvpx/build/make/gen_msvs_proj.sh @@ -162,7 +162,8 @@ generate_filter() { done done fi - if [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then + if [ "$pat" == "c" ] || \ + [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then for plat in "${platforms[@]}"; do for cfg in Debug Release; do open_tag FileConfiguration \ @@ -561,7 +562,7 @@ generate_vcproj() { close_tag Configurations open_tag Files - generate_filter srcs "Source Files" "c;cc;def;odl;idl;hpj;bat;asm;asmx" + generate_filter srcs "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx" generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd" generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav" generate_filter resrcs "Build Files" "mk" diff --git a/source/libvpx/build/make/gen_msvs_vcxproj.sh b/source/libvpx/build/make/gen_msvs_vcxproj.sh index 4558aa1..23990a4 100755 --- a/source/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/source/libvpx/build/make/gen_msvs_vcxproj.sh @@ -28,6 +28,7 @@ Options: --lib Generate a project for creating a static library --dll Generate a project for creating a dll --static-crt Use the static C runtime (/MT) + --enable-werror Treat warnings as errors (/WX) --target=isa-os-cc Target specifier (required) --out=filename Write output to a file [stdout] --name=project_name Name of the project (required) @@ -173,7 +174,8 @@ generate_filter() { done done close_tag 
CustomBuild - elif [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then + elif [ "$pat" == "c" ] || \ + [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then open_tag ClCompile \ Include=".\\$f" # Separate file names with Condition? @@ -233,6 +235,8 @@ for opt in "$@"; do ;; --static-crt) use_static_runtime=true ;; + --enable-werror) werror=true + ;; --ver=*) vs_ver="$optval" case "$optval" in @@ -492,7 +496,9 @@ generate_vcxproj() { tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)" tag_content RuntimeLibrary $runtime tag_content WarningLevel Level3 - # DebugInformationFormat + if ${werror:-false}; then + tag_content TreatWarningAsError true + fi close_tag ClCompile case "$proj_kind" in exe) @@ -519,7 +525,7 @@ generate_vcxproj() { done open_tag ItemGroup - generate_filter "Source Files" "c;cc;def;odl;idl;hpj;bat;asm;asmx;s" + generate_filter "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s" close_tag ItemGroup open_tag ItemGroup generate_filter "Header Files" "h;hm;inl;inc;xsd" diff --git a/source/libvpx/build/make/rtcd.pl b/source/libvpx/build/make/rtcd.pl new file mode 100755 index 0000000..18ee80d --- /dev/null +++ b/source/libvpx/build/make/rtcd.pl @@ -0,0 +1,414 @@ +#!/usr/bin/env perl + +no strict 'refs'; +use warnings; +use Getopt::Long; +Getopt::Long::Configure("auto_help"); + +my %ALL_FUNCS = (); +my @ALL_ARCHS; +my @ALL_FORWARD_DECLS; +my @REQUIRES; + +my %opts = (); +my %disabled = (); +my %required = (); + +my @argv; +foreach (@ARGV) { + $disabled{$1} = 1, next if /--disable-(.*)/; + $required{$1} = 1, next if /--require-(.*)/; + push @argv, $_; +} + +# NB: use GetOptions() instead of GetOptionsFromArray() for compatibility. 
+@ARGV = @argv; +GetOptions( + \%opts, + 'arch=s', + 'sym=s', + 'config=s', +); + +foreach my $opt (qw/arch config/) { + if (!defined($opts{$opt})) { + warn "--$opt is required!\n"; + Getopt::Long::HelpMessage('-exit' => 1); + } +} + +foreach my $defs_file (@ARGV) { + if (!-f $defs_file) { + warn "$defs_file: $!\n"; + Getopt::Long::HelpMessage('-exit' => 1); + } +} + +open CONFIG_FILE, $opts{config} or + die "Error opening config file '$opts{config}': $!\n"; + +my %config = (); +while (<CONFIG_FILE>) { + next if !/^CONFIG_/; + chomp; + my @pair = split /=/; + $config{$pair[0]} = $pair[1]; +} +close CONFIG_FILE; + +# +# Routines for the RTCD DSL to call +# +sub vpx_config($) { + return (defined $config{$_[0]}) ? $config{$_[0]} : ""; +} + +sub specialize { + my $fn=$_[0]; + shift; + foreach my $opt (@_) { + eval "\$${fn}_${opt}=${fn}_${opt}"; + } +} + +sub add_proto { + my $fn = splice(@_, -2, 1); + $ALL_FUNCS{$fn} = \@_; + specialize $fn, "c"; +} + +sub require { + foreach my $fn (keys %ALL_FUNCS) { + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + + # if we already have a default, then we can disable it, as we know + # we can do better. 
+ my $best = eval "\$${fn}_default"; + if ($best) { + my $best_ofn = eval "\$${best}"; + if ($best_ofn && "$best_ofn" ne "$ofn") { + eval "\$${best}_link = 'false'"; + } + } + eval "\$${fn}_default=${fn}_${opt}"; + eval "\$${fn}_${opt}_link='true'"; + } + } +} + +sub forward_decls { + push @ALL_FORWARD_DECLS, @_; +} + +# +# Include the user's directives +# +foreach my $f (@ARGV) { + open FILE, "<", $f or die "cannot open $f: $!\n"; + my $contents = join('', <FILE>); + close FILE; + eval $contents or warn "eval failed: $@\n"; +} + +# +# Process the directives according to the command line +# +sub process_forward_decls() { + foreach (@ALL_FORWARD_DECLS) { + $_->(); + } +} + +sub determine_indirection { + vpx_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS); + foreach my $fn (keys %ALL_FUNCS) { + my $n = ""; + my @val = @{$ALL_FUNCS{$fn}}; + my $args = pop @val; + my $rtyp = "@val"; + my $dfn = eval "\$${fn}_default"; + $dfn = eval "\$${dfn}"; + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + my $link = eval "\$${fn}_${opt}_link"; + next if $link && $link eq "false"; + $n .= "x"; + } + if ($n eq "x") { + eval "\$${fn}_indirect = 'false'"; + } else { + eval "\$${fn}_indirect = 'true'"; + } + } +} + +sub declare_function_pointers { + foreach my $fn (sort keys %ALL_FUNCS) { + my @val = @{$ALL_FUNCS{$fn}}; + my $args = pop @val; + my $rtyp = "@val"; + my $dfn = eval "\$${fn}_default"; + $dfn = eval "\$${dfn}"; + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + print "$rtyp ${ofn}($args);\n"; + } + if (eval "\$${fn}_indirect" eq "false") { + print "#define ${fn} ${dfn}\n"; + } else { + print "RTCD_EXTERN $rtyp (*${fn})($args);\n"; + } + print "\n"; + } +} + +sub set_function_pointers { + foreach my $fn (sort keys %ALL_FUNCS) { + my @val = @{$ALL_FUNCS{$fn}}; + my $args = pop @val; + my $rtyp = "@val"; + my $dfn = eval "\$${fn}_default"; + $dfn = eval "\$${dfn}"; + if (eval "\$${fn}_indirect" eq 
"true") { + print " $fn = $dfn;\n"; + foreach my $opt (@_) { + my $ofn = eval "\$${fn}_${opt}"; + next if !$ofn; + next if "$ofn" eq "$dfn"; + my $link = eval "\$${fn}_${opt}_link"; + next if $link && $link eq "false"; + my $cond = eval "\$have_${opt}"; + print " if (${cond}) $fn = $ofn;\n" + } + } + } +} + +sub filter { + my @filtered; + foreach (@_) { push @filtered, $_ unless $disabled{$_}; } + return @filtered; +} + +# +# Helper functions for generating the arch specific RTCD files +# +sub common_top() { + my $include_guard = uc($opts{sym})."_H_"; + print <<EOF; +#ifndef ${include_guard} +#define ${include_guard} + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +EOF + +process_forward_decls(); +print "\n"; +declare_function_pointers("c", @ALL_ARCHS); + +print <<EOF; +void $opts{sym}(void); + +EOF +} + +sub common_bottom() { + print <<EOF; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif +EOF +} + +sub x86() { + determine_indirection("c", @ALL_ARCHS); + + # Assign the helper variable for each enabled extension + foreach my $opt (@ALL_ARCHS) { + my $opt_uc = uc $opt; + eval "\$have_${opt}=\"flags & HAS_${opt_uc}\""; + } + + common_top; + print <<EOF; +#ifdef RTCD_C +#include "vpx_ports/x86.h" +static void setup_rtcd_internal(void) +{ + int flags = x86_simd_caps(); + + (void)flags; + +EOF + + set_function_pointers("c", @ALL_ARCHS); + + print <<EOF; +} +#endif +EOF + common_bottom; +} + +sub arm() { + determine_indirection("c", @ALL_ARCHS); + + # Assign the helper variable for each enabled extension + foreach my $opt (@ALL_ARCHS) { + my $opt_uc = uc $opt; + eval "\$have_${opt}=\"flags & HAS_${opt_uc}\""; + } + + common_top; + print <<EOF; +#include "vpx_config.h" + +#ifdef RTCD_C +#include "vpx_ports/arm.h" +static void setup_rtcd_internal(void) +{ + int flags = arm_cpu_caps(); + + (void)flags; + +EOF + + set_function_pointers("c", @ALL_ARCHS); + + print <<EOF; +} +#endif +EOF + 
common_bottom; +} + +sub mips() { + determine_indirection("c", @ALL_ARCHS); + common_top; + + print <<EOF; +#include "vpx_config.h" + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +EOF + + set_function_pointers("c", @ALL_ARCHS); + + print <<EOF; +#if HAVE_DSPR2 +#if CONFIG_VP8 +void dsputil_static_init(); +dsputil_static_init(); +#endif +#if CONFIG_VP9 +void vp9_dsputil_static_init(); +vp9_dsputil_static_init(); +#endif +#endif +} +#endif +EOF + common_bottom; +} + +sub unoptimized() { + determine_indirection "c"; + common_top; + print <<EOF; +#include "vpx_config.h" + +#ifdef RTCD_C +static void setup_rtcd_internal(void) +{ +EOF + + set_function_pointers "c"; + + print <<EOF; +} +#endif +EOF + common_bottom; +} + +# +# Main Driver +# + +&require("c"); +if ($opts{arch} eq 'x86') { + @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/); + x86; +} elsif ($opts{arch} eq 'x86_64') { + @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/); + @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/); + &require(@REQUIRES); + x86; +} elsif ($opts{arch} eq 'mips32') { + @ALL_ARCHS = filter(qw/mips32/); + open CONFIG_FILE, $opts{config} or + die "Error opening config file '$opts{config}': $!\n"; + while (<CONFIG_FILE>) { + if (/HAVE_DSPR2=yes/) { + @ALL_ARCHS = filter(qw/mips32 dspr2/); + last; + } + } + close CONFIG_FILE; + mips; +} elsif ($opts{arch} eq 'armv5te') { + @ALL_ARCHS = filter(qw/edsp/); + arm; +} elsif ($opts{arch} eq 'armv6') { + @ALL_ARCHS = filter(qw/edsp media/); + arm; +} elsif ($opts{arch} eq 'armv7') { + @ALL_ARCHS = filter(qw/edsp media neon/); + arm; +} else { + unoptimized; +} + +__END__ + +=head1 NAME + +rtcd - + +=head1 SYNOPSIS + +Usage: rtcd.pl [options] FILE + +See 'perldoc rtcd.pl' for more details. + +=head1 DESCRIPTION + +Reads the Run Time CPU Detections definitions from FILE and generates a +C header file on stdout. 
+ +=head1 OPTIONS + +Options: + --arch=ARCH Architecture to generate defs for (required) + --disable-EXT Disable support for EXT extensions + --require-EXT Require support for EXT extensions + --sym=SYMBOL Unique symbol to use for RTCD initialization function + --config=FILE File with CONFIG_FOO=yes lines to parse diff --git a/source/libvpx/build/make/rtcd.sh b/source/libvpx/build/make/rtcd.sh deleted file mode 100755 index 93c9adc..0000000 --- a/source/libvpx/build/make/rtcd.sh +++ /dev/null @@ -1,373 +0,0 @@ -#!/bin/sh -self=$0 - -usage() { - cat <<EOF >&2 -Usage: $self [options] FILE - -Reads the Run Time CPU Detections definitions from FILE and generates a -C header file on stdout. - -Options: - --arch=ARCH Architecture to generate defs for (required) - --disable-EXT Disable support for EXT extensions - --require-EXT Require support for EXT extensions - --sym=SYMBOL Unique symbol to use for RTCD initialization function - --config=FILE File with CONFIG_FOO=yes lines to parse -EOF - exit 1 -} - -die() { - echo "$@" >&2 - exit 1 -} - -die_argument_required() { - die "Option $opt requires argument" -} - -for opt; do - optval="${opt#*=}" - case "$opt" in - --arch) die_argument_required;; - --arch=*) arch=${optval};; - --disable-*) eval "disable_${opt#--disable-}=true";; - --require-*) REQUIRES="${REQUIRES}${opt#--require-} ";; - --sym) die_argument_required;; - --sym=*) symbol=${optval};; - --config=*) config_file=${optval};; - -h|--help) - usage - ;; - -*) - die "Unrecognized option: ${opt%%=*}" - ;; - *) - defs_file="$defs_file $opt" - ;; - esac - shift -done -for f in $defs_file; do [ -f "$f" ] || usage; done -[ -n "$arch" ] || usage - -# Import the configuration -[ -f "$config_file" ] && eval $(grep CONFIG_ "$config_file") - -# -# Routines for the RTCD DSL to call -# -prototype() { - rtyp="" - case "$1" in - unsigned) rtyp="$1 "; shift;; - esac - rtyp="${rtyp}$1" - fn="$2" - args="$3" - - eval "${2}_rtyp='$rtyp'" - eval "${2}_args='$3'" - ALL_FUNCS="$ALL_FUNCS 
$fn" - specialize $fn c -} - -specialize() { - fn="$1" - shift - for opt in "$@"; do - eval "${fn}_${opt}=${fn}_${opt}" - done -} - -require() { - for fn in $ALL_FUNCS; do - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - - # if we already have a default, then we can disable it, as we know - # we can do better. - best=$(eval "echo \$${fn}_default") - best_ofn=$(eval "echo \$${best}") - [ -n "$best" ] && [ "$best_ofn" != "$ofn" ] && eval "${best}_link=false" - eval "${fn}_default=${fn}_${opt}" - eval "${fn}_${opt}_link=true" - done - done -} - -forward_decls() { - ALL_FORWARD_DECLS="$ALL_FORWARD_DECLS $1" -} - -# -# Include the user's directives -# -for f in $defs_file; do - . $f -done - -# -# Process the directives according to the command line -# -process_forward_decls() { - for fn in $ALL_FORWARD_DECLS; do - eval $fn - done -} - -determine_indirection() { - [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS - for fn in $ALL_FUNCS; do - n="" - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - link=$(eval "echo \$${fn}_${opt}_link") - [ "$link" = "false" ] && continue - n="${n}x" - done - if [ "$n" = "x" ]; then - eval "${fn}_indirect=false" - else - eval "${fn}_indirect=true" - fi - done -} - -declare_function_pointers() { - for fn in $ALL_FUNCS; do - rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - echo "$rtyp ${ofn}($args);" - done - if [ "$(eval "echo \$${fn}_indirect")" = "false" ]; then - echo "#define ${fn} ${dfn}" - else - echo "RTCD_EXTERN $rtyp (*${fn})($args);" - fi - echo - done -} - -set_function_pointers() { - for fn in $ALL_FUNCS; do - n="" 
- rtyp="$(eval "echo \$${fn}_rtyp")" - args="$(eval "echo \"\$${fn}_args\"")" - dfn="$(eval "echo \$${fn}_default")" - dfn=$(eval "echo \$${dfn}") - if $(eval "echo \$${fn}_indirect"); then - echo " $fn = $dfn;" - for opt in "$@"; do - ofn=$(eval "echo \$${fn}_${opt}") - [ -z "$ofn" ] && continue - [ "$ofn" = "$dfn" ] && continue; - link=$(eval "echo \$${fn}_${opt}_link") - [ "$link" = "false" ] && continue - cond="$(eval "echo \$have_${opt}")" - echo " if (${cond}) $fn = $ofn;" - done - fi - echo - done -} - -filter() { - filtered="" - for opt in "$@"; do - [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt" - done - echo $filtered -} - -# -# Helper functions for generating the arch specific RTCD files -# -common_top() { - outfile_basename=$(basename ${symbol:-rtcd}) - include_guard=$(echo $outfile_basename | tr '[a-z]' '[A-Z]' | \ - tr -c '[A-Z0-9]' _)H_ - cat <<EOF -#ifndef ${include_guard} -#define ${include_guard} - -#ifdef RTCD_C -#define RTCD_EXTERN -#else -#define RTCD_EXTERN extern -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -$(process_forward_decls) - -$(declare_function_pointers c $ALL_ARCHS) - -void ${symbol:-rtcd}(void); -EOF -} - -common_bottom() { - cat <<EOF - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif -EOF -} - -x86() { - determine_indirection c $ALL_ARCHS - - # Assign the helper variable for each enabled extension - for opt in $ALL_ARCHS; do - uc=$(echo $opt | tr '[a-z]' '[A-Z]') - eval "have_${opt}=\"flags & HAS_${uc}\"" - done - - cat <<EOF -$(common_top) - -#ifdef RTCD_C -#include "vpx_ports/x86.h" -static void setup_rtcd_internal(void) -{ - int flags = x86_simd_caps(); - - (void)flags; - -$(set_function_pointers c $ALL_ARCHS) -} -#endif -$(common_bottom) -EOF -} - -arm() { - determine_indirection c $ALL_ARCHS - - # Assign the helper variable for each enabled extension - for opt in $ALL_ARCHS; do - uc=$(echo $opt | tr '[a-z]' '[A-Z]') - eval "have_${opt}=\"flags & HAS_${uc}\"" - done - - cat <<EOF 
-$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -#include "vpx_ports/arm.h" -static void setup_rtcd_internal(void) -{ - int flags = arm_cpu_caps(); - - (void)flags; - -$(set_function_pointers c $ALL_ARCHS) -} -#endif -$(common_bottom) -EOF -} - - -mips() { - determine_indirection c $ALL_ARCHS - cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -$(set_function_pointers c $ALL_ARCHS) -#if HAVE_DSPR2 -#if CONFIG_VP8 -void dsputil_static_init(); -dsputil_static_init(); -#endif -#if CONFIG_VP9 -void vp9_dsputil_static_init(); -vp9_dsputil_static_init(); -#endif -#endif -} -#endif -$(common_bottom) -EOF -} - -unoptimized() { - determine_indirection c - cat <<EOF -$(common_top) -#include "vpx_config.h" - -#ifdef RTCD_C -static void setup_rtcd_internal(void) -{ -$(set_function_pointers c) -} -#endif -$(common_bottom) -EOF - -} -# -# Main Driver -# -ALL_FUNCS=$(export LC_ALL=C; echo $ALL_FUNCS | tr ' ' '\n' | sort |tr '\n' ' ') -require c -case $arch in - x86) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) - x86 - ;; - x86_64) - ALL_ARCHS=$(filter mmx sse sse2 sse3 ssse3 sse4_1 avx avx2) - REQUIRES=${REQUIRES:-mmx sse sse2} - require $(filter $REQUIRES) - x86 - ;; - mips32) - ALL_ARCHS=$(filter mips32) - dspr2=$([ -f "$config_file" ] && eval echo $(grep HAVE_DSPR2 "$config_file")) - HAVE_DSPR2="${dspr2#*=}" - if [ "$HAVE_DSPR2" = "yes" ]; then - ALL_ARCHS=$(filter mips32 dspr2) - fi - mips - ;; - armv5te) - ALL_ARCHS=$(filter edsp) - arm - ;; - armv6) - ALL_ARCHS=$(filter edsp media) - arm - ;; - armv7) - ALL_ARCHS=$(filter edsp media neon) - arm - ;; - *) - unoptimized - ;; -esac diff --git a/source/libvpx/build/make/thumb.pm b/source/libvpx/build/make/thumb.pm index 9604c8e..483c253 100644 --- a/source/libvpx/build/make/thumb.pm +++ b/source/libvpx/build/make/thumb.pm @@ -51,7 +51,7 @@ sub FixThumbInstructions($$) # Convert register post indexing to a separate add instruction. 
# This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]", - # "add r0, r2". + # "addne r0, r0, r2". s/^(\s*)((ldr|str)(ne)?[bhd]?)(\s+)(\w+),(\s*\w+,)?\s*\[(\w+)\],\s*(\w+)/$1$2$5$6,$7 [$8]\n$1add$4$5$8, $8, $9/g; # Convert a conditional addition to the pc register into a series of diff --git a/source/libvpx/configure b/source/libvpx/configure index 9f5a435..ff350cc 100755 --- a/source/libvpx/configure +++ b/source/libvpx/configure @@ -51,6 +51,7 @@ Advanced options: ${toggle_postproc_visualizer} macro block / block level visualizers ${toggle_multi_res_encoding} enable multiple-resolution encoding ${toggle_temporal_denoising} enable temporal denoising and disable the spatial denoiser + ${toggle_webm_io} enable input from and output to WebM container Codecs: Codecs can be selectively enabled or disabled individually, or by family: @@ -160,6 +161,18 @@ for t in ${all_targets}; do [ -f ${source_path}/${t}.mk ] && enable_feature ${t} done +if ! perl --version >/dev/null; then + die "Perl is required to build" +fi + + +if [ "`cd ${source_path} && pwd`" != "`pwd`" ]; then + # test to see if source_path already configured + if [ -f ${source_path}/vpx_config.h ]; then + die "source directory already configured; run 'make distclean' there first" + fi +fi + # check installed doxygen version doxy_version=$(doxygen --version 2>/dev/null) doxy_major=${doxy_version%%.*} @@ -252,7 +265,6 @@ HAVE_LIST=" " EXPERIMENT_LIST=" multiple_arf - non420 alpha " CONFIG_LIST=" @@ -300,6 +312,7 @@ CONFIG_LIST=" postproc_visualizer os_support unit_tests + webm_io decode_perf_tests multi_res_encoding temporal_denoising @@ -353,6 +366,7 @@ CMDLINE_SELECT=" small postproc_visualizer unit_tests + webm_io decode_perf_tests multi_res_encoding temporal_denoising @@ -675,6 +689,7 @@ process_toolchain() { 10|11|12) VCPROJ_SFX=vcxproj gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh + enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror" ;; esac all_targets="${all_targets} 
solution" @@ -689,6 +704,9 @@ process_toolchain() { enabled postproc || die "postproc_visualizer requires postproc to be enabled" fi + # Enable WebM IO by default. + soft_enable webm_io + # Enable unit tests by default if we have a working C++ compiler. case "$toolchain" in *-vs*) diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk index aeb54ab..fa5d66c 100644 --- a/source/libvpx/examples.mk +++ b/source/libvpx/examples.mk @@ -26,16 +26,18 @@ vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h vpxdec.SRCS += ivfdec.c ivfdec.h vpxdec.SRCS += tools_common.c tools_common.h -vpxdec.SRCS += webmdec.c webmdec.h vpxdec.SRCS += y4menc.c y4menc.h -vpxdec.SRCS += third_party/nestegg/halloc/halloc.h -vpxdec.SRCS += third_party/nestegg/halloc/src/align.h -vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c -vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h -vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h -vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h -vpxdec.SRCS += third_party/nestegg/src/nestegg.c vpxdec.SRCS += $(LIBYUV_SRCS) +ifeq ($(CONFIG_WEBM_IO),yes) + vpxdec.SRCS += third_party/nestegg/halloc/halloc.h + vpxdec.SRCS += third_party/nestegg/halloc/src/align.h + vpxdec.SRCS += third_party/nestegg/halloc/src/halloc.c + vpxdec.SRCS += third_party/nestegg/halloc/src/hlist.h + vpxdec.SRCS += third_party/nestegg/halloc/src/macros.h + vpxdec.SRCS += third_party/nestegg/include/nestegg/nestegg.h + vpxdec.SRCS += third_party/nestegg/src/nestegg.c + vpxdec.SRCS += webmdec.c webmdec.h +endif vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c @@ -45,15 +47,17 @@ vpxenc.SRCS += ivfenc.c ivfenc.h vpxenc.SRCS += rate_hist.c rate_hist.h vpxenc.SRCS += tools_common.c tools_common.h vpxenc.SRCS += warnings.c warnings.h -vpxenc.SRCS += webmenc.c webmenc.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h vpxenc.SRCS += 
vpx_ports/vpx_timer.h vpxenc.SRCS += vpxstats.c vpxstats.h -vpxenc.SRCS += third_party/libmkv/EbmlIDs.h -vpxenc.SRCS += third_party/libmkv/EbmlWriter.c -vpxenc.SRCS += third_party/libmkv/EbmlWriter.h vpxenc.SRCS += $(LIBYUV_SRCS) +ifeq ($(CONFIG_WEBM_IO),yes) + vpxenc.SRCS += third_party/libmkv/EbmlIDs.h + vpxenc.SRCS += third_party/libmkv/EbmlWriter.c + vpxenc.SRCS += third_party/libmkv/EbmlWriter.h + vpxenc.SRCS += webmenc.c webmenc.h +endif vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c @@ -62,11 +66,12 @@ vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h vp9_spatial_scalable_encoder.SRCS += video_common.h vp9_spatial_scalable_encoder.SRCS += video_writer.h video_writer.c +vp9_spatial_scalable_encoder.SRCS += vpxstats.c vpxstats.h vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder -ifeq ($(CONFIG_SHARED),no) -UTILS-$(CONFIG_VP9_ENCODER) += resize_util.c +ifneq ($(CONFIG_SHARED),yes) +EXAMPLES-$(CONFIG_VP9_ENCODER) += resize_util.c endif # XMA example disabled for now, not used in VP8 @@ -123,9 +128,6 @@ twopass_encoder.SRCS += video_common.h twopass_encoder.SRCS += video_writer.h video_writer.c twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8 twopass_encoder.DESCRIPTION = Two-pass encoder loop -EXAMPLES-$(CONFIG_VP8_ENCODER) += force_keyframe.c -force_keyframe.GUID = 3C67CADF-029F-4C86-81F5-D6D4F51177F0 -force_keyframe.DESCRIPTION = Force generation of keyframes ifeq ($(CONFIG_DECODERS),yes) EXAMPLES-$(CONFIG_VP8_ENCODER) += decode_with_drops.c decode_with_drops.SRCS += ivfdec.h ivfdec.c @@ -142,14 +144,18 @@ EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c endif decode_with_partial_drops.GUID = 61C2D026-5754-46AC-916F-1343ECC5537E 
decode_with_partial_drops.DESCRIPTION = Drops parts of frames while decoding -EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_set_maps.c -vp8_set_maps.SRCS += ivfenc.h ivfenc.c -vp8_set_maps.SRCS += tools_common.h tools_common.c -vp8_set_maps.SRCS += video_common.h -vp8_set_maps.SRCS += video_writer.h video_writer.c -vp8_set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F -vp8_set_maps.DESCRIPTION = VP8 set active and ROI maps +EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c +set_maps.SRCS += ivfenc.h ivfenc.c +set_maps.SRCS += tools_common.h tools_common.c +set_maps.SRCS += video_common.h +set_maps.SRCS += video_writer.h video_writer.c +set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F +set_maps.DESCRIPTION = Set active and ROI maps EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c +vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp8cx_set_ref.SRCS += tools_common.h tools_common.c +vp8cx_set_ref.SRCS += video_common.h +vp8cx_set_ref.SRCS += video_writer.h video_writer.c vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame diff --git a/source/libvpx/examples/force_keyframe.c b/source/libvpx/examples/force_keyframe.c deleted file mode 100644 index 6531e47..0000000 --- a/source/libvpx/examples/force_keyframe.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Forcing A Keyframe -// ================== -// -// This is an example demonstrating how to control placement of keyframes -// on a frame-by-frame basis. 
-// -// Configuration -// ------------- -// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the -// flags passed to `vpx_codec_control()`. In this example, we force a -// keyframe every 8 frames. -// -// Observing The Effects -// --------------------- -// The output of the encoder examples shows a 'K' rather than a dot '.' -// when the encoder generates a keyframe. Note that every 8 frames a 'K' -// is output. - -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <string.h> -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_encoder.h" -#include "vpx/vp8cx.h" -#define interface (vpx_codec_vp8_cx()) -#define fourcc 0x30385056 - -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) - -static void mem_put_le16(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; -} - -static void mem_put_le32(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; - mem[2] = val>>16; - mem[3] = val>>24; -} - -static void die(const char *fmt, ...) { - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - if(fmt[strlen(fmt)-1] != '\n') - printf("\n"); - exit(EXIT_FAILURE); -} - -static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - if(detail) - printf(" %s\n",detail); - exit(EXIT_FAILURE); -} - -static int read_frame(FILE *f, vpx_image_t *img) { - size_t nbytes, to_read; - int res = 1; - - to_read = img->w*img->h*3/2; - nbytes = fread(img->planes[0], 1, to_read, f); - if(nbytes != to_read) { - res = 0; - if(nbytes > 0) - printf("Warning: Read partial frame. 
Check your width & height!\n"); - } - return res; -} - -static void write_ivf_file_header(FILE *outfile, - const vpx_codec_enc_cfg_t *cfg, - int frame_cnt) { - char header[32]; - - if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) - return; - header[0] = 'D'; - header[1] = 'K'; - header[2] = 'I'; - header[3] = 'F'; - mem_put_le16(header+4, 0); /* version */ - mem_put_le16(header+6, 32); /* headersize */ - mem_put_le32(header+8, fourcc); /* headersize */ - mem_put_le16(header+12, cfg->g_w); /* width */ - mem_put_le16(header+14, cfg->g_h); /* height */ - mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - - (void) fwrite(header, 1, 32, outfile); -} - - -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) -{ - char header[12]; - vpx_codec_pts_t pts; - - if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) - return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, (unsigned int)pkt->data.frame.sz); - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - - (void) fwrite(header, 1, 12, outfile); -} - -int main(int argc, char **argv) { - FILE *infile, *outfile; - vpx_codec_ctx_t codec; - vpx_codec_enc_cfg_t cfg; - int frame_cnt = 0; - vpx_image_t raw; - vpx_codec_err_t res; - long width; - long height; - int frame_avail; - int got_data; - int flags = 0; - - /* Open files */ - if(argc!=5) - die("Usage: %s <width> <height> <infile> <outfile>\n", argv[0]); - width = strtol(argv[1], NULL, 0); - height = strtol(argv[2], NULL, 0); - if(width < 16 || width%2 || height <16 || height%2) - die("Invalid resolution: %ldx%ld", width, height); - if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1)) - die("Faile to allocate image", width, height); - if(!(outfile = fopen(argv[4], "wb"))) - die("Failed to open %s for writing", argv[4]); - - printf("Using 
%s\n",vpx_codec_iface_name(interface)); - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; - } - - /* Update the default configuration with our settings */ - cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate - / cfg.g_w / cfg.g_h; - cfg.g_w = width; - cfg.g_h = height; - - write_ivf_file_header(outfile, &cfg, 0); - - - /* Open input file for this encoding pass */ - if(!(infile = fopen(argv[3], "rb"))) - die("Failed to open %s for reading", argv[3]); - - /* Initialize codec */ - if(vpx_codec_enc_init(&codec, interface, &cfg, 0)) - die_codec(&codec, "Failed to initialize encoder"); - - frame_avail = 1; - got_data = 0; - while(frame_avail || got_data) { - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - - if(!(frame_cnt & 7)) - flags |= VPX_EFLAG_FORCE_KF; - else - flags &= ~VPX_EFLAG_FORCE_KF; - frame_avail = read_frame(infile, &raw); - if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - got_data = 0; - while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { - got_data = 1; - switch(pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile, pkt); - (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile); - break; - default: - break; - } - printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT - && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? 
"K":"."); - fflush(stdout); - } - frame_cnt++; - } - printf("\n"); - fclose(infile); - - printf("Processed %d frames.\n",frame_cnt-1); - vpx_img_free(&raw); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); - - /* Try to rewrite the file header with the actual frame count */ - if(!fseek(outfile, 0, SEEK_SET)) - write_ivf_file_header(outfile, &cfg, frame_cnt-1); - fclose(outfile); - return EXIT_SUCCESS; -} diff --git a/source/libvpx/resize_util.c b/source/libvpx/examples/resize_util.c index b068f55..b068f55 100644 --- a/source/libvpx/resize_util.c +++ b/source/libvpx/examples/resize_util.c diff --git a/source/libvpx/examples/vp8_set_maps.c b/source/libvpx/examples/set_maps.c index f3cc9a7..4343832 100644 --- a/source/libvpx/examples/vp8_set_maps.c +++ b/source/libvpx/examples/set_maps.c @@ -56,7 +56,8 @@ static const char *exec_name; void usage_exit() { - fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name); + fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", + exec_name); exit(EXIT_FAILURE); } @@ -65,8 +66,8 @@ static void set_roi_map(const vpx_codec_enc_cfg_t *cfg, unsigned int i; vpx_roi_map_t roi = {0}; - roi.rows = cfg->g_h / 16; - roi.cols = cfg->g_w / 16; + roi.rows = (cfg->g_h + 15) / 16; + roi.cols = (cfg->g_w + 15) / 16; roi.delta_q[0] = 0; roi.delta_q[1] = -2; @@ -98,8 +99,8 @@ static void set_active_map(const vpx_codec_enc_cfg_t *cfg, unsigned int i; vpx_active_map_t map = {0}; - map.rows = cfg->g_h / 16; - map.cols = cfg->g_w / 16; + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; map.active_map = (uint8_t *)malloc(map.rows * map.cols); for (i = 0; i < map.rows * map.cols; ++i) @@ -115,8 +116,8 @@ static void unset_active_map(const vpx_codec_enc_cfg_t *cfg, vpx_codec_ctx_t *codec) { vpx_active_map_t map = {0}; - map.rows = cfg->g_h / 16; - map.cols = cfg->g_w / 16; + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; map.active_map = 
NULL; if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map)) @@ -161,20 +162,20 @@ int main(int argc, char **argv) { VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; const int fps = 2; // TODO(dkovalev) add command line argument - const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + const double bits_per_pixel_per_frame = 0.067; exec_name = argv[0]; - if (argc != 5) + if (argc != 6) die("Invalid number of arguments"); - encoder = get_vpx_encoder_by_name("vp8"); // only vp8 for now + encoder = get_vpx_encoder_by_name(argv[1]); if (!encoder) die("Unsupported codec."); info.codec_fourcc = encoder->fourcc; - info.frame_width = strtol(argv[1], NULL, 0); - info.frame_height = strtol(argv[2], NULL, 0); + info.frame_width = strtol(argv[2], NULL, 0); + info.frame_height = strtol(argv[3], NULL, 0); info.time_base.numerator = 1; info.time_base.denominator = fps; @@ -200,14 +201,16 @@ int main(int argc, char **argv) { cfg.g_h = info.frame_height; cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; - cfg.rc_target_bitrate = bitrate; + cfg.rc_target_bitrate = (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * + cfg.g_h * fps / 1000); + cfg.g_lag_in_frames = 0; - writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); + writer = vpx_video_writer_open(argv[5], kContainerIVF, &info); if (!writer) - die("Failed to open %s for writing.", argv[4]); + die("Failed to open %s for writing.", argv[5]); - if (!(infile = fopen(argv[3], "rb"))) - die("Failed to open %s for reading.", argv[3]); + if (!(infile = fopen(argv[4], "rb"))) + die("Failed to open %s for reading.", argv[4]); if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); @@ -215,7 +218,7 @@ int main(int argc, char **argv) { while (vpx_img_read(&raw, infile)) { ++frame_count; - if (frame_count == 22) { + if (frame_count == 22 && encoder->fourcc == VP8_FOURCC) { 
set_roi_map(&cfg, &codec); } else if (frame_count == 33) { set_active_map(&cfg, &codec); diff --git a/source/libvpx/examples/simple_encoder.c b/source/libvpx/examples/simple_encoder.c index 6ecd498..af58091 100644 --- a/source/libvpx/examples/simple_encoder.c +++ b/source/libvpx/examples/simple_encoder.c @@ -64,6 +64,15 @@ // frame is shown for one frame-time in duration. The flags parameter is // unused in this example. The deadline is set to VPX_DL_REALTIME to // make the example run as quickly as possible. + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the +// flags passed to `vpx_codec_control()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// VPX_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. // // Processing The Encoded Data // --------------------------- @@ -103,8 +112,8 @@ static const char *exec_name; void usage_exit() { fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile> " - "[<error-resilient>]\nSee comments in simple_encoder.c for more " - "information.\n", + "<keyframe-interval> [<error-resilient>]\nSee comments in " + "simple_encoder.c for more information.\n", exec_name); exit(EXIT_FAILURE); } @@ -112,11 +121,12 @@ void usage_exit() { static void encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, int frame_index, + int flags, VpxVideoWriter *writer) { vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt = NULL; - const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0, - VPX_DL_GOOD_QUALITY); + const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, + flags, VPX_DL_GOOD_QUALITY); if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); @@ -148,15 +158,20 @@ int main(int argc, char **argv) { const VpxInterface *encoder = NULL; const int fps = 
30; // TODO(dkovalev) add command line argument const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + int keyframe_interval = 0; + + // TODO(dkovalev): Add some simple command line parsing code to make the + // command line more flexible. const char *codec_arg = NULL; const char *width_arg = NULL; const char *height_arg = NULL; const char *infile_arg = NULL; const char *outfile_arg = NULL; + const char *keyframe_interval_arg = NULL; exec_name = argv[0]; - if (argc < 6) + if (argc < 7) die("Invalid number of arguments"); codec_arg = argv[1]; @@ -164,6 +179,7 @@ int main(int argc, char **argv) { height_arg = argv[3]; infile_arg = argv[4]; outfile_arg = argv[5]; + keyframe_interval_arg = argv[6]; encoder = get_vpx_encoder_by_name(codec_arg); if (!encoder) @@ -187,6 +203,10 @@ int main(int argc, char **argv) { die("Failed to allocate image."); } + keyframe_interval = strtol(keyframe_interval_arg, NULL, 0); + if (keyframe_interval < 0) + die("Invalid keyframe interval value."); + printf("Using %s\n", vpx_codec_iface_name(encoder->interface())); res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0); @@ -198,7 +218,7 @@ int main(int argc, char **argv) { cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = bitrate; - cfg.g_error_resilient = argc > 6 ? strtol(argv[6], NULL, 0) : 0; + cfg.g_error_resilient = argc > 7 ? 
strtol(argv[7], NULL, 0) : 0; writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); if (!writer) @@ -210,9 +230,13 @@ int main(int argc, char **argv) { if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); - while (vpx_img_read(&raw, infile)) - encode_frame(&codec, &raw, frame_count++, writer); - encode_frame(&codec, NULL, -1, writer); // flush the encoder + while (vpx_img_read(&raw, infile)) { + int flags = 0; + if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) + flags |= VPX_EFLAG_FORCE_KF; + encode_frame(&codec, &raw, frame_count++, flags, writer); + } + encode_frame(&codec, NULL, -1, 0, writer); // flush the encoder printf("\n"); fclose(infile); diff --git a/source/libvpx/examples/vp8cx_set_ref.c b/source/libvpx/examples/vp8cx_set_ref.c index f87dd35..9b6d11b 100644 --- a/source/libvpx/examples/vp8cx_set_ref.c +++ b/source/libvpx/examples/vp8cx_set_ref.c @@ -48,212 +48,140 @@ #include <stdio.h> #include <stdlib.h> -#include <stdarg.h> #include <string.h> + #define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" -#define interface (vpx_codec_vp8_cx()) -#define fourcc 0x30385056 - -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) - -static void mem_put_le16(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; -} +#include "vpx/vpx_encoder.h" -static void mem_put_le32(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; - mem[2] = val>>16; - mem[3] = val>>24; -} +#include "./tools_common.h" +#include "./video_writer.h" -static void die(const char *fmt, ...) 
{ - va_list ap; +static const char *exec_name; - va_start(ap, fmt); - vprintf(fmt, ap); - if(fmt[strlen(fmt)-1] != '\n') - printf("\n"); - exit(EXIT_FAILURE); +void usage_exit() { + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n", + exec_name); + exit(EXIT_FAILURE); } -static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - if(detail) - printf(" %s\n",detail); - exit(EXIT_FAILURE); -} - -static int read_frame(FILE *f, vpx_image_t *img) { - size_t nbytes, to_read; - int res = 1; - - to_read = img->w*img->h*3/2; - nbytes = fread(img->planes[0], 1, to_read, f); - if(nbytes != to_read) { - res = 0; - if(nbytes > 0) - printf("Warning: Read partial frame. Check your width & height!\n"); +static void encode_frame(vpx_codec_ctx_t *codec, + vpx_image_t *img, + int frame_index, + VpxVideoWriter *writer) { + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, 0, + VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) + die_codec(codec, "Failed to encode frame"); + + while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + if (!vpx_video_writer_write_frame(writer, + pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + + printf(keyframe ? 
"K" : "."); + fflush(stdout); } - return res; -} - -static void write_ivf_file_header(FILE *outfile, - const vpx_codec_enc_cfg_t *cfg, - int frame_cnt) { - char header[32]; - - if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) - return; - header[0] = 'D'; - header[1] = 'K'; - header[2] = 'I'; - header[3] = 'F'; - mem_put_le16(header+4, 0); /* version */ - mem_put_le16(header+6, 32); /* headersize */ - mem_put_le32(header+8, fourcc); /* headersize */ - mem_put_le16(header+12, cfg->g_w); /* width */ - mem_put_le16(header+14, cfg->g_h); /* height */ - mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - - (void) fwrite(header, 1, 32, outfile); -} - - -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) -{ - char header[12]; - vpx_codec_pts_t pts; - - if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) - return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, (unsigned int)pkt->data.frame.sz); - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - - (void) fwrite(header, 1, 12, outfile); + } } int main(int argc, char **argv) { - FILE *infile, *outfile; - vpx_codec_ctx_t codec; - vpx_codec_enc_cfg_t cfg; - int frame_cnt = 0; - vpx_image_t raw; - vpx_codec_err_t res; - long width; - long height; - int frame_avail; - int got_data; - int flags = 0; - int update_frame_num = 0; - - /* Open files */ - if(argc!=6) - die("Usage: %s <width> <height> <infile> <outfile> <frame>\n", - argv[0]); - - update_frame_num = atoi(argv[5]); - if(!update_frame_num) - die("Couldn't parse frame number '%s'\n", argv[5]); - - width = strtol(argv[1], NULL, 0); - height = strtol(argv[2], NULL, 0); - if(width < 16 || width%2 || height <16 || height%2) - die("Invalid resolution: %ldx%ld", width, height); - if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1)) - 
die("Faile to allocate image", width, height); - if(!(outfile = fopen(argv[4], "wb"))) - die("Failed to open %s for writing", argv[4]); - - printf("Using %s\n",vpx_codec_iface_name(interface)); - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; + FILE *infile = NULL; + vpx_codec_ctx_t codec = {0}; + vpx_codec_enc_cfg_t cfg = {0}; + int frame_count = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info = {0}; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + int update_frame_num = 0; + const int fps = 30; // TODO(dkovalev) add command line argument + const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + + exec_name = argv[0]; + + if (argc != 6) + die("Invalid number of arguments"); + + // TODO(dkovalev): add vp9 support and rename the file accordingly + encoder = get_vpx_encoder_by_name("vp8"); + if (!encoder) + die("Unsupported codec."); + + update_frame_num = atoi(argv[5]); + if (!update_frame_num) + die("Couldn't parse frame number '%s'\n", argv[5]); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = strtol(argv[1], NULL, 0); + info.frame_height = strtol(argv[2], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || + info.frame_height <= 0 || + (info.frame_width % 2) != 0 || + (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", vpx_codec_iface_name(encoder->interface())); + + res = vpx_codec_enc_config_default(encoder->interface(), &cfg, 0); + if (res) + die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + 
cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + + writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); + if (!writer) + die("Failed to open %s for writing.", argv[4]); + + if (!(infile = fopen(argv[3], "rb"))) + die("Failed to open %s for reading.", argv[3]); + + if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + while (vpx_img_read(&raw, infile)) { + if (frame_count + 1 == update_frame_num) { + vpx_ref_frame_t ref; + ref.frame_type = VP8_LAST_FRAME; + ref.img = raw; + if (vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref)) + die_codec(&codec, "Failed to set reference frame"); } - /* Update the default configuration with our settings */ - cfg.rc_target_bitrate = width * height * cfg.rc_target_bitrate - / cfg.g_w / cfg.g_h; - cfg.g_w = width; - cfg.g_h = height; - - write_ivf_file_header(outfile, &cfg, 0); - - - /* Open input file for this encoding pass */ - if(!(infile = fopen(argv[3], "rb"))) - die("Failed to open %s for reading", argv[3]); - - /* Initialize codec */ - if(vpx_codec_enc_init(&codec, interface, &cfg, 0)) - die_codec(&codec, "Failed to initialize encoder"); - - frame_avail = 1; - got_data = 0; - while(frame_avail || got_data) { - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - - frame_avail = read_frame(infile, &raw); - - if(frame_cnt + 1 == update_frame_num) { - vpx_ref_frame_t ref; - - ref.frame_type = VP8_LAST_FRAME; - ref.img = raw; + encode_frame(&codec, &raw, frame_count++, writer); + } + encode_frame(&codec, NULL, -1, writer); - if(vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref)) - die_codec(&codec, "Failed to set reference frame"); - } + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); - if(vpx_codec_encode(&codec, frame_avail? 
&raw : NULL, frame_cnt, - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - got_data = 0; - while( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { - got_data = 1; - switch(pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: - write_ivf_frame_header(outfile, pkt); - (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile); - break; - default: - break; - } - printf(pkt->kind == VPX_CODEC_CX_FRAME_PKT - && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"."); - fflush(stdout); - } - frame_cnt++; - } - printf("\n"); - fclose(infile); + vpx_img_free(&raw); + if (vpx_codec_destroy(&codec)) + die_codec(&codec, "Failed to destroy codec."); - printf("Processed %d frames.\n",frame_cnt-1); - vpx_img_free(&raw); - if(vpx_codec_destroy(&codec)) - die_codec(&codec, "Failed to destroy codec"); + vpx_video_writer_close(writer); - /* Try to rewrite the file header with the actual frame count */ - if(!fseek(outfile, 0, SEEK_SET)) - write_ivf_file_header(outfile, &cfg, frame_cnt-1); - fclose(outfile); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/source/libvpx/examples/vp9_spatial_scalable_encoder.c b/source/libvpx/examples/vp9_spatial_scalable_encoder.c index 98dc3f5..64e62ef 100644 --- a/source/libvpx/examples/vp9_spatial_scalable_encoder.c +++ b/source/libvpx/examples/vp9_spatial_scalable_encoder.c @@ -26,6 +26,7 @@ #include "vpx/svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" +#include "./vpxstats.h" static const struct arg_enum_list encoding_mode_enum[] = { {"i", INTER_LAYER_PREDICTION_I}, @@ -60,12 +61,28 @@ static const arg_def_t quantizers_arg = static const arg_def_t quantizers_keyframe_arg = ARG_DEF("qn", "quantizers-keyframe", 1, "quantizers for key frames (lowest " "to highest layer)"); +static const arg_def_t passes_arg = + ARG_DEF("p", "passes", 1, "Number of passes (1/2)"); +static const arg_def_t pass_arg = + ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)"); +static const arg_def_t fpf_name_arg = + 
ARG_DEF(NULL, "fpf", 1, "First pass statistics file name"); +static const arg_def_t min_q_arg = + ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); +static const arg_def_t max_q_arg = + ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); +static const arg_def_t min_bitrate_arg = + ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate"); +static const arg_def_t max_bitrate_arg = + ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate"); static const arg_def_t *svc_args[] = { &encoding_mode_arg, &frames_arg, &width_arg, &height_arg, &timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg, &kf_dist_arg, &scale_factors_arg, &quantizers_arg, - &quantizers_keyframe_arg, NULL + &quantizers_keyframe_arg, &passes_arg, &pass_arg, + &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, + &max_bitrate_arg, NULL }; static const SVC_ENCODING_MODE default_encoding_mode = @@ -85,6 +102,10 @@ typedef struct { const char *output_filename; uint32_t frames_to_code; uint32_t frames_to_skip; + struct VpxInputContext input_ctx; + stats_io_t rc_stats; + int passes; + int pass; } AppInput; static const char *exec_name; @@ -105,6 +126,11 @@ static void parse_command_line(int argc, const char **argv_, char **argi = NULL; char **argj = NULL; vpx_codec_err_t res; + int passes = 0; + int pass = 0; + const char *fpf_file_name = NULL; + unsigned int min_bitrate = 0; + unsigned int max_bitrate = 0; // initialize SvcContext with parameters that will be passed to vpx_svc_init svc_ctx->log_level = SVC_LOG_DEBUG; @@ -159,11 +185,72 @@ static void parse_command_line(int argc, const char **argv_, vpx_svc_set_quantizers(svc_ctx, arg.val, 0); } else if (arg_match(&arg, &quantizers_keyframe_arg, argi)) { vpx_svc_set_quantizers(svc_ctx, arg.val, 1); + } else if (arg_match(&arg, &passes_arg, argi)) { + passes = arg_parse_uint(&arg); + if (passes < 1 || passes > 2) { + die("Error: Invalid number of passes (%d)\n", passes); + } + } else if (arg_match(&arg, &pass_arg, argi)) { + pass = arg_parse_uint(&arg); + if (pass < 
1 || pass > 2) { + die("Error: Invalid pass selected (%d)\n", pass); + } + } else if (arg_match(&arg, &fpf_name_arg, argi)) { + fpf_file_name = arg.val; + } else if (arg_match(&arg, &min_q_arg, argi)) { + enc_cfg->rc_min_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_q_arg, argi)) { + enc_cfg->rc_max_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &min_bitrate_arg, argi)) { + min_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_bitrate_arg, argi)) { + max_bitrate = arg_parse_uint(&arg); } else { ++argj; } } + if (passes == 0 || passes == 1) { + if (pass) { + fprintf(stderr, "pass is ignored since there's only one pass\n"); + } + enc_cfg->g_pass = VPX_RC_ONE_PASS; + } else { + if (pass == 0) { + die("pass must be specified when passes is 2\n"); + } + + if (fpf_file_name == NULL) { + die("fpf must be specified when passes is 2\n"); + } + + if (pass == 1) { + enc_cfg->g_pass = VPX_RC_FIRST_PASS; + if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) { + fatal("Failed to open statistics store"); + } + } else { + enc_cfg->g_pass = VPX_RC_LAST_PASS; + if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) { + fatal("Failed to open statistics store"); + } + enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats); + } + app_input->passes = passes; + app_input->pass = pass; + } + + if (enc_cfg->rc_target_bitrate > 0) { + if (min_bitrate > 0) { + enc_cfg->rc_2pass_vbr_minsection_pct = + min_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + if (max_bitrate > 0) { + enc_cfg->rc_2pass_vbr_maxsection_pct = + max_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + } + // Check for unrecognized options for (argi = argv; *argi; ++argi) if (argi[0][0] == '-' && strlen(argi[0]) > 1) @@ -207,6 +294,7 @@ int main(int argc, const char **argv) { int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ FILE *infile = NULL; + int end_of_stream = 0; memset(&svc_ctx, 0, 
sizeof(svc_ctx)); svc_ctx.log_print = 1; @@ -234,34 +322,50 @@ int main(int argc, const char **argv) { VPX_CODEC_OK) { die("Failed to get output resolution"); } - writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF, - &info); - if (!writer) - die("Failed to open %s for writing\n", app_input.output_filename); + + if (!(app_input.passes == 2 && app_input.pass == 1)) { + // We don't save the bitstream for the 1st pass on two pass rate control + writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF, + &info); + if (!writer) + die("Failed to open %s for writing\n", app_input.output_filename); + } // skip initial frames for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); // Encode frames - while (frame_cnt < app_input.frames_to_code) { - if (!vpx_img_read(&raw, infile)) - break; + while (!end_of_stream) { + if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { + // We need one extra vpx_svc_encode call at end of stream to flush + // encoder and get remaining data + end_of_stream = 1; + } - res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration, - VPX_DL_REALTIME); + res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? 
NULL : &raw), + pts, frame_duration, VPX_DL_REALTIME); printf("%s", vpx_svc_get_message(&svc_ctx)); if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); } - if (vpx_svc_get_frame_size(&svc_ctx) > 0) { - vpx_video_writer_write_frame(writer, - vpx_svc_get_buffer(&svc_ctx), - vpx_svc_get_frame_size(&svc_ctx), - pts); + if (!(app_input.passes == 2 && app_input.pass == 1)) { + if (vpx_svc_get_frame_size(&svc_ctx) > 0) { + vpx_video_writer_write_frame(writer, + vpx_svc_get_buffer(&svc_ctx), + vpx_svc_get_frame_size(&svc_ctx), + pts); + } + } + if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) { + stats_write(&app_input.rc_stats, + vpx_svc_get_rc_stats_buffer(&svc_ctx), + vpx_svc_get_rc_stats_buffer_size(&svc_ctx)); + } + if (!end_of_stream) { + ++frame_cnt; + pts += frame_duration; } - ++frame_cnt; - pts += frame_duration; } printf("Processed %d frames\n", frame_cnt); @@ -269,7 +373,12 @@ int main(int argc, const char **argv) { fclose(infile); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); - vpx_video_writer_close(writer); + if (app_input.passes == 2) + stats_close(&app_input.rc_stats, 1); + + if (writer) { + vpx_video_writer_close(writer); + } vpx_img_free(&raw); diff --git a/source/libvpx/examples/vpx_temporal_scalable_patterns.c b/source/libvpx/examples/vpx_temporal_scalable_patterns.c index 6ec1b62..5cb4ee9 100644 --- a/source/libvpx/examples/vpx_temporal_scalable_patterns.c +++ b/source/libvpx/examples/vpx_temporal_scalable_patterns.c @@ -18,6 +18,8 @@ #include <string.h> #define VPX_CODEC_DISABLE_COMPAT 1 +#include "./vpx_config.h" +#include "vpx_ports/vpx_timer.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" @@ -435,6 +437,7 @@ int main(int argc, char **argv) { vpx_codec_err_t res; unsigned int width; unsigned int height; + int speed; int frame_avail; int got_data; int flags = 0; @@ -449,12 +452,13 @@ int main(int argc, char **argv) { const VpxInterface *encoder = NULL; FILE *infile = NULL; struct 
RateControlMetrics rc; + int64_t cx_time = 0; exec_name = argv[0]; // Check usage and arguments. if (argc < 11) { die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> " - "<rate_num> <rate_den> <frame_drop_threshold> <mode> " + "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> " "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]); } @@ -470,12 +474,12 @@ int main(int argc, char **argv) { die("Invalid resolution: %d x %d", width, height); } - layering_mode = strtol(argv[9], NULL, 0); + layering_mode = strtol(argv[10], NULL, 0); if (layering_mode < 0 || layering_mode > 12) { - die("Invalid mode (0..12) %s", argv[9]); + die("Invalid layering mode (0..12) %s", argv[10]); } - if (argc != 10 + mode_to_num_layers[layering_mode]) { + if (argc != 11 + mode_to_num_layers[layering_mode]) { die("Invalid number of arguments"); } @@ -498,12 +502,17 @@ int main(int argc, char **argv) { cfg.g_timebase.num = strtol(argv[6], NULL, 0); cfg.g_timebase.den = strtol(argv[7], NULL, 0); - for (i = 10; (int)i < 10 + mode_to_num_layers[layering_mode]; ++i) { - cfg.ts_target_bitrate[i - 10] = strtol(argv[i], NULL, 0); + speed = strtol(argv[8], NULL, 0); + if (speed < 0) { + die("Invalid speed setting: must be positive"); + } + + for (i = 11; (int)i < 11 + mode_to_num_layers[layering_mode]; ++i) { + cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); } // Real time parameters. 
- cfg.rc_dropframe_thresh = strtol(argv[8], NULL, 0); + cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0); cfg.rc_end_usage = VPX_CBR; cfg.rc_resize_allowed = 0; cfg.rc_min_quantizer = 2; @@ -560,13 +569,16 @@ int main(int argc, char **argv) { if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); - vpx_codec_control(&codec, VP8E_SET_CPUUSED, -6); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); - if (strncmp(encoder->name, "vp9", 3) == 0) { - vpx_codec_control(&codec, VP8E_SET_CPUUSED, 3); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); - if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { - die_codec(&codec, "Failed to set SVC"); + if (strncmp(encoder->name, "vp8", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); + } else if (strncmp(encoder->name, "vp9", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); + if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { + die_codec(&codec, "Failed to set SVC"); } } vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); @@ -576,10 +588,13 @@ int main(int argc, char **argv) { // value, like 100 or 200. max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5) * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0); + // For low-quality key frame. + max_intra_size_pct = 200; vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); frame_avail = 1; while (frame_avail || got_data) { + struct vpx_usec_timer timer; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt; // Update the temporal layer_id. No spatial layers in this test. 
@@ -593,10 +608,13 @@ int main(int argc, char **argv) { frame_avail = vpx_img_read(&raw, infile); if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; + vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags, VPX_DL_REALTIME)) { die_codec(&codec, "Failed to encode frame"); } + vpx_usec_timer_mark(&timer); + cx_time += vpx_usec_timer_elapsed(&timer); // Reset KF flag. if (layering_mode != 7) { layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; @@ -632,6 +650,11 @@ int main(int argc, char **argv) { } fclose(infile); printout_rate_control_summary(&rc, &cfg, frame_cnt); + printf("\n"); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", + frame_cnt, + 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); diff --git a/source/libvpx/libs.mk b/source/libvpx/libs.mk index 302d2af..a5c4b76 100644 --- a/source/libvpx/libs.mk +++ b/source/libvpx/libs.mk @@ -49,7 +49,7 @@ endif # !gcc define rtcd_h_template $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2) @echo " [CREATE] $$@" - $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$$(TGT_ISA) \ + $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \ --sym=$(1) \ --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \ $$(RTCD_OPTIONS) $$^ > $$@ @@ -162,7 +162,7 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) endif CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh -CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.sh +CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h @@ -236,6 +236,13 @@ vpx.def: $(call enabled,CODEC_EXPORTS) --out=$@ $^ CLEAN-OBJS += vpx.def +# Assembly files that are included, but don't define 
symbols themselves. +# Filtered out to avoid Visual Studio build warnings. +ASM_INCLUDES := \ + third_party/x86inc/x86inc.asm \ + vpx_config.asm \ + vpx_ports/x86_abi_support.asm \ + vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX) @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ @@ -246,7 +253,8 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX) --proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \ --module-def=vpx.def \ --ver=$(CONFIG_VS_VERSION) \ - --out=$@ $(CFLAGS) $^ \ + --out=$@ $(CFLAGS) \ + $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \ --src-path-bare="$(SRC_PATH_BARE)" \ PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX) diff --git a/source/libvpx/test/active_map_test.cc b/source/libvpx/test/active_map_test.cc new file mode 100644 index 0000000..6377e72 --- /dev/null +++ b/source/libvpx/test/active_map_test.cc @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <climits> +#include <vector> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class ActiveMapTest + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { + protected: + static const int kWidth = 208; + static const int kHeight = 144; + + ActiveMapTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~ActiveMapTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + cpu_used_ = GET_PARAM(2); + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, cpu_used_); + } else if (video->frame() == 3) { + vpx_active_map_t map = {0}; + uint8_t active_map[9 * 13] = { + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, + 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, + }; + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + ASSERT_EQ(map.cols, 13u); + ASSERT_EQ(map.rows, 9u); + map.active_map = active_map; + encoder->Control(VP8E_SET_ACTIVEMAP, &map); + } else if (video->frame() == 15) { + vpx_active_map_t map = {0}; + map.cols = (kWidth + 15) / 16; + map.rows = (kHeight + 15) / 16; + map.active_map = NULL; + encoder->Control(VP8E_SET_ACTIVEMAP, &map); + } + } + + int cpu_used_; +}; + +TEST_P(ActiveMapTest, Test) { + // Validate that this non multiple of 64 wide clip encodes + cfg_.g_lag_in_frames = 0; + cfg_.rc_target_bitrate = 400; + cfg_.rc_resize_allowed = 0; + cfg_.g_pass = VPX_RC_ONE_PASS; + cfg_.rc_end_usage = VPX_CBR; + 
cfg_.kf_max_dist = 90000; + + ::libvpx_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30, + 1, 0, 20); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +#define VP9_FACTORY \ + static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9) + +VP9_INSTANTIATE_TEST_CASE(ActiveMapTest, + ::testing::Values(::libvpx_test::kRealTime), + ::testing::Range(0, 6)); +} // namespace diff --git a/source/libvpx/test/android/scrape_gtest_log.py b/source/libvpx/test/android/scrape_gtest_log.py new file mode 100644 index 0000000..487845c --- /dev/null +++ b/source/libvpx/test/android/scrape_gtest_log.py @@ -0,0 +1,57 @@ +# Copyright (c) 2014 The WebM project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Standalone script which parses a gtest log for json. + +Json is returned returns as an array. This script is used by the libvpx +waterfall to gather json results mixed in with gtest logs. This is +dubious software engineering. +""" + +import getopt +import json +import os +import re +import sys + + +def main(): + if len(sys.argv) != 3: + print "Expects a file to write json to!" 
+ exit(1) + + try: + opts, _ = \ + getopt.getopt(sys.argv[1:], \ + 'o:', ['output-json=']) + except getopt.GetOptError: + print 'scrape_gtest_log.py -o <output_json>' + sys.exit(2) + + output_json = '' + for opt, arg in opts: + if opt in ('-o', '--output-json'): + output_json = os.path.join(arg) + + blob = sys.stdin.read() + json_string = '[' + ','.join('{' + x + '}' for x in + re.findall(r'{([^}]*.?)}', blob)) + ']' + print blob + + output = json.dumps(json.loads(json_string), indent=4, sort_keys=True) + print output + + path = os.path.dirname(output_json) + if path and not os.path.exists(path): + os.makedirs(path) + + outfile = open(output_json, 'w') + outfile.write(output) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/source/libvpx/test/aq_segment_test.cc b/source/libvpx/test/aq_segment_test.cc new file mode 100644 index 0000000..2f88b53 --- /dev/null +++ b/source/libvpx/test/aq_segment_test.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <climits> +#include <vector> +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +class AqSegmentTest : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params< + libvpx_test::TestMode, int> { + protected: + AqSegmentTest() : EncoderTest(GET_PARAM(0)) {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + set_cpu_used_ = GET_PARAM(2); + aq_mode_ = 0; + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 1) { + encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); + encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); + encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100); + } + } + + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + } + } + int set_cpu_used_; + int aq_mode_; +}; + +// Validate that this AQ segmentation mode (AQ=1, variance_ap) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ1) { + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + + aq_mode_ = 1; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// Validate that this AQ segmentation mode (AQ=2, complexity_aq) +// encodes and decodes without a mismatch. 
+TEST_P(AqSegmentTest, TestNoMisMatchAQ2) { + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + + aq_mode_ = 2; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +// Validate that this AQ segmentation mode (AQ=3, cyclic_refresh_aq) +// encodes and decodes without a mismatch. +TEST_P(AqSegmentTest, TestNoMisMatchAQ3) { + cfg_.rc_min_quantizer = 8; + cfg_.rc_max_quantizer = 56; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_target_bitrate = 300; + + aq_mode_ = 3; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 100); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + +using std::tr1::make_tuple; + +#define VP9_FACTORY \ + static_cast<const libvpx_test::CodecFactory*> (&libvpx_test::kVP9) + +VP9_INSTANTIATE_TEST_CASE(AqSegmentTest, + ::testing::Values(::libvpx_test::kRealTime, + ::libvpx_test::kOnePassGood), + ::testing::Range(3, 9)); +} // namespace diff --git a/source/libvpx/test/borders_test.cc b/source/libvpx/test/borders_test.cc index 5071541..b30be45 100644 --- a/source/libvpx/test/borders_test.cc +++ b/source/libvpx/test/borders_test.cc @@ -21,6 +21,7 @@ class BordersTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: BordersTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~BordersTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/config_test.cc b/source/libvpx/test/config_test.cc index 36c6330..0493110 100644 --- a/source/libvpx/test/config_test.cc +++ b/source/libvpx/test/config_test.cc @@ -20,6 +20,7 @@ class ConfigTest : 
public ::libvpx_test::EncoderTest, protected: ConfigTest() : EncoderTest(GET_PARAM(0)), frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {} + virtual ~ConfigTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/cpu_speed_test.cc b/source/libvpx/test/cpu_speed_test.cc index c92e723..be651b4 100644 --- a/source/libvpx/test/cpu_speed_test.cc +++ b/source/libvpx/test/cpu_speed_test.cc @@ -22,6 +22,7 @@ class CpuSpeedTest : public ::libvpx_test::EncoderTest, libvpx_test::TestMode, int> { protected: CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~CpuSpeedTest() {} virtual void SetUp() { InitializeConfig(); @@ -79,7 +80,7 @@ TEST_P(CpuSpeedTest, TestEncodeHighBitrate) { cfg_.rc_min_quantizer = 0; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, - 40); + 20); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } @@ -95,7 +96,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) { cfg_.rc_min_quantizer = 40; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, - 40); + 20); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } @@ -107,6 +108,6 @@ using std::tr1::make_tuple; VP9_INSTANTIATE_TEST_CASE( CpuSpeedTest, - ::testing::Values(::libvpx_test::kTwoPassGood), - ::testing::Range(0, 5)); + ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), + ::testing::Range(0, 8)); } // namespace diff --git a/source/libvpx/test/datarate_test.cc b/source/libvpx/test/datarate_test.cc index 39c9a5a..e8604a6 100644 --- a/source/libvpx/test/datarate_test.cc +++ b/source/libvpx/test/datarate_test.cc @@ -17,10 +17,12 @@ namespace { -class DatarateTest : public ::libvpx_test::EncoderTest, +class DatarateTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { public: - DatarateTest() : EncoderTest(GET_PARAM(0)) {} + DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {} + + virtual ~DatarateTestLarge() {} protected: virtual void SetUp() { @@ -120,7 
+122,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest, size_t bits_in_last_frame_; }; -TEST_P(DatarateTest, BasicBufferModel) { +TEST_P(DatarateTestLarge, BasicBufferModel) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_dropframe_thresh = 1; cfg_.rc_max_quantizer = 56; @@ -151,7 +153,7 @@ TEST_P(DatarateTest, BasicBufferModel) { } } -TEST_P(DatarateTest, ChangingDropFrameThresh) { +TEST_P(DatarateTestLarge, ChangingDropFrameThresh) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_max_quantizer = 36; cfg_.rc_end_usage = VPX_CBR; @@ -179,13 +181,13 @@ TEST_P(DatarateTest, ChangingDropFrameThresh) { } } -class DatarateTestVP9 : public ::libvpx_test::EncoderTest, +class DatarateTestVP9Large : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { public: - DatarateTestVP9() : EncoderTest(GET_PARAM(0)) {} + DatarateTestVP9Large() : EncoderTest(GET_PARAM(0)) {} protected: - virtual ~DatarateTestVP9() {} + virtual ~DatarateTestVP9Large() {} virtual void SetUp() { InitializeConfig(); @@ -358,7 +360,7 @@ class DatarateTestVP9 : public ::libvpx_test::EncoderTest, }; // Check basic rate targeting, -TEST_P(DatarateTestVP9, BasicRateTargeting) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -382,7 +384,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting) { } // Check basic rate targeting, -TEST_P(DatarateTestVP9, BasicRateTargeting444) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting444) { ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); cfg_.g_profile = 1; @@ -414,7 +416,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting444) { // as the drop frame threshold is increased, and (2) that the total number of // frame drops does not decrease as we increase frame drop threshold. // Use a lower qp-max to force some frame drops. 
-TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { +TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -455,7 +457,7 @@ TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { } // Check basic rate targeting for 2 temporal layers. -TEST_P(DatarateTestVP9, BasicRateTargeting2TemporalLayers) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -492,7 +494,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting2TemporalLayers) { } // Check basic rate targeting for 3 temporal layers. -TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -533,7 +535,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) { // Check basic rate targeting for 3 temporal layers, with frame dropping. // Only for one (low) bitrate with lower max_quantizer, and somewhat higher // frame drop threshold, to force frame dropping. -TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayersFrameDropping) { +TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -568,14 +570,15 @@ TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayersFrameDropping) { << " The datarate for the file is greater than target by too much, " "for layer: " << j; // Expect some frame drops in this test: for this 200 frames test, - // expect at least 10% and not more than 50% drops. + // expect at least 10% and not more than 60% drops. 
ASSERT_GE(num_drops_, 20); - ASSERT_LE(num_drops_, 100); + ASSERT_LE(num_drops_, 120); } } -VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES); -VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9, - ::testing::Values(::libvpx_test::kOnePassGood), - ::testing::Range(2, 5)); +VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large, + ::testing::Values(::libvpx_test::kOnePassGood, + ::libvpx_test::kRealTime), + ::testing::Range(2, 7)); } // namespace diff --git a/source/libvpx/test/encode_test_driver.h b/source/libvpx/test/encode_test_driver.h index 8017a2a..9526068 100644 --- a/source/libvpx/test/encode_test_driver.h +++ b/source/libvpx/test/encode_test_driver.h @@ -16,6 +16,9 @@ #include "./vpx_config.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "vpx/vpx_encoder.h" +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER +#include "vpx/vp8cx.h" +#endif namespace libvpx_test { @@ -128,6 +131,13 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } +#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER + void Control(int ctrl_id, vpx_active_map_t *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } +#endif + void set_deadline(unsigned long deadline) { deadline_ = deadline; } diff --git a/source/libvpx/test/error_resilience_test.cc b/source/libvpx/test/error_resilience_test.cc index 4cd9efb..89684f8 100644 --- a/source/libvpx/test/error_resilience_test.cc +++ b/source/libvpx/test/error_resilience_test.cc @@ -19,19 +19,20 @@ namespace { const int kMaxErrorFrames = 12; const int kMaxDroppableFrames = 12; -class ErrorResilienceTest : public ::libvpx_test::EncoderTest, +class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: - ErrorResilienceTest() : EncoderTest(GET_PARAM(0)), - psnr_(0.0), - nframes_(0), - mismatch_psnr_(0.0), - 
mismatch_nframes_(0), - encoding_mode_(GET_PARAM(1)) { + ErrorResilienceTestLarge() + : EncoderTest(GET_PARAM(0)), + psnr_(0.0), + nframes_(0), + mismatch_psnr_(0.0), + mismatch_nframes_(0), + encoding_mode_(GET_PARAM(1)) { Reset(); } - virtual ~ErrorResilienceTest() {} + virtual ~ErrorResilienceTestLarge() {} void Reset() { error_nframes_ = 0; @@ -144,7 +145,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest, libvpx_test::TestMode encoding_mode_; }; -TEST_P(ErrorResilienceTest, OnVersusOff) { +TEST_P(ErrorResilienceTestLarge, OnVersusOff) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; @@ -179,7 +180,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) { // if we lose (i.e., drop before decoding) a set of droppable // frames (i.e., frames that don't update any reference buffers). // Check both isolated and consecutive loss. -TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { +TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; @@ -235,7 +236,7 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) { #endif } -VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES); -VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTest, ONE_PASS_TEST_MODES); +VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES); } // namespace diff --git a/source/libvpx/test/external_frame_buffer_test.cc b/source/libvpx/test/external_frame_buffer_test.cc index 2e7adc1..54c79e9 100644 --- a/source/libvpx/test/external_frame_buffer_test.cc +++ b/source/libvpx/test/external_frame_buffer_test.cc @@ -210,7 +210,7 @@ class ExternalFrameBufferMD5Test ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_)); ASSERT_EQ(VPX_CODEC_OK, decoder->SetFrameBufferFunctions( - GetVp9FrameBuffer, 
ReleaseVP9FrameBuffer, this)); + GetVP9FrameBuffer, ReleaseVP9FrameBuffer, this)); } } @@ -242,7 +242,7 @@ class ExternalFrameBufferMD5Test // Callback to get a free external frame buffer. Return value < 0 is an // error. - static int GetVp9FrameBuffer(void *user_priv, size_t min_size, + static int GetVP9FrameBuffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferMD5Test *const md5Test = reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv); @@ -462,5 +462,7 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) { } VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test, - ::testing::ValuesIn(libvpx_test::kVP9TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)); } // namespace diff --git a/source/libvpx/test/intrapred_test.cc b/source/libvpx/test/intrapred_test.cc index b28f5fb..cefe192 100644 --- a/source/libvpx/test/intrapred_test.cc +++ b/source/libvpx/test/intrapred_test.cc @@ -26,11 +26,7 @@ using libvpx_test::ACMRandom; class IntraPredBase { public: - virtual ~IntraPredBase() {} - - virtual void TearDown() { - libvpx_test::ClearSystemState(); - } + virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); } protected: void SetupMacroblock(MACROBLOCKD *mbptr, @@ -227,8 +223,9 @@ typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x, uint8_t *ypred_ptr, int y_stride); -class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>, - protected IntraPredBase { +class IntraPredYTest + : public IntraPredBase, + public ::testing::TestWithParam<intra_pred_y_fn_t> { public: static void SetUpTestCase() { mb_ = reinterpret_cast<MACROBLOCKD*>( @@ -308,8 +305,9 @@ typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x, uint8_t *vpred_ptr, int pred_stride); -class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>, - protected IntraPredBase { +class IntraPredUVTest + : public IntraPredBase, + public 
::testing::TestWithParam<intra_pred_uv_fn_t> { public: static void SetUpTestCase() { mb_ = reinterpret_cast<MACROBLOCKD*>( diff --git a/source/libvpx/test/keyframe_test.cc b/source/libvpx/test/keyframe_test.cc index 7ee2898..d8b21a1 100644 --- a/source/libvpx/test/keyframe_test.cc +++ b/source/libvpx/test/keyframe_test.cc @@ -21,6 +21,7 @@ class KeyframeTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: KeyframeTest() : EncoderTest(GET_PARAM(0)) {} + virtual ~KeyframeTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/pp_filter_test.cc b/source/libvpx/test/pp_filter_test.cc index ff7bb08..86c2b0e 100644 --- a/source/libvpx/test/pp_filter_test.cc +++ b/source/libvpx/test/pp_filter_test.cc @@ -25,7 +25,7 @@ typedef void (*post_proc_func_t)(unsigned char *src_ptr, namespace { -class Vp8PostProcessingFilterTest +class VP8PostProcessingFilterTest : public ::testing::TestWithParam<post_proc_func_t> { public: virtual void TearDown() { @@ -36,7 +36,7 @@ class Vp8PostProcessingFilterTest // Test routine for the VP8 post-processing function // vp8_post_proc_down_and_across_mb_row_c. -TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { +TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) { // Size of the underlying data block that will be filtered. 
const int block_width = 16; const int block_height = 16; @@ -91,7 +91,7 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { for (int i = 0; i < block_height; ++i) { for (int j = 0; j < block_width; ++j) { EXPECT_EQ(expected_data[i], pixel_ptr[j]) - << "Vp8PostProcessingFilterTest failed with invalid filter output"; + << "VP8PostProcessingFilterTest failed with invalid filter output"; } pixel_ptr += output_stride; } @@ -101,11 +101,11 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { vpx_free(flimits); }; -INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest, +INSTANTIATE_TEST_CASE_P(C, VP8PostProcessingFilterTest, ::testing::Values(vp8_post_proc_down_and_across_mb_row_c)); #if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest, +INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest, ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2)); #endif diff --git a/source/libvpx/test/register_state_check.h b/source/libvpx/test/register_state_check.h index 479a42d..7e3d053 100644 --- a/source/libvpx/test/register_state_check.h +++ b/source/libvpx/test/register_state_check.h @@ -11,14 +11,15 @@ #ifndef TEST_REGISTER_STATE_CHECK_H_ #define TEST_REGISTER_STATE_CHECK_H_ -#ifdef _WIN64 +#include "third_party/googletest/src/include/gtest/gtest.h" +#include "./vpx_config.h" + +#if defined(_WIN64) #define _WIN32_LEAN_AND_MEAN #include <windows.h> #include <winnt.h> -#include "third_party/googletest/src/include/gtest/gtest.h" - namespace testing { namespace internal { @@ -81,7 +82,61 @@ class RegisterStateCheck { } // namespace libvpx_test -#else // !_WIN64 +#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) \ + && !CONFIG_SHARED && HAVE_NEON + +#include "vpx/vpx_integer.h" + +extern "C" { +// Save the d8-d15 registers into store. +void vp9_push_neon(int64_t *store); +} + +namespace libvpx_test { + +// Compares the state of d8-d15 at construction with their state at +// destruction. 
These registers should be preserved by the callee on +// arm platform. +// Usage: +// { +// RegisterStateCheck reg_check; +// FunctionToVerify(); +// } +class RegisterStateCheck { + public: + RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); } + ~RegisterStateCheck() { EXPECT_TRUE(Check()); } + + private: + static bool StoreRegisters(int64_t store[8]) { + vp9_push_neon(store); + return true; + } + + // Compares the register state. Returns true if the states match. + bool Check() const { + if (!initialized_) return false; + int64_t post_store[8]; + vp9_push_neon(post_store); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(pre_store_[i], post_store[i]) << "d" + << i + 8 << " has been modified"; + } + return !testing::Test::HasNonfatalFailure(); + } + + bool initialized_; + int64_t pre_store_[8]; +}; + +#define REGISTER_STATE_CHECK(statement) do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ +} while (false) + +} // namespace libvpx_test + +#else namespace libvpx_test { diff --git a/source/libvpx/test/set_roi.cc b/source/libvpx/test/set_roi.cc index e28f511..5b054f4 100644 --- a/source/libvpx/test/set_roi.cc +++ b/source/libvpx/test/set_roi.cc @@ -26,7 +26,7 @@ using libvpx_test::ACMRandom; namespace { -TEST(Vp8RoiMapTest, ParameterCheck) { +TEST(VP8RoiMapTest, ParameterCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; diff --git a/source/libvpx/test/sixtap_predict_test.cc b/source/libvpx/test/sixtap_predict_test.cc index 1b2f03f..0c600f4 100644 --- a/source/libvpx/test/sixtap_predict_test.cc +++ b/source/libvpx/test/sixtap_predict_test.cc @@ -198,7 +198,7 @@ const sixtap_predict_fn_t sixtap_16x16_neon = vp8_sixtap_predict16x16_neon; const sixtap_predict_fn_t sixtap_8x8_neon = vp8_sixtap_predict8x8_neon; const sixtap_predict_fn_t sixtap_8x4_neon = vp8_sixtap_predict8x4_neon; INSTANTIATE_TEST_CASE_P( - NEON, SixtapPredictTest, 
::testing::Values( + DISABLED_NEON, SixtapPredictTest, ::testing::Values( make_tuple(16, 16, sixtap_16x16_neon), make_tuple(8, 8, sixtap_8x8_neon), make_tuple(8, 4, sixtap_8x4_neon))); diff --git a/source/libvpx/test/superframe_test.cc b/source/libvpx/test/superframe_test.cc index d91e7b1..c0f542d 100644 --- a/source/libvpx/test/superframe_test.cc +++ b/source/libvpx/test/superframe_test.cc @@ -21,6 +21,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest, protected: SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL), last_sf_pts_(0) {} + virtual ~SuperframeTest() {} virtual void SetUp() { InitializeConfig(); diff --git a/source/libvpx/test/svc_test.cc b/source/libvpx/test/svc_test.cc index 2e56534..fb9277b 100644 --- a/source/libvpx/test/svc_test.cc +++ b/source/libvpx/test/svc_test.cc @@ -31,6 +31,7 @@ class SvcTest : public ::testing::Test { SvcTest() : codec_iface_(0), test_file_name_("hantro_collage_w352h288.yuv"), + stats_file_name_("hantro_collage_w352h288.stat"), codec_initialized_(false), decoder_(0) { memset(&svc_, 0, sizeof(svc_)); @@ -73,6 +74,7 @@ class SvcTest : public ::testing::Test { struct vpx_codec_enc_cfg codec_enc_; vpx_codec_iface_t *codec_iface_; std::string test_file_name_; + std::string stats_file_name_; bool codec_initialized_; Decoder *decoder_; }; @@ -362,4 +364,109 @@ TEST_F(SvcTest, GetLayerResolution) { EXPECT_EQ(kHeight * 8 / 16, layer_height); } +TEST_F(SvcTest, FirstPassEncode) { + svc_.spatial_layers = 2; + codec_enc_.g_pass = VPX_RC_FIRST_PASS; + vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); + vpx_svc_set_quantizers(&svc_, "40,30", 0); + + vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + ASSERT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + // FRAME 0 + video.Begin(); + res = vpx_svc_encode(&svc_, &codec_, video.img(), 
video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); + + // FRAME 1 + video.Next(); + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); + + // Flush encoder and test EOS packet + res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); +} + +TEST_F(SvcTest, SecondPassEncode) { + svc_.spatial_layers = 2; + codec_enc_.g_pass = VPX_RC_LAST_PASS; + + FILE *const stats_file = libvpx_test::OpenTestDataFile(stats_file_name_); + ASSERT_TRUE(stats_file != NULL) << "Stats file open failed. Filename: " + << stats_file; + + struct vpx_fixed_buf stats_buf; + fseek(stats_file, 0, SEEK_END); + stats_buf.sz = static_cast<size_t>(ftell(stats_file)); + fseek(stats_file, 0, SEEK_SET); + + stats_buf.buf = malloc(stats_buf.sz); + ASSERT_TRUE(stats_buf.buf != NULL); + const size_t bytes_read = fread(stats_buf.buf, 1, stats_buf.sz, stats_file); + ASSERT_EQ(bytes_read, stats_buf.sz); + fclose(stats_file); + codec_enc_.rc_twopass_stats_in = stats_buf; + + vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + ASSERT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + // FRAME 0 + video.Begin(); + // This frame is a keyframe. 
+ res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); + + vpx_codec_err_t res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + // FRAME 1 + video.Next(); + // This is a P-frame. + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); + + res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + // FRAME 2 + video.Next(); + // This is a P-frame. + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); + + res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + free(stats_buf.buf); +} + } // namespace diff --git a/source/libvpx/test/test-data.sha1 b/source/libvpx/test/test-data.sha1 index 6f718ef..cf2ad1e 100644 --- a/source/libvpx/test/test-data.sha1 +++ b/source/libvpx/test/test-data.sha1 @@ -1,4 +1,5 @@ d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv +998cec53307c94aa5835aaf8d5731f6a3c7c2e5a hantro_collage_w352h288.stat b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m 5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf @@ -588,3 +589,49 @@ b3c48382cf7d0454e83a02497c229d27720f9e20 vp90-2-11-size-351x287.webm.md5 92a756469fa438220524e7fa6ac1d38c89514d17 vp90-2-12-droppable_2.ivf.md5 
c21e97e4ba486520118d78b01a5cb6e6dc33e190 vp90-2-12-droppable_3.ivf 601abc9e4176c70f82ac0381365e9b151fdd24cd vp90-2-12-droppable_3.ivf.md5 +61c640dad23cd4f7ad811b867e7b7e3521f4e3ba vp90-2-13-largescaling.webm +bca1b02eebdb088fa3f389fe0e7571e75a71f523 vp90-2-13-largescaling.webm.md5 +c740708fa390806eebaf669909c1285ab464f886 vp90-2-14-resize-fp-tiles-1-2.webm +c7b85ffd8e11500f73f52e7dc5a47f57c393d47f vp90-2-14-resize-fp-tiles-1-2.webm.md5 +ec8faa352a08f7033c60f29f80d505e2d7daa103 vp90-2-14-resize-fp-tiles-1-4.webm +6852c783fb421bda5ded3d4c5a3ffc46de03fbc1 vp90-2-14-resize-fp-tiles-1-4.webm.md5 +8af61853ac0d07c4cb5bf7c2016661ba350b3497 vp90-2-14-resize-fp-tiles-1-8.webm +571353bac89fea60b5706073409aa3c0d42aefe9 vp90-2-14-resize-fp-tiles-1-8.webm.md5 +b1c187ed69931496b82ec194017a79831bafceef vp90-2-14-resize-fp-tiles-1-16.webm +1c199a41afe42ce303944d70089eaaa2263b4a09 vp90-2-14-resize-fp-tiles-1-16.webm.md5 +8eaae5a6f2dff934610b0c7a917d7f583ba74aa5 vp90-2-14-resize-fp-tiles-2-1.webm +db18fcf915f7ffaea6c39feab8bda6c1688af011 vp90-2-14-resize-fp-tiles-2-1.webm.md5 +bc3046d138941e2a20e9ceec0ff6d25c25d12af3 vp90-2-14-resize-fp-tiles-4-1.webm +393211b808030d09a79927b17a4374b2f68a60ae vp90-2-14-resize-fp-tiles-4-1.webm.md5 +6e8f8e31721a0f7f68a2964e36e0e698c2e276b1 vp90-2-14-resize-fp-tiles-8-1.webm +491fd3cd78fb0577bfe905bb64bbf64bd7d29140 vp90-2-14-resize-fp-tiles-8-1.webm.md5 +cc5958da2a7edf739cd2cfeb18bd05e77903087e vp90-2-14-resize-fp-tiles-16-1.webm +0b58daf55aaf9063bf5b4fb33393d18b417dc428 vp90-2-14-resize-fp-tiles-16-1.webm.md5 +821eeecc9d8c6a316134dd42d1ff057787d8047b vp90-2-14-resize-fp-tiles-2-4.webm +374c549f2839a3d0b732c4e3650700144037e76c vp90-2-14-resize-fp-tiles-2-4.webm.md5 +dff8c8e49aacea9f4c7f22cb882da984e2a1b405 vp90-2-14-resize-fp-tiles-2-8.webm +e5b8820a7c823b21297d6e889e57ec401882c210 vp90-2-14-resize-fp-tiles-2-8.webm.md5 +77629e4b23e32896aadf6e994c78bd4ffa1c7797 vp90-2-14-resize-fp-tiles-2-16.webm +1937f5df032664ac345d4613ad4417b4967b1230 
vp90-2-14-resize-fp-tiles-2-16.webm.md5 +380ba5702bb1ec7947697314ab0300b5c56a1665 vp90-2-14-resize-fp-tiles-4-2.webm +fde7b30d2aa64c1e851a4852f655d79fc542cf66 vp90-2-14-resize-fp-tiles-4-2.webm.md5 +dc784b258ffa2abc2ae693d11792acf0bb9cb74f vp90-2-14-resize-fp-tiles-8-2.webm +edf26f0130aeee8342d49c2c8f0793ad008782d9 vp90-2-14-resize-fp-tiles-8-2.webm.md5 +8e575789fd63ebf69e8eff1b9a4351a249a73bee vp90-2-14-resize-fp-tiles-16-2.webm +b6415318c1c589a1f64b9d569ce3cabbec2e0d52 vp90-2-14-resize-fp-tiles-16-2.webm.md5 +e3adc944a11c4c5517e63664c84ebb0847b64d81 vp90-2-14-resize-fp-tiles-4-8.webm +03cba0532bc90a05b1990db830bf5701e24e7982 vp90-2-14-resize-fp-tiles-4-8.webm.md5 +3b27a991eb6d78dce38efab35b7db682e8cbbee3 vp90-2-14-resize-fp-tiles-4-16.webm +5d16b7f82bf59f802724ddfd97abb487150b1c9d vp90-2-14-resize-fp-tiles-4-16.webm.md5 +d5fed8c28c1d4c7e232ebbd25cf758757313ed96 vp90-2-14-resize-fp-tiles-8-4.webm +5a8ff8a52cbbde7bfab569beb6d971c5f8b904f7 vp90-2-14-resize-fp-tiles-8-4.webm.md5 +17a5faa023d77ee9dad423a4e0d3145796bbc500 vp90-2-14-resize-fp-tiles-16-4.webm +2ef8daa3c3e750fd745130d0a76a39fe86f0448f vp90-2-14-resize-fp-tiles-16-4.webm.md5 +9361e031f5cc990d8740863e310abb5167ae351e vp90-2-14-resize-fp-tiles-8-16.webm +57f13a2197486584f4e1a4f82ad969f3abc5a1a2 vp90-2-14-resize-fp-tiles-8-16.webm.md5 +5803fc6fcbfb47b7661f3fcc6499158a32b56675 vp90-2-14-resize-fp-tiles-16-8.webm +be0fe64a1a4933696ff92d93f9bdecdbd886dc13 vp90-2-14-resize-fp-tiles-16-8.webm.md5 +0ac0f6d20a0afed77f742a3b9acb59fd7b9cb093 vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +1765315acccfe6cd12230e731369fcb15325ebfa vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +1ef480392112b3509cb190afbb96f9a38dd9fbac vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 diff --git a/source/libvpx/test/test.mk b/source/libvpx/test/test.mk index bf6d055..92664e2 100644 --- a/source/libvpx/test/test.mk +++ b/source/libvpx/test/test.mk @@ -18,6 +18,7 @@ 
LIBVPX_TEST_SRCS-yes += video_source.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c +LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h @@ -29,6 +30,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc @@ -120,6 +122,7 @@ endif # CONFIG_SHARED ## TEST DATA ## LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.stat LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m @@ -691,8 +694,54 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-14-resize-fp-tiles-1-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm 
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/source/libvpx/test/test_vector_test.cc b/source/libvpx/test/test_vector_test.cc index 53b7636..9ba18da 100644 --- a/source/libvpx/test/test_vector_test.cc +++ b/source/libvpx/test/test_vector_test.cc @@ -89,8 +89,12 @@ TEST_P(TestVectorTest, MD5Match) { } VP8_INSTANTIATE_TEST_CASE(TestVectorTest, - ::testing::ValuesIn(libvpx_test::kVP8TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP8TestVectors, + libvpx_test::kVP8TestVectors + + libvpx_test::kNumVP8TestVectors)); VP9_INSTANTIATE_TEST_CASE(TestVectorTest, - 
::testing::ValuesIn(libvpx_test::kVP9TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)); } // namespace diff --git a/source/libvpx/test/test_vectors.cc b/source/libvpx/test/test_vectors.cc index aba8a3c..ff3c389 100644 --- a/source/libvpx/test/test_vectors.cc +++ b/source/libvpx/test/test_vectors.cc @@ -12,8 +12,10 @@ namespace libvpx_test { +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + #if CONFIG_VP8_DECODER -const char *kVP8TestVectors[kNumVp8TestVectors] = { +const char *const kVP8TestVectors[] = { "vp80-00-comprehensive-001.ivf", "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", @@ -47,9 +49,10 @@ const char *kVP8TestVectors[kNumVp8TestVectors] = { "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf", "vp80-06-smallsize.ivf" }; +const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER -const char *kVP9TestVectors[kNumVp9TestVectors] = { +const char *const kVP9TestVectors[] = { "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm", "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm", "vp90-2-00-quantizer-04.webm", "vp90-2-00-quantizer-05.webm", @@ -161,8 +164,22 @@ const char *kVP9TestVectors[kNumVp9TestVectors] = { "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm", "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf", "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf", - "vp91-2-04-yv444.webm" + "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm", + "vp90-2-14-resize-fp-tiles-1-16.webm", + "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm", + "vp90-2-14-resize-fp-tiles-16-1.webm", "vp90-2-14-resize-fp-tiles-16-2.webm", + "vp90-2-14-resize-fp-tiles-16-4.webm", + "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + 
"vp90-2-14-resize-fp-tiles-16-8.webm", "vp90-2-14-resize-fp-tiles-1-8.webm", + "vp90-2-14-resize-fp-tiles-2-16.webm", "vp90-2-14-resize-fp-tiles-2-1.webm", + "vp90-2-14-resize-fp-tiles-2-4.webm", "vp90-2-14-resize-fp-tiles-2-8.webm", + "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm", + "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", + "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", + "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm" }; +const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/source/libvpx/test/test_vectors.h b/source/libvpx/test/test_vectors.h index d5ecc96..8e1aabb 100644 --- a/source/libvpx/test/test_vectors.h +++ b/source/libvpx/test/test_vectors.h @@ -16,14 +16,13 @@ namespace libvpx_test { #if CONFIG_VP8_DECODER -const int kNumVp8TestVectors = 62; -extern const char *kVP8TestVectors[kNumVp8TestVectors]; +extern const int kNumVP8TestVectors; +extern const char *const kVP8TestVectors[]; #endif #if CONFIG_VP9_DECODER -const int kNumVp9TestVectors = 223; - -extern const char *kVP9TestVectors[kNumVp9TestVectors]; +extern const int kNumVP9TestVectors; +extern const char *const kVP9TestVectors[]; #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/source/libvpx/test/tools_common.sh b/source/libvpx/test/tools_common.sh new file mode 100755 index 0000000..cd79771 --- /dev/null +++ b/source/libvpx/test/tools_common.sh @@ -0,0 +1,437 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
+## +## This file contains shell code shared by test scripts for libvpx tools. +set -e + +# Sets $VPX_TOOL_TEST to the name specified by positional parameter one. +test_begin() { + VPX_TOOL_TEST="${1}" +} + +# Clears the VPX_TOOL_TEST variable after confirming that $VPX_TOOL_TEST matches +# positional parameter one. +test_end() { + if [ "$1" != "${VPX_TOOL_TEST}" ]; then + echo "FAIL completed test mismatch!." + echo " completed test: ${1}" + echo " active test: ${VPX_TOOL_TEST}." + return 1 + fi + VPX_TOOL_TEST='<unset>' +} + +# Echoes the target configuration being tested. +test_configuration_target() { + vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" + # Find the TOOLCHAIN line, split it using ':=' as the field separator, and + # print the last field to get the value. Then pipe the value to tr to consume + # any leading/trailing spaces while allowing tr to echo the output to stdout. + awk -F ':=' '/TOOLCHAIN/ { print $NF }' "${vpx_config_mk}" | tr -d ' ' +} + +# Trap function used for failure reports and tool output directory removal. +# When the contents of $VPX_TOOL_TEST do not match the string '<unset>', reports +# failure of test stored in $VPX_TOOL_TEST. +cleanup() { + if [ -n "${VPX_TOOL_TEST}" ] && [ "${VPX_TOOL_TEST}" != '<unset>' ]; then + echo "FAIL: $VPX_TOOL_TEST" + fi + if [ -n "${VPX_TEST_OUTPUT_DIR}" ] && [ -d "${VPX_TEST_OUTPUT_DIR}" ]; then + rm -rf "${VPX_TEST_OUTPUT_DIR}" + fi +} + +# Echoes the git hash portion of the VERSION_STRING variable defined in +# $LIBVPX_CONFIG_PATH/config.mk to stdout, or the version number string when +# no git hash is contained in VERSION_STRING. +config_hash() { + vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" + # Find VERSION_STRING line, split it with "-g" and print the last field to + # output the git hash to stdout. + vpx_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${vpx_config_mk}") + # Handle two situations here: + # 1. The default case: $vpx_version is a git hash, so echo it unchanged. + # 2. 
When being run a non-dev tree, the -g portion is not present in the + # version string: It's only the version number. + # In this case $vpx_version is something like 'VERSION_STRING=v1.3.0', so + # we echo only what is after the '='. + echo "${vpx_version##*=}" +} + +# Echoes the short form of the current git hash. +current_hash() { + if git --version > /dev/null 2>&1; then + (cd "$(dirname "${0}")" + git rev-parse --short HEAD) + else + # Return the config hash if git is unavailable: Fail silently, git hashes + # are used only for warnings. + config_hash + fi +} + +# Echoes warnings to stdout when git hash in vpx_config.h does not match the +# current git hash. +check_git_hashes() { + hash_at_configure_time=$(config_hash) + hash_now=$(current_hash) + + if [ "${hash_at_configure_time}" != "${hash_now}" ]; then + echo "Warning: git hash has changed since last configure." + fi +} + +# This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and +# LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that +# the variables are set and that they all evaluate to directory paths. +verify_vpx_test_environment() { + if [ ! -d "${LIBVPX_BIN_PATH}" ]; then + echo "The LIBVPX_BIN_PATH environment variable must be set." + return 1 + fi + if [ ! -d "${LIBVPX_CONFIG_PATH}" ]; then + echo "The LIBVPX_CONFIG_PATH environment variable must be set." + return 1 + fi + if [ ! -d "${LIBVPX_TEST_DATA_PATH}" ]; then + echo "The LIBVPX_TEST_DATA_PATH environment variable must be set." + return 1 + fi +} + +# Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which +# should be a LIBVPX preprocessor flag. Echoes yes to stdout when the feature +# is available. 
+vpx_config_option_enabled() { + vpx_config_option="${1}" + vpx_config_file="${LIBVPX_CONFIG_PATH}/vpx_config.h" + config_line=$(grep "${vpx_config_option}" "${vpx_config_file}") + if echo "${config_line}" | egrep -q '1$'; then + echo yes + fi +} + +# Echoes yes when output of test_configuration_target() contains win32 or win64. +is_windows_target() { + if test_configuration_target \ + | grep -q -e win32 -e win64 > /dev/null 2>&1; then + echo yes + fi +} + +# Echoes yes to stdout when the file named by positional parameter one exists +# in LIBVPX_BIN_PATH, and is executable. +vpx_tool_available() { + tool_name="${1}" + if [ "$(is_windows_target)" = "yes" ]; then + tool_name="${tool_name}.exe" + fi + [ -x "${LIBVPX_BIN_PATH}/${1}" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP8_DECODER. +vp8_decode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP8_DECODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP8_ENCODER. +vp8_encode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP8_ENCODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP9_DECODER. +vp9_decode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP9_DECODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP9_ENCODER. +vp9_encode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP9_ENCODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_WEBM_IO. +webm_io_available() { + [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpxdec exists according to vpx_tool_available(). +vpxdec_available() { + [ -n $(vpx_tool_available vpxdec) ] && echo yes +} + +# Wrapper function for running vpxdec in noblit mode. 
Requires that +# LIBVPX_BIN_PATH points to the directory containing vpxdec. Positional +# parameter one is used as the input file path. Positional parameter two, when +# present, is interpreted as a boolean flag that means the input should be sent +# to vpxdec via pipe from cat instead of directly. +vpxdec() { + input="${1}" + pipe_input=${2} + + if [ $# -gt 2 ]; then + # shift away $1 and $2 so the remaining arguments can be passed to vpxdec + # via $@. + shift 2 + fi + + decoder="${LIBVPX_BIN_PATH}/vpxdec" + + if [ "$(is_windows_target)" = "yes" ]; then + decoder="${decoder}.exe" + fi + + if [ -z "${pipe_input}" ]; then + "${decoder}" "$input" --summary --noblit "$@" > /dev/null 2>&1 + else + cat "${input}" | "${decoder}" - --summary --noblit "$@" > /dev/null 2>&1 + fi +} + +# Echoes yes to stdout when vpxenc exists according to vpx_tool_available(). +vpxenc_available() { + [ -n $(vpx_tool_available vpxenc) ] && echo yes +} + +# Wrapper function for running vpxenc. Positional parameters are interpreted as +# follows: +# 1 - codec name +# 2 - input width +# 3 - input height +# 4 - number of frames to encode +# 5 - path to input file +# 6 - path to output file +# Note: The output file path must end in .ivf to output an IVF file. +# 7 - extra flags +# Note: Extra flags currently supports a special case: when set to "-" +# input is piped to vpxenc via cat. +vpxenc() { + encoder="${LIBVPX_BIN_PATH}/vpxenc" + codec="${1}" + width=${2} + height=${3} + frames=${4} + input=${5} + output="${VPX_TEST_OUTPUT_DIR}/${6}" + extra_flags=${7} + + if [ "$(is_windows_target)" = "yes" ]; then + encoder="${encoder}.exe" + fi + + # Because --ivf must be within the command line to get IVF from vpxenc. 
+ if echo "${output}" | egrep -q 'ivf$'; then + use_ivf=--ivf + else + unset use_ivf + fi + + if [ "${extra_flags}" = "-" ]; then + pipe_input=yes + extra_flags=${8} + else + unset pipe_input + fi + + if [ -z "${pipe_input}" ]; then + "${encoder}" --codec=${codec} --width=${width} --height=${height} \ + --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" \ + "${input}" > /dev/null 2>&1 + else + cat "${input}" \ + | "${encoder}" --codec=${codec} --width=${width} --height=${height} \ + --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" - \ + > /dev/null 2>&1 + fi + + if [ ! -e "${output}" ]; then + # Return non-zero exit status: output file doesn't exist, so something + # definitely went wrong. + return 1 + fi +} + +# Filters strings from positional parameter one using the filter specified by +# positional parameter two. Filter behavior depends on the presence of a third +# positional parameter. When parameter three is present, strings that match the +# filter are excluded. When omitted, strings matching the filter are included. +# The filtered string is echoed to stdout. +filter_strings() { + strings=${1} + filter=${2} + exclude=${3} + + if [ -n "${exclude}" ]; then + # When positional parameter three exists the caller wants to remove strings. + # Tell grep to invert matches using the -v argument. + exclude='-v' + else + unset exclude + fi + + if [ -n "${filter}" ]; then + for s in ${strings}; do + if echo "${s}" | egrep -q ${exclude} "${filter}" > /dev/null 2>&1; then + filtered_strings="${filtered_strings} ${s}" + fi + done + else + filtered_strings="${strings}" + fi + echo "${filtered_strings}" +} + +# Runs user test functions passed via positional parameters one and two. +# Functions in positional parameter one are treated as environment verification +# functions and are run unconditionally. Functions in positional parameter two +# are run according to the rules specified in vpx_test_usage(). 
+run_tests() { + env_tests="verify_vpx_test_environment ${1}" + tests_to_filter="${2}" + + if [ "${VPX_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then + # Filter out DISABLED tests. + tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude) + fi + + if [ -n "${VPX_TEST_FILTER}" ]; then + # Remove tests not matching the user's filter. + tests_to_filter=$(filter_strings "${tests_to_filter}" ${VPX_TEST_FILTER}) + fi + + tests_to_run="${env_tests} ${tests_to_filter}" + + check_git_hashes + + # Run tests. + for test in ${tests_to_run}; do + test_begin "${test}" + "${test}" + [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo " PASS ${test}" + test_end "${test}" + done + + tested_config="$(test_configuration_target) @ $(current_hash)" + echo $(basename "${0%.*}"): Done, all tests pass for ${tested_config}. +} + +vpx_test_usage() { +cat << EOF + Usage: ${0##*/} [arguments] + --bin-path <path to libvpx binaries directory> + --config-path <path to libvpx config directory> + --filter <filter>: User test filter. Only tests matching filter are run. + --run-disabled-tests: Run disabled tests. + --help: Display this message and exit. + --test-data-path <path to libvpx test data directory> + --verbose: Verbose output. + + When the --bin-path option is not specified the script attempts to use + \$LIBVPX_BIN_PATH and then the current directory. + + When the --config-path option is not specified the script attempts to use + \$LIBVPX_CONFIG_PATH and then the current directory. + + When the -test-data-path option is not specified the script attempts to use + \$LIBVPX_TEST_DATA_PATH and then the current directory. +EOF +} + +# Returns non-zero (failure) when required environment variables are empty +# strings. +vpx_test_check_environment() { + if [ -z "${LIBVPX_BIN_PATH}" ] || \ + [ -z "${LIBVPX_CONFIG_PATH}" ] || \ + [ -z "${LIBVPX_TEST_DATA_PATH}" ]; then + return 1 + fi +} + +# Parse the command line. 
+while [ -n "$1" ]; do + case "$1" in + --bin-path) + LIBVPX_BIN_PATH="$2" + shift + ;; + --config-path) + LIBVPX_CONFIG_PATH="$2" + shift + ;; + --filter) + VPX_TEST_FILTER="$2" + shift + ;; + --run-disabled-tests) + VPX_TEST_RUN_DISABLED_TESTS=yes + ;; + --help) + vpx_test_usage + exit + ;; + --test-data-path) + LIBVPX_TEST_DATA_PATH="$2" + shift + ;; + --verbose) + VPX_TEST_VERBOSE_OUTPUT=yes + ;; + *) + vpx_test_usage + exit 1 + ;; + esac + shift +done + +# Handle running the tests from a build directory without arguments when running +# the tests on *nix/macosx. +LIBVPX_BIN_PATH="${LIBVPX_BIN_PATH:-.}" +LIBVPX_CONFIG_PATH="${LIBVPX_CONFIG_PATH:-.}" +LIBVPX_TEST_DATA_PATH="${LIBVPX_TEST_DATA_PATH:-.}" + +# Create a temporary directory for output files, and a trap to clean it up. +if [ -n "${TMPDIR}" ]; then + VPX_TEST_TEMP_ROOT="${TMPDIR}" +elif [ -n "${TEMPDIR}" ]; then + VPX_TEST_TEMP_ROOT="${TEMPDIR}" +else + VPX_TEST_TEMP_ROOT=/tmp +fi + +VPX_TEST_RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}') +VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_${VPX_TEST_RAND}" + +if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \ + [ ! -d "${VPX_TEST_OUTPUT_DIR}" ]; then + echo "${0##*/}: Cannot create output directory, giving up." 
+ echo "${0##*/}: VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}" + exit 1 +fi + +trap cleanup EXIT + +if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then +cat << EOF +$(basename "${0%.*}") test configuration: + LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH} + LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH} + LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH} + VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR} + VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT} + VPX_TEST_FILTER=${VPX_TEST_FILTER} + VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS} +EOF +fi diff --git a/source/libvpx/test/vp8_boolcoder_test.cc b/source/libvpx/test/vp8_boolcoder_test.cc index 7c6c601..9cd1987 100644 --- a/source/libvpx/test/vp8_boolcoder_test.cc +++ b/source/libvpx/test/vp8_boolcoder_test.cc @@ -35,14 +35,14 @@ const uint8_t secret_key[16] = { 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 }; -void encrypt_buffer(uint8_t *buffer, int size) { - for (int i = 0; i < size; ++i) { +void encrypt_buffer(uint8_t *buffer, size_t size) { + for (size_t i = 0; i < size; ++i) { buffer[i] ^= secret_key[i & 15]; } } void test_decrypt_cb(void *decrypt_state, const uint8_t *input, - uint8_t *output, int count) { + uint8_t *output, int count) { const size_t offset = input - reinterpret_cast<uint8_t*>(decrypt_state); for (int i = 0; i < count; i++) { output[i] = input[i] ^ secret_key[(offset + i) & 15]; diff --git a/source/libvpx/test/vp8_decrypt_test.cc b/source/libvpx/test/vp8_decrypt_test.cc index b092509..1b5b083 100644 --- a/source/libvpx/test/vp8_decrypt_test.cc +++ b/source/libvpx/test/vp8_decrypt_test.cc @@ -26,9 +26,9 @@ const uint8_t test_key[16] = { 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 }; -void encrypt_buffer(const uint8_t *src, uint8_t *dst, - int size, int offset = 0) { - for (int i = 0; i < size; ++i) { +void encrypt_buffer(const uint8_t *src, uint8_t *dst, size_t size, + ptrdiff_t offset) { + for (size_t i = 0; i < size; ++i) { dst[i] = src[i] ^ test_key[(offset + i) & 15]; } } @@ -61,7 +61,7 @@ 
TEST(TestDecrypt, DecryptWorks) { #if CONFIG_DECRYPT std::vector<uint8_t> encrypted(video.frame_size()); - encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size()); + encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0); vp8_decrypt_init di = { test_decrypt_cb, &encrypted[0] }; decoder.Control(VP8D_SET_DECRYPTOR, &di); #endif // CONFIG_DECRYPT diff --git a/source/libvpx/test/vp8_fdct4x4_test.cc b/source/libvpx/test/vp8_fdct4x4_test.cc index e3c292e..bdbf74e 100644 --- a/source/libvpx/test/vp8_fdct4x4_test.cc +++ b/source/libvpx/test/vp8_fdct4x4_test.cc @@ -68,7 +68,7 @@ void reference_idct4x4(const int16_t *input, int16_t *output) { using libvpx_test::ACMRandom; -TEST(Vp8FdctTest, SignBiasCheck) { +TEST(VP8FdctTest, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int16_t test_input_block[16]; int16_t test_output_block[16]; @@ -127,7 +127,7 @@ TEST(Vp8FdctTest, SignBiasCheck) { << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; }; -TEST(Vp8FdctTest, RoundTripErrorCheck) { +TEST(VP8FdctTest, RoundTripErrorCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; diff --git a/source/libvpx/test/vp9_lossless_test.cc b/source/libvpx/test/vp9_lossless_test.cc index ad7ba44..7c3ba9f 100644 --- a/source/libvpx/test/vp9_lossless_test.cc +++ b/source/libvpx/test/vp9_lossless_test.cc @@ -19,16 +19,17 @@ namespace { const int kMaxPsnr = 100; -class LossLessTest : public ::libvpx_test::EncoderTest, +class LosslessTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: - LossLessTest() : EncoderTest(GET_PARAM(0)), - psnr_(kMaxPsnr), - nframes_(0), - encoding_mode_(GET_PARAM(1)) { + LosslessTestLarge() + : EncoderTest(GET_PARAM(0)), + psnr_(kMaxPsnr), + nframes_(0), + encoding_mode_(GET_PARAM(1)) { } - virtual ~LossLessTest() {} + virtual ~LosslessTestLarge() {} virtual void SetUp() { InitializeConfig(); @@ -55,7 
+56,7 @@ class LossLessTest : public ::libvpx_test::EncoderTest, libvpx_test::TestMode encoding_mode_; }; -TEST_P(LossLessTest, TestLossLessEncoding) { +TEST_P(LosslessTestLarge, TestLossLessEncoding) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; @@ -73,7 +74,7 @@ TEST_P(LossLessTest, TestLossLessEncoding) { EXPECT_GE(psnr_lossless, kMaxPsnr); } -TEST_P(LossLessTest, TestLossLessEncoding444) { +TEST_P(LosslessTestLarge, TestLossLessEncoding444) { libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 10); cfg_.g_profile = 1; @@ -90,5 +91,5 @@ TEST_P(LossLessTest, TestLossLessEncoding444) { EXPECT_GE(psnr_lossless, kMaxPsnr); } -VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES); +VP9_INSTANTIATE_TEST_CASE(LosslessTestLarge, ALL_TEST_MODES); } // namespace diff --git a/source/libvpx/test/vp9_thread_test.cc b/source/libvpx/test/vp9_thread_test.cc index a78cdea..5523f20 100644 --- a/source/libvpx/test/vp9_thread_test.cc +++ b/source/libvpx/test/vp9_thread_test.cc @@ -153,6 +153,66 @@ TEST(VP9DecodeMTTest, MTDecode2) { } } +// Test tile quantity changes within one file. 
+TEST(VP9DecodeMTTest, MTDecode3) { + static const struct { + const char *name; + const char *expected_md5; + } files[] = { + { "vp90-2-14-resize-fp-tiles-1-16.webm", + "0cd5e632c326297e975f38949c31ea94" }, + { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "5c78a96a42e7f4a4f6b2edcdb791e44c" }, + { "vp90-2-14-resize-fp-tiles-1-2.webm", + "e030450ae85c3277be2a418769df98e2" }, + { "vp90-2-14-resize-fp-tiles-1-4.webm", + "312eed4e2b64eb7a4e7f18916606a430" }, + { "vp90-2-14-resize-fp-tiles-16-1.webm", + "1755c16d8af16a9cb3fe7338d90abe52" }, + { "vp90-2-14-resize-fp-tiles-16-2.webm", + "500300592d3fcb6f12fab25e48aaf4df" }, + { "vp90-2-14-resize-fp-tiles-16-4.webm", + "47c48379fa6331215d91c67648e1af6e" }, + { "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + "eecf17290739bc708506fa4827665989" }, + { "vp90-2-14-resize-fp-tiles-16-8.webm", + "29b6bb54e4c26b5ca85d5de5fed94e76" }, + { "vp90-2-14-resize-fp-tiles-1-8.webm", + "1b6f175e08cd82cf84bb800ac6d1caa3" }, + { "vp90-2-14-resize-fp-tiles-2-16.webm", + "ca3b03e4197995d8d5444ede7a6c0804" }, + { "vp90-2-14-resize-fp-tiles-2-1.webm", + "99aec065369d70bbb78ccdff65afed3f" }, + { "vp90-2-14-resize-fp-tiles-2-4.webm", + "22d0ebdb49b87d2920a85aea32e1afd5" }, + { "vp90-2-14-resize-fp-tiles-2-8.webm", + "c2115cf051c62e0f7db1d4a783831541" }, + { "vp90-2-14-resize-fp-tiles-4-16.webm", + "c690d7e1719b31367564cac0af0939cb" }, + { "vp90-2-14-resize-fp-tiles-4-1.webm", + "a926020b2cc3e15ad4cc271853a0ff26" }, + { "vp90-2-14-resize-fp-tiles-4-2.webm", + "42699063d9e581f1993d0cf890c2be78" }, + { "vp90-2-14-resize-fp-tiles-4-8.webm", + "7f76d96036382f45121e3d5aa6f8ec52" }, + { "vp90-2-14-resize-fp-tiles-8-16.webm", + "76a43fcdd7e658542913ea43216ec55d" }, + { "vp90-2-14-resize-fp-tiles-8-1.webm", + "8e3fbe89486ca60a59299dea9da91378" }, + { "vp90-2-14-resize-fp-tiles-8-2.webm", + "ae96f21f21b6370cc0125621b441fc52" }, + { "vp90-2-14-resize-fp-tiles-8-4.webm", + "3eb4f24f10640d42218f7fd7b9fd30d4" }, + }; + + for (int i = 0; i < 
static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) { + for (int t = 2; t <= 8; ++t) { + EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str()) + << "threads = " << t; + } + } +} + INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool()); } // namespace diff --git a/source/libvpx/test/vpxdec.sh b/source/libvpx/test/vpxdec.sh new file mode 100755 index 0000000..d236f97 --- /dev/null +++ b/source/libvpx/test/vpxdec.sh @@ -0,0 +1,65 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests vpxdec. To add new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to vpxdec_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf" +VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm" + +# Environment check: Make sure input is available. +vpxdec_verify_environment() { + if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." 
+ return 1 + fi +} + +vpxdec_can_decode_vp8() { + if [ "$(vpxdec_available)" = "yes" ] && \ + [ "$(vp8_decode_available)" = "yes" ]; then + echo yes + fi +} + +vpxdec_can_decode_vp9() { + if [ "$(vpxdec_available)" = "yes" ] && \ + [ "$(vp9_decode_available)" = "yes" ]; then + echo yes + fi +} + +vpxdec_vp8_ivf() { + if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then + vpxdec "${VP8_IVF_FILE}" + fi +} + +vpxdec_vp8_ivf_pipe_input() { + if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then + vpxdec "${VP8_IVF_FILE}" - + fi +} + +vpxdec_vp9_webm() { + if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + vpxdec "${VP9_WEBM_FILE}" + fi +} + +vpxdec_tests="vpxdec_vp8_ivf + vpxdec_vp8_ivf_pipe_input + vpxdec_vp9_webm" + +run_tests vpxdec_verify_environment "${vpxdec_tests}" diff --git a/source/libvpx/test/vpxenc.sh b/source/libvpx/test/vpxenc.sh new file mode 100755 index 0000000..89e4eb3 --- /dev/null +++ b/source/libvpx/test/vpxenc.sh @@ -0,0 +1,96 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests vpxenc using hantro_collage_w352h288.yuv as input. To add +## new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to vpxenc_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv" +YUV_RAW_INPUT_WIDTH=352 +YUV_RAW_INPUT_HEIGHT=288 +TEST_FRAMES=10 + +# Environment check: Make sure input is available. +vpxenc_verify_environment() { + if [ ! 
-e "${YUV_RAW_INPUT}" ]; then + echo "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +vpxenc_can_encode_vp8() { + if [ "$(vpxenc_available)" = "yes" ] && \ + [ "$(vp8_encode_available)" = "yes" ]; then + echo yes + fi +} + +vpxenc_can_encode_vp9() { + if [ "$(vpxenc_available)" = "yes" ] && \ + [ "$(vp9_encode_available)" = "yes" ]; then + echo yes + fi +} + +vpxenc_vp8_ivf() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.ivf + fi +} + +vpxenc_vp8_ivf_pipe_input() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.ivf - + fi +} + +vpxenc_vp8_webm() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && + [ "$(webm_io_available)" = "yes" ] ; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.webm + fi +} + +vpxenc_vp9_ivf() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9.ivf + fi +} + +vpxenc_vp9_webm() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && + [ "$(webm_io_available)" = "yes" ] ; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9.webm + fi +} + +DISABLED_vpxenc_vp9_ivf_lossless() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless + fi +} + +vpxenc_tests="vpxenc_vp8_ivf + vpxenc_vp8_webm + vpxenc_vp8_ivf_pipe_input + vpxenc_vp9_ivf + vpxenc_vp9_webm + DISABLED_vpxenc_vp9_ivf_lossless" + +run_tests vpxenc_verify_environment "${vpxenc_tests}" diff --git a/source/libvpx/third_party/libwebm/AUTHORS.TXT b/source/libvpx/third_party/libwebm/AUTHORS.TXT new file mode 100644 index 
0000000..8ab6f79 --- /dev/null +++ b/source/libvpx/third_party/libwebm/AUTHORS.TXT @@ -0,0 +1,4 @@ +# Names should be added to this file like so:
+# Name or Organization <email address>
+
+Google Inc.
diff --git a/source/libvpx/third_party/libwebm/LICENSE.TXT b/source/libvpx/third_party/libwebm/LICENSE.TXT new file mode 100644 index 0000000..7a6f995 --- /dev/null +++ b/source/libvpx/third_party/libwebm/LICENSE.TXT @@ -0,0 +1,30 @@ +Copyright (c) 2010, Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/source/libvpx/third_party/libwebm/PATENTS.TXT b/source/libvpx/third_party/libwebm/PATENTS.TXT new file mode 100644 index 0000000..4414d83 --- /dev/null +++ b/source/libvpx/third_party/libwebm/PATENTS.TXT @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the WebM Project. + +Google hereby grants to you a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer, and otherwise run, modify and propagate the contents of this +implementation of VP8, where such license applies only to those patent +claims, both currently owned by Google and acquired in the future, +licensable by Google that are necessarily infringed by this +implementation of VP8. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of VP8 or any code incorporated within this +implementation of VP8 constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of VP8 +shall terminate as of the date such litigation is filed. diff --git a/source/libvpx/third_party/libwebm/README.webm b/source/libvpx/third_party/libwebm/README.webm new file mode 100644 index 0000000..2c7570d --- /dev/null +++ b/source/libvpx/third_party/libwebm/README.webm @@ -0,0 +1,7 @@ +URL: https://chromium.googlesource.com/webm/libwebm +Version: a7118d8ec564e9db841da1eb01f547f3229f240a +License: BSD +License File: LICENSE.txt + +Description: +libwebm is used to handle WebM container I/O. 
diff --git a/source/libvpx/third_party/libwebm/RELEASE.TXT b/source/libvpx/third_party/libwebm/RELEASE.TXT new file mode 100644 index 0000000..a7e9f03 --- /dev/null +++ b/source/libvpx/third_party/libwebm/RELEASE.TXT @@ -0,0 +1,34 @@ +1.0.0.5
+ * Handled case when no duration
+ * Handled empty clusters
+ * Handled empty clusters when seeking
+ * Implemented check lacing bits
+
+1.0.0.4
+ * Made Cues member variables mutable
+ * Defended against badly-formatted cue points
+ * Segment::GetCluster returns CuePoint too
+ * Separated cue-based searches
+
+1.0.0.3
+ * Added Block::GetOffset() to get a frame's offset in a block
+ * Changed cluster count type from size_t to long
+ * Parsed SeekHead to find cues
+ * Allowed seeking beyond end of cluster cache
+ * Added note to not attempt to reparse cues element
+ * Restructured Segment::LoadCluster
+ * Marked position of cues without parsing cues element
+ * Allowed cue points to be loaded incrementally
+ * Implemented lazy loading of cue points as they're searched
+ * Merged Cues::LoadCuePoint into Cues::Find
+ * Lazy init cues
+ * Loaded cue point during find
+
+1.0.0.2
+ * added support for Cues element
+ * seeking was improved
+
+1.0.0.1
+ * fixed item 141
+ * added item 142
+ * added this file, RELEASE.TXT, to repository
diff --git a/source/libvpx/third_party/libwebm/mkvmuxer.cpp b/source/libvpx/third_party/libwebm/mkvmuxer.cpp new file mode 100644 index 0000000..8ae0dda --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxer.cpp @@ -0,0 +1,3245 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvmuxer.hpp" + +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <new> + +#include "mkvmuxerutil.hpp" +#include "mkvparser.hpp" +#include "mkvwriter.hpp" +#include "webmids.hpp" + +#ifdef _MSC_VER +// Disable MSVC warnings that suggest making code non-portable. +#pragma warning(disable:4996) +#endif + +namespace mkvmuxer { + +namespace { +// Deallocate the string designated by |dst|, and then copy the |src| +// string to |dst|. The caller owns both the |src| string and the +// |dst| copy (hence the caller is responsible for eventually +// deallocating the strings, either directly, or indirectly via +// StrCpy). Returns true if the source string was successfully copied +// to the destination. 
+bool StrCpy(const char* src, char** dst_ptr) { + if (dst_ptr == NULL) + return false; + + char*& dst = *dst_ptr; + + delete [] dst; + dst = NULL; + + if (src == NULL) + return true; + + const size_t size = strlen(src) + 1; + + dst = new (std::nothrow) char[size]; // NOLINT + if (dst == NULL) + return false; + + strcpy(dst, src); // NOLINT + return true; +} +} // namespace + +/////////////////////////////////////////////////////////////// +// +// IMkvWriter Class + +IMkvWriter::IMkvWriter() { +} + +IMkvWriter::~IMkvWriter() { +} + +bool WriteEbmlHeader(IMkvWriter* writer) { + // Level 0 + uint64 size = EbmlElementSize(kMkvEBMLVersion, 1ULL); + size += EbmlElementSize(kMkvEBMLReadVersion, 1ULL); + size += EbmlElementSize(kMkvEBMLMaxIDLength, 4ULL); + size += EbmlElementSize(kMkvEBMLMaxSizeLength, 8ULL); + size += EbmlElementSize(kMkvDocType, "webm"); + size += EbmlElementSize(kMkvDocTypeVersion, 2ULL); + size += EbmlElementSize(kMkvDocTypeReadVersion, 2ULL); + + if (!WriteEbmlMasterElement(writer, kMkvEBML, size)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLVersion, 1ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLReadVersion, 1ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLMaxIDLength, 4ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvEBMLMaxSizeLength, 8ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvDocType, "webm")) + return false; + if (!WriteEbmlElement(writer, kMkvDocTypeVersion, 2ULL)) + return false; + if (!WriteEbmlElement(writer, kMkvDocTypeReadVersion, 2ULL)) + return false; + + return true; +} + +bool ChunkedCopy(mkvparser::IMkvReader* source, + mkvmuxer::IMkvWriter* dst, + mkvmuxer::int64 start, int64 size) { + // TODO(vigneshv): Check if this is a reasonable value. + const uint32 kBufSize = 2048; + uint8* buf = new uint8[kBufSize]; + int64 offset = start; + while (size > 0) { + const int64 read_len = (size > kBufSize) ? 
kBufSize : size; + if (source->Read(offset, static_cast<long>(read_len), buf)) + return false; + dst->Write(buf, static_cast<uint32>(read_len)); + offset += read_len; + size -= read_len; + } + delete[] buf; + return true; +} + +/////////////////////////////////////////////////////////////// +// +// Frame Class + +Frame::Frame() + : add_id_(0), + additional_(NULL), + additional_length_(0), + duration_(0), + frame_(NULL), + is_key_(false), + length_(0), + track_number_(0), + timestamp_(0), + discard_padding_(0) { +} + +Frame::~Frame() { + delete [] frame_; + delete [] additional_; +} + +bool Frame::Init(const uint8* frame, uint64 length) { + uint8* const data = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!data) + return false; + + delete [] frame_; + frame_ = data; + length_ = length; + + memcpy(frame_, frame, static_cast<size_t>(length_)); + return true; +} + +bool Frame::AddAdditionalData(const uint8* additional, uint64 length, + uint64 add_id) { + uint8* const data = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!data) + return false; + + delete [] additional_; + additional_ = data; + additional_length_ = length; + add_id_ = add_id; + + memcpy(additional_, additional, static_cast<size_t>(additional_length_)); + return true; +} + +/////////////////////////////////////////////////////////////// +// +// CuePoint Class + +CuePoint::CuePoint() + : time_(0), + track_(0), + cluster_pos_(0), + block_number_(1), + output_block_number_(true) { +} + +CuePoint::~CuePoint() { +} + +bool CuePoint::Write(IMkvWriter* writer) const { + if (!writer || track_ < 1 || cluster_pos_ < 1) + return false; + + uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_); + size += EbmlElementSize(kMkvCueTrack, track_); + if (output_block_number_ && block_number_ > 1) + size += EbmlElementSize(kMkvCueBlockNumber, block_number_); + const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions, + size) + size; + const 
uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) + + track_pos_size; + + if (!WriteEbmlMasterElement(writer, kMkvCuePoint, payload_size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvCueTime, time_)) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvCueTrackPositions, size)) + return false; + if (!WriteEbmlElement(writer, kMkvCueTrack, track_)) + return false; + if (!WriteEbmlElement(writer, kMkvCueClusterPosition, cluster_pos_)) + return false; + if (output_block_number_ && block_number_ > 1) + if (!WriteEbmlElement(writer, kMkvCueBlockNumber, block_number_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0) + return false; + + if (stop_position - payload_position != static_cast<int64>(payload_size)) + return false; + + return true; +} + +uint64 CuePoint::PayloadSize() const { + uint64 size = EbmlElementSize(kMkvCueClusterPosition, cluster_pos_); + size += EbmlElementSize(kMkvCueTrack, track_); + if (output_block_number_ && block_number_ > 1) + size += EbmlElementSize(kMkvCueBlockNumber, block_number_); + const uint64 track_pos_size = EbmlMasterElementSize(kMkvCueTrackPositions, + size) + size; + const uint64 payload_size = EbmlElementSize(kMkvCueTime, time_) + + track_pos_size; + + return payload_size; +} + +uint64 CuePoint::Size() const { + const uint64 payload_size = PayloadSize(); + return EbmlMasterElementSize(kMkvCuePoint, payload_size) + payload_size; +} + +/////////////////////////////////////////////////////////////// +// +// Cues Class + +Cues::Cues() + : cue_entries_capacity_(0), + cue_entries_size_(0), + cue_entries_(NULL), + output_block_number_(true) { +} + +Cues::~Cues() { + if (cue_entries_) { + for (int32 i = 0; i < cue_entries_size_; ++i) { + CuePoint* const cue = cue_entries_[i]; + delete cue; + } + delete [] cue_entries_; + } +} + +bool Cues::AddCue(CuePoint* cue) { + if (!cue) 
+ return false; + + if ((cue_entries_size_ + 1) > cue_entries_capacity_) { + // Add more CuePoints. + const int32 new_capacity = + (!cue_entries_capacity_) ? 2 : cue_entries_capacity_ * 2; + + if (new_capacity < 1) + return false; + + CuePoint** const cues = + new (std::nothrow) CuePoint*[new_capacity]; // NOLINT + if (!cues) + return false; + + for (int32 i = 0; i < cue_entries_size_; ++i) { + cues[i] = cue_entries_[i]; + } + + delete [] cue_entries_; + + cue_entries_ = cues; + cue_entries_capacity_ = new_capacity; + } + + cue->set_output_block_number(output_block_number_); + cue_entries_[cue_entries_size_++] = cue; + return true; +} + +CuePoint* Cues::GetCueByIndex(int32 index) const { + if (cue_entries_ == NULL) + return NULL; + + if (index >= cue_entries_size_) + return NULL; + + return cue_entries_[index]; +} + +uint64 Cues::Size() { + uint64 size = 0; + for (int32 i = 0; i < cue_entries_size_; ++i) + size += GetCueByIndex(i)->Size(); + size += EbmlMasterElementSize(kMkvCues, size); + return size; +} + +bool Cues::Write(IMkvWriter* writer) const { + if (!writer) + return false; + + uint64 size = 0; + for (int32 i = 0; i < cue_entries_size_; ++i) { + const CuePoint* const cue = GetCueByIndex(i); + + if (!cue) + return false; + + size += cue->Size(); + } + + if (!WriteEbmlMasterElement(writer, kMkvCues, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + for (int32 i = 0; i < cue_entries_size_; ++i) { + const CuePoint* const cue = GetCueByIndex(i); + + if (!cue->Write(writer)) + return false; + } + + const int64 stop_position = writer->Position(); + if (stop_position < 0) + return false; + + if (stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// ContentEncAESSettings Class + +ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {} + +uint64 
ContentEncAESSettings::Size() const { + const uint64 payload = PayloadSize(); + const uint64 size = + EbmlMasterElementSize(kMkvContentEncAESSettings, payload) + payload; + return size; +} + +bool ContentEncAESSettings::Write(IMkvWriter* writer) const { + const uint64 payload = PayloadSize(); + + if (!WriteEbmlMasterElement(writer, kMkvContentEncAESSettings, payload)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvAESSettingsCipherMode, cipher_mode_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(payload)) + return false; + + return true; +} + +uint64 ContentEncAESSettings::PayloadSize() const { + uint64 size = EbmlElementSize(kMkvAESSettingsCipherMode, cipher_mode_); + return size; +} + +/////////////////////////////////////////////////////////////// +// +// ContentEncoding Class + +ContentEncoding::ContentEncoding() + : enc_algo_(5), + enc_key_id_(NULL), + encoding_order_(0), + encoding_scope_(1), + encoding_type_(1), + enc_key_id_length_(0) { +} + +ContentEncoding::~ContentEncoding() { + delete [] enc_key_id_; +} + +bool ContentEncoding::SetEncryptionID(const uint8* id, uint64 length) { + if (!id || length < 1) + return false; + + delete [] enc_key_id_; + + enc_key_id_ = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!enc_key_id_) + return false; + + memcpy(enc_key_id_, id, static_cast<size_t>(length)); + enc_key_id_length_ = length; + + return true; +} + +uint64 ContentEncoding::Size() const { + const uint64 encryption_size = EncryptionSize(); + const uint64 encoding_size = EncodingSize(0, encryption_size); + const uint64 encodings_size = EbmlMasterElementSize(kMkvContentEncoding, + encoding_size) + + encoding_size; + + return encodings_size; +} + +bool ContentEncoding::Write(IMkvWriter* writer) const { + const uint64 
encryption_size = EncryptionSize(); + const uint64 encoding_size = EncodingSize(0, encryption_size); + const uint64 size = EbmlMasterElementSize(kMkvContentEncoding, + encoding_size) + + encoding_size; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvContentEncoding, encoding_size)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncodingOrder, encoding_order_)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncodingScope, encoding_scope_)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncodingType, encoding_type_)) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvContentEncryption, encryption_size)) + return false; + if (!WriteEbmlElement(writer, kMkvContentEncAlgo, enc_algo_)) + return false; + if (!WriteEbmlElement(writer, + kMkvContentEncKeyID, + enc_key_id_, + enc_key_id_length_)) + return false; + + if (!enc_aes_settings_.Write(writer)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +uint64 ContentEncoding::EncodingSize(uint64 compresion_size, + uint64 encryption_size) const { + // TODO(fgalligan): Add support for compression settings. 
+ if (compresion_size != 0) + return 0; + + uint64 encoding_size = 0; + + if (encryption_size > 0) { + encoding_size += EbmlMasterElementSize(kMkvContentEncryption, + encryption_size) + + encryption_size; + } + encoding_size += EbmlElementSize(kMkvContentEncodingType, encoding_type_); + encoding_size += EbmlElementSize(kMkvContentEncodingScope, encoding_scope_); + encoding_size += EbmlElementSize(kMkvContentEncodingOrder, encoding_order_); + + return encoding_size; +} + +uint64 ContentEncoding::EncryptionSize() const { + const uint64 aes_size = enc_aes_settings_.Size(); + + uint64 encryption_size = EbmlElementSize(kMkvContentEncKeyID, + enc_key_id_, + enc_key_id_length_); + encryption_size += EbmlElementSize(kMkvContentEncAlgo, enc_algo_); + + return encryption_size + aes_size; +} + +/////////////////////////////////////////////////////////////// +// +// Track Class + +Track::Track(unsigned int* seed) + : codec_id_(NULL), + codec_private_(NULL), + language_(NULL), + max_block_additional_id_(0), + name_(NULL), + number_(0), + type_(0), + uid_(MakeUID(seed)), + codec_delay_(0), + seek_pre_roll_(0), + codec_private_length_(0), + content_encoding_entries_(NULL), + content_encoding_entries_size_(0) { +} + +Track::~Track() { + delete [] codec_id_; + delete [] codec_private_; + delete [] language_; + delete [] name_; + + if (content_encoding_entries_) { + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + delete encoding; + } + delete [] content_encoding_entries_; + } +} + +bool Track::AddContentEncoding() { + const uint32 count = content_encoding_entries_size_ + 1; + + ContentEncoding** const content_encoding_entries = + new (std::nothrow) ContentEncoding*[count]; // NOLINT + if (!content_encoding_entries) + return false; + + ContentEncoding* const content_encoding = + new (std::nothrow) ContentEncoding(); // NOLINT + if (!content_encoding) { + delete [] content_encoding_entries; + return 
false; + } + + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + content_encoding_entries[i] = content_encoding_entries_[i]; + } + + delete [] content_encoding_entries_; + + content_encoding_entries_ = content_encoding_entries; + content_encoding_entries_[content_encoding_entries_size_] = content_encoding; + content_encoding_entries_size_ = count; + return true; +} + +ContentEncoding* Track::GetContentEncodingByIndex(uint32 index) const { + if (content_encoding_entries_ == NULL) + return NULL; + + if (index >= content_encoding_entries_size_) + return NULL; + + return content_encoding_entries_[index]; +} + +uint64 Track::PayloadSize() const { + uint64 size = EbmlElementSize(kMkvTrackNumber, number_); + size += EbmlElementSize(kMkvTrackUID, uid_); + size += EbmlElementSize(kMkvTrackType, type_); + if (codec_id_) + size += EbmlElementSize(kMkvCodecID, codec_id_); + if (codec_private_) + size += EbmlElementSize(kMkvCodecPrivate, + codec_private_, + codec_private_length_); + if (language_) + size += EbmlElementSize(kMkvLanguage, language_); + if (name_) + size += EbmlElementSize(kMkvName, name_); + if (max_block_additional_id_) + size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_); + if (codec_delay_) + size += EbmlElementSize(kMkvCodecDelay, codec_delay_); + if (seek_pre_roll_) + size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_); + + if (content_encoding_entries_size_ > 0) { + uint64 content_encodings_size = 0; + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + content_encodings_size += encoding->Size(); + } + + size += EbmlMasterElementSize(kMkvContentEncodings, + content_encodings_size) + + content_encodings_size; + } + + return size; +} + +uint64 Track::Size() const { + uint64 size = PayloadSize(); + size += EbmlMasterElementSize(kMkvTrackEntry, size); + return size; +} + +bool Track::Write(IMkvWriter* writer) const { + if (!writer) + return 
false; + + // |size| may be bigger than what is written out in this function because + // derived classes may write out more data in the Track element. + const uint64 payload_size = PayloadSize(); + + if (!WriteEbmlMasterElement(writer, kMkvTrackEntry, payload_size)) + return false; + + uint64 size = EbmlElementSize(kMkvTrackNumber, number_); + size += EbmlElementSize(kMkvTrackUID, uid_); + size += EbmlElementSize(kMkvTrackType, type_); + if (codec_id_) + size += EbmlElementSize(kMkvCodecID, codec_id_); + if (codec_private_) + size += EbmlElementSize(kMkvCodecPrivate, + codec_private_, + codec_private_length_); + if (language_) + size += EbmlElementSize(kMkvLanguage, language_); + if (name_) + size += EbmlElementSize(kMkvName, name_); + if (max_block_additional_id_) + size += EbmlElementSize(kMkvMaxBlockAdditionID, max_block_additional_id_); + if (codec_delay_) + size += EbmlElementSize(kMkvCodecDelay, codec_delay_); + if (seek_pre_roll_) + size += EbmlElementSize(kMkvSeekPreRoll, seek_pre_roll_); + + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvTrackNumber, number_)) + return false; + if (!WriteEbmlElement(writer, kMkvTrackUID, uid_)) + return false; + if (!WriteEbmlElement(writer, kMkvTrackType, type_)) + return false; + if (max_block_additional_id_) { + if (!WriteEbmlElement(writer, + kMkvMaxBlockAdditionID, + max_block_additional_id_)) { + return false; + } + } + if (codec_delay_) { + if (!WriteEbmlElement(writer, kMkvCodecDelay, codec_delay_)) + return false; + } + if (seek_pre_roll_) { + if (!WriteEbmlElement(writer, kMkvSeekPreRoll, seek_pre_roll_)) + return false; + } + if (codec_id_) { + if (!WriteEbmlElement(writer, kMkvCodecID, codec_id_)) + return false; + } + if (codec_private_) { + if (!WriteEbmlElement(writer, + kMkvCodecPrivate, + codec_private_, + codec_private_length_)) + return false; + } + if (language_) { + if (!WriteEbmlElement(writer, kMkvLanguage, 
language_)) + return false; + } + if (name_) { + if (!WriteEbmlElement(writer, kMkvName, name_)) + return false; + } + + int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + if (content_encoding_entries_size_ > 0) { + uint64 content_encodings_size = 0; + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + content_encodings_size += encoding->Size(); + } + + if (!WriteEbmlMasterElement(writer, + kMkvContentEncodings, + content_encodings_size)) + return false; + + for (uint32 i = 0; i < content_encoding_entries_size_; ++i) { + ContentEncoding* const encoding = content_encoding_entries_[i]; + if (!encoding->Write(writer)) + return false; + } + } + + stop_position = writer->Position(); + if (stop_position < 0) + return false; + return true; +} + +bool Track::SetCodecPrivate(const uint8* codec_private, uint64 length) { + if (!codec_private || length < 1) + return false; + + delete [] codec_private_; + + codec_private_ = + new (std::nothrow) uint8[static_cast<size_t>(length)]; // NOLINT + if (!codec_private_) + return false; + + memcpy(codec_private_, codec_private, static_cast<size_t>(length)); + codec_private_length_ = length; + + return true; +} + +void Track::set_codec_id(const char* codec_id) { + if (codec_id) { + delete [] codec_id_; + + const size_t length = strlen(codec_id) + 1; + codec_id_ = new (std::nothrow) char[length]; // NOLINT + if (codec_id_) { +#ifdef _MSC_VER + strcpy_s(codec_id_, length, codec_id); +#else + strcpy(codec_id_, codec_id); +#endif + } + } +} + +// TODO(fgalligan): Vet the language parameter. 
+void Track::set_language(const char* language) { + if (language) { + delete [] language_; + + const size_t length = strlen(language) + 1; + language_ = new (std::nothrow) char[length]; // NOLINT + if (language_) { +#ifdef _MSC_VER + strcpy_s(language_, length, language); +#else + strcpy(language_, language); +#endif + } + } +} + +void Track::set_name(const char* name) { + if (name) { + delete [] name_; + + const size_t length = strlen(name) + 1; + name_ = new (std::nothrow) char[length]; // NOLINT + if (name_) { +#ifdef _MSC_VER + strcpy_s(name_, length, name); +#else + strcpy(name_, name); +#endif + } + } +} + +/////////////////////////////////////////////////////////////// +// +// VideoTrack Class + +VideoTrack::VideoTrack(unsigned int* seed) + : Track(seed), + display_height_(0), + display_width_(0), + frame_rate_(0.0), + height_(0), + stereo_mode_(0), + alpha_mode_(0), + width_(0) { +} + +VideoTrack::~VideoTrack() { +} + +bool VideoTrack::SetStereoMode(uint64 stereo_mode) { + if (stereo_mode != kMono && + stereo_mode != kSideBySideLeftIsFirst && + stereo_mode != kTopBottomRightIsFirst && + stereo_mode != kTopBottomLeftIsFirst && + stereo_mode != kSideBySideRightIsFirst) + return false; + + stereo_mode_ = stereo_mode; + return true; +} + +bool VideoTrack::SetAlphaMode(uint64 alpha_mode) { + if (alpha_mode != kNoAlpha && + alpha_mode != kAlpha) + return false; + + alpha_mode_ = alpha_mode; + return true; +} + +uint64 VideoTrack::PayloadSize() const { + const uint64 parent_size = Track::PayloadSize(); + + uint64 size = VideoPayloadSize(); + size += EbmlMasterElementSize(kMkvVideo, size); + + return parent_size + size; +} + +bool VideoTrack::Write(IMkvWriter* writer) const { + if (!Track::Write(writer)) + return false; + + const uint64 size = VideoPayloadSize(); + + if (!WriteEbmlMasterElement(writer, kMkvVideo, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if 
(!WriteEbmlElement(writer, kMkvPixelWidth, width_)) + return false; + if (!WriteEbmlElement(writer, kMkvPixelHeight, height_)) + return false; + if (display_width_ > 0) + if (!WriteEbmlElement(writer, kMkvDisplayWidth, display_width_)) + return false; + if (display_height_ > 0) + if (!WriteEbmlElement(writer, kMkvDisplayHeight, display_height_)) + return false; + if (stereo_mode_ > kMono) + if (!WriteEbmlElement(writer, kMkvStereoMode, stereo_mode_)) + return false; + if (alpha_mode_ > kNoAlpha) + if (!WriteEbmlElement(writer, kMkvAlphaMode, alpha_mode_)) + return false; + if (frame_rate_ > 0.0) + if (!WriteEbmlElement(writer, + kMkvFrameRate, + static_cast<float>(frame_rate_))) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +uint64 VideoTrack::VideoPayloadSize() const { + uint64 size = EbmlElementSize(kMkvPixelWidth, width_); + size += EbmlElementSize(kMkvPixelHeight, height_); + if (display_width_ > 0) + size += EbmlElementSize(kMkvDisplayWidth, display_width_); + if (display_height_ > 0) + size += EbmlElementSize(kMkvDisplayHeight, display_height_); + if (stereo_mode_ > kMono) + size += EbmlElementSize(kMkvStereoMode, stereo_mode_); + if (alpha_mode_ > kNoAlpha) + size += EbmlElementSize(kMkvAlphaMode, alpha_mode_); + if (frame_rate_ > 0.0) + size += EbmlElementSize(kMkvFrameRate, static_cast<float>(frame_rate_)); + + return size; +} + +/////////////////////////////////////////////////////////////// +// +// AudioTrack Class + +AudioTrack::AudioTrack(unsigned int* seed) + : Track(seed), + bit_depth_(0), + channels_(1), + sample_rate_(0.0) { +} + +AudioTrack::~AudioTrack() { +} + +uint64 AudioTrack::PayloadSize() const { + const uint64 parent_size = Track::PayloadSize(); + + uint64 size = EbmlElementSize(kMkvSamplingFrequency, + static_cast<float>(sample_rate_)); + size += EbmlElementSize(kMkvChannels, channels_); + 
if (bit_depth_ > 0) + size += EbmlElementSize(kMkvBitDepth, bit_depth_); + size += EbmlMasterElementSize(kMkvAudio, size); + + return parent_size + size; +} + +bool AudioTrack::Write(IMkvWriter* writer) const { + if (!Track::Write(writer)) + return false; + + // Calculate AudioSettings size. + uint64 size = EbmlElementSize(kMkvSamplingFrequency, + static_cast<float>(sample_rate_)); + size += EbmlElementSize(kMkvChannels, channels_); + if (bit_depth_ > 0) + size += EbmlElementSize(kMkvBitDepth, bit_depth_); + + if (!WriteEbmlMasterElement(writer, kMkvAudio, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, + kMkvSamplingFrequency, + static_cast<float>(sample_rate_))) + return false; + if (!WriteEbmlElement(writer, kMkvChannels, channels_)) + return false; + if (bit_depth_ > 0) + if (!WriteEbmlElement(writer, kMkvBitDepth, bit_depth_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// Tracks Class + +const char Tracks::kOpusCodecId[] = "A_OPUS"; +const char Tracks::kVorbisCodecId[] = "A_VORBIS"; +const char Tracks::kVp8CodecId[] = "V_VP8"; +const char Tracks::kVp9CodecId[] = "V_VP9"; + + +Tracks::Tracks() + : track_entries_(NULL), + track_entries_size_(0) { +} + +Tracks::~Tracks() { + if (track_entries_) { + for (uint32 i = 0; i < track_entries_size_; ++i) { + Track* const track = track_entries_[i]; + delete track; + } + delete [] track_entries_; + } +} + +bool Tracks::AddTrack(Track* track, int32 number) { + if (number < 0) + return false; + + // This muxer only supports track numbers in the range [1, 126], in + // order to be able (to use Matroska integer representation) to + // serialize the block header (of which the track number is a part) + 
// for a frame using exactly 4 bytes. + + if (number > 0x7E) + return false; + + uint32 track_num = number; + + if (track_num > 0) { + // Check to make sure a track does not already have |track_num|. + for (uint32 i = 0; i < track_entries_size_; ++i) { + if (track_entries_[i]->number() == track_num) + return false; + } + } + + const uint32 count = track_entries_size_ + 1; + + Track** const track_entries = new (std::nothrow) Track*[count]; // NOLINT + if (!track_entries) + return false; + + for (uint32 i = 0; i < track_entries_size_; ++i) { + track_entries[i] = track_entries_[i]; + } + + delete [] track_entries_; + + // Find the lowest availible track number > 0. + if (track_num == 0) { + track_num = count; + + // Check to make sure a track does not already have |track_num|. + bool exit = false; + do { + exit = true; + for (uint32 i = 0; i < track_entries_size_; ++i) { + if (track_entries[i]->number() == track_num) { + track_num++; + exit = false; + break; + } + } + } while (!exit); + } + track->set_number(track_num); + + track_entries_ = track_entries; + track_entries_[track_entries_size_] = track; + track_entries_size_ = count; + return true; +} + +const Track* Tracks::GetTrackByIndex(uint32 index) const { + if (track_entries_ == NULL) + return NULL; + + if (index >= track_entries_size_) + return NULL; + + return track_entries_[index]; +} + +Track* Tracks::GetTrackByNumber(uint64 track_number) const { + const int32 count = track_entries_size(); + for (int32 i = 0; i < count; ++i) { + if (track_entries_[i]->number() == track_number) + return track_entries_[i]; + } + + return NULL; +} + +bool Tracks::TrackIsAudio(uint64 track_number) const { + const Track* const track = GetTrackByNumber(track_number); + + if (track->type() == kAudio) + return true; + + return false; +} + +bool Tracks::TrackIsVideo(uint64 track_number) const { + const Track* const track = GetTrackByNumber(track_number); + + if (track->type() == kVideo) + return true; + + return false; +} + +bool 
Tracks::Write(IMkvWriter* writer) const { + uint64 size = 0; + const int32 count = track_entries_size(); + for (int32 i = 0; i < count; ++i) { + const Track* const track = GetTrackByIndex(i); + + if (!track) + return false; + + size += track->Size(); + } + + if (!WriteEbmlMasterElement(writer, kMkvTracks, size)) + return false; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return false; + + for (int32 i = 0; i < count; ++i) { + const Track* const track = GetTrackByIndex(i); + if (!track->Write(writer)) + return false; + } + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// Chapter Class + +bool Chapter::set_id(const char* id) { + return StrCpy(id, &id_); +} + +void Chapter::set_time(const Segment& segment, + uint64 start_ns, + uint64 end_ns) { + const SegmentInfo* const info = segment.GetSegmentInfo(); + const uint64 timecode_scale = info->timecode_scale(); + start_timecode_ = start_ns / timecode_scale; + end_timecode_ = end_ns / timecode_scale; +} + +bool Chapter::add_string(const char* title, + const char* language, + const char* country) { + if (!ExpandDisplaysArray()) + return false; + + Display& d = displays_[displays_count_++]; + d.Init(); + + if (!d.set_title(title)) + return false; + + if (!d.set_language(language)) + return false; + + if (!d.set_country(country)) + return false; + + return true; +} + +Chapter::Chapter() { + // This ctor only constructs the object. Proper initialization is + // done in Init() (called in Chapters::AddChapter()). The only + // reason we bother implementing this ctor is because we had to + // declare it as private (along with the dtor), in order to prevent + // clients from creating Chapter instances (a privelege we grant + // only to the Chapters class). 
Doing no initialization here also + // means that creating arrays of chapter objects is more efficient, + // because we only initialize each new chapter object as it becomes + // active on the array. +} + +Chapter::~Chapter() { +} + +void Chapter::Init(unsigned int* seed) { + id_ = NULL; + displays_ = NULL; + displays_size_ = 0; + displays_count_ = 0; + uid_ = MakeUID(seed); +} + +void Chapter::ShallowCopy(Chapter* dst) const { + dst->id_ = id_; + dst->start_timecode_ = start_timecode_; + dst->end_timecode_ = end_timecode_; + dst->uid_ = uid_; + dst->displays_ = displays_; + dst->displays_size_ = displays_size_; + dst->displays_count_ = displays_count_; +} + +void Chapter::Clear() { + StrCpy(NULL, &id_); + + while (displays_count_ > 0) { + Display& d = displays_[--displays_count_]; + d.Clear(); + } + + delete [] displays_; + displays_ = NULL; + + displays_size_ = 0; +} + +bool Chapter::ExpandDisplaysArray() { + if (displays_size_ > displays_count_) + return true; // nothing to do yet + + const int size = (displays_size_ == 0) ? 
1 : 2 * displays_size_; + + Display* const displays = new (std::nothrow) Display[size]; // NOLINT + if (displays == NULL) + return false; + + for (int idx = 0; idx < displays_count_; ++idx) { + displays[idx] = displays_[idx]; // shallow copy + } + + delete [] displays_; + + displays_ = displays; + displays_size_ = size; + + return true; +} + +uint64 Chapter::WriteAtom(IMkvWriter* writer) const { + uint64 payload_size = + EbmlElementSize(kMkvChapterStringUID, id_) + + EbmlElementSize(kMkvChapterUID, uid_) + + EbmlElementSize(kMkvChapterTimeStart, start_timecode_) + + EbmlElementSize(kMkvChapterTimeEnd, end_timecode_); + + for (int idx = 0; idx < displays_count_; ++idx) { + const Display& d = displays_[idx]; + payload_size += d.WriteDisplay(NULL); + } + + const uint64 atom_size = + EbmlMasterElementSize(kMkvChapterAtom, payload_size) + + payload_size; + + if (writer == NULL) + return atom_size; + + const int64 start = writer->Position(); + + if (!WriteEbmlMasterElement(writer, kMkvChapterAtom, payload_size)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterStringUID, id_)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterUID, uid_)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterTimeStart, start_timecode_)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapterTimeEnd, end_timecode_)) + return 0; + + for (int idx = 0; idx < displays_count_; ++idx) { + const Display& d = displays_[idx]; + + if (!d.WriteDisplay(writer)) + return 0; + } + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != atom_size) + return 0; + + return atom_size; +} + +void Chapter::Display::Init() { + title_ = NULL; + language_ = NULL; + country_ = NULL; +} + +void Chapter::Display::Clear() { + StrCpy(NULL, &title_); + StrCpy(NULL, &language_); + StrCpy(NULL, &country_); +} + +bool Chapter::Display::set_title(const char* title) { + return StrCpy(title, &title_); +} + +bool Chapter::Display::set_language(const char* language) { + 
return StrCpy(language, &language_); +} + +bool Chapter::Display::set_country(const char* country) { + return StrCpy(country, &country_); +} + +uint64 Chapter::Display::WriteDisplay(IMkvWriter* writer) const { + uint64 payload_size = EbmlElementSize(kMkvChapString, title_); + + if (language_) + payload_size += EbmlElementSize(kMkvChapLanguage, language_); + + if (country_) + payload_size += EbmlElementSize(kMkvChapCountry, country_); + + const uint64 display_size = + EbmlMasterElementSize(kMkvChapterDisplay, payload_size) + + payload_size; + + if (writer == NULL) + return display_size; + + const int64 start = writer->Position(); + + if (!WriteEbmlMasterElement(writer, kMkvChapterDisplay, payload_size)) + return 0; + + if (!WriteEbmlElement(writer, kMkvChapString, title_)) + return 0; + + if (language_) { + if (!WriteEbmlElement(writer, kMkvChapLanguage, language_)) + return 0; + } + + if (country_) { + if (!WriteEbmlElement(writer, kMkvChapCountry, country_)) + return 0; + } + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != display_size) + return 0; + + return display_size; +} + +/////////////////////////////////////////////////////////////// +// +// Chapters Class + +Chapters::Chapters() + : chapters_size_(0), + chapters_count_(0), + chapters_(NULL) { +} + +Chapters::~Chapters() { + while (chapters_count_ > 0) { + Chapter& chapter = chapters_[--chapters_count_]; + chapter.Clear(); + } + + delete [] chapters_; + chapters_ = NULL; +} + +int Chapters::Count() const { + return chapters_count_; +} + +Chapter* Chapters::AddChapter(unsigned int* seed) { + if (!ExpandChaptersArray()) + return NULL; + + Chapter& chapter = chapters_[chapters_count_++]; + chapter.Init(seed); + + return &chapter; +} + +bool Chapters::Write(IMkvWriter* writer) const { + if (writer == NULL) + return false; + + const uint64 payload_size = WriteEdition(NULL); // return size only + + if (!WriteEbmlMasterElement(writer, kMkvChapters, payload_size)) + return 
false; + + const int64 start = writer->Position(); + + if (WriteEdition(writer) == 0) // error + return false; + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != payload_size) + return false; + + return true; +} + +bool Chapters::ExpandChaptersArray() { + if (chapters_size_ > chapters_count_) + return true; // nothing to do yet + + const int size = (chapters_size_ == 0) ? 1 : 2 * chapters_size_; + + Chapter* const chapters = new (std::nothrow) Chapter[size]; // NOLINT + if (chapters == NULL) + return false; + + for (int idx = 0; idx < chapters_count_; ++idx) { + const Chapter& src = chapters_[idx]; + Chapter* const dst = chapters + idx; + src.ShallowCopy(dst); + } + + delete [] chapters_; + + chapters_ = chapters; + chapters_size_ = size; + + return true; +} + +uint64 Chapters::WriteEdition(IMkvWriter* writer) const { + uint64 payload_size = 0; + + for (int idx = 0; idx < chapters_count_; ++idx) { + const Chapter& chapter = chapters_[idx]; + payload_size += chapter.WriteAtom(NULL); + } + + const uint64 edition_size = + EbmlMasterElementSize(kMkvEditionEntry, payload_size) + + payload_size; + + if (writer == NULL) // return size only + return edition_size; + + const int64 start = writer->Position(); + + if (!WriteEbmlMasterElement(writer, kMkvEditionEntry, payload_size)) + return 0; // error + + for (int idx = 0; idx < chapters_count_; ++idx) { + const Chapter& chapter = chapters_[idx]; + + const uint64 chapter_size = chapter.WriteAtom(writer); + if (chapter_size == 0) // error + return 0; + } + + const int64 stop = writer->Position(); + + if (stop >= start && uint64(stop - start) != edition_size) + return 0; + + return edition_size; +} + +/////////////////////////////////////////////////////////////// +// +// Cluster class + +Cluster::Cluster(uint64 timecode, int64 cues_pos) + : blocks_added_(0), + finalized_(false), + header_written_(false), + payload_size_(0), + position_for_cues_(cues_pos), + size_position_(-1), + 
timecode_(timecode), + writer_(NULL) { +} + +Cluster::~Cluster() { +} + +bool Cluster::Init(IMkvWriter* ptr_writer) { + if (!ptr_writer) { + return false; + } + writer_ = ptr_writer; + return true; +} + +bool Cluster::AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 abs_timecode, + bool is_key) { + return DoWriteBlock(frame, + length, + track_number, + abs_timecode, + is_key ? 1 : 0, + &WriteSimpleBlock); +} + +bool Cluster::AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 abs_timecode, + bool is_key) { + return DoWriteBlockWithAdditional(frame, + length, + additional, + additional_length, + add_id, + track_number, + abs_timecode, + is_key ? 1 : 0, + &WriteBlockWithAdditional); +} + +bool Cluster::AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 abs_timecode, + bool is_key) { + return DoWriteBlockWithDiscardPadding(frame, + length, + discard_padding, + track_number, + abs_timecode, + is_key ? 
1 : 0, + &WriteBlockWithDiscardPadding); +} + +bool Cluster::AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 abs_timecode, + uint64 duration_timecode) { + return DoWriteBlock(frame, + length, + track_number, + abs_timecode, + duration_timecode, + &WriteMetadataBlock); +} + +void Cluster::AddPayloadSize(uint64 size) { + payload_size_ += size; +} + +bool Cluster::Finalize() { + if (!writer_ || finalized_ || size_position_ == -1) + return false; + + if (writer_->Seekable()) { + const int64 pos = writer_->Position(); + + if (writer_->Position(size_position_)) + return false; + + if (WriteUIntSize(writer_, payload_size(), 8)) + return false; + + if (writer_->Position(pos)) + return false; + } + + finalized_ = true; + + return true; +} + +uint64 Cluster::Size() const { + const uint64 element_size = + EbmlMasterElementSize(kMkvCluster, + 0xFFFFFFFFFFFFFFFFULL) + payload_size_; + return element_size; +} + +template <typename Type> +bool Cluster::PreWriteBlock(Type* write_function) { + if (write_function == NULL) + return false; + + if (finalized_) + return false; + + if (!header_written_) { + if (!WriteClusterHeader()) + return false; + } + + return true; +} + +void Cluster::PostWriteBlock(uint64 element_size) { + AddPayloadSize(element_size); + ++blocks_added_; +} + +bool Cluster::IsValidTrackNumber(uint64 track_number) const { + return (track_number > 0 && track_number <= 0x7E); +} + +int64 Cluster::GetRelativeTimecode(int64 abs_timecode) const { + const int64 cluster_timecode = this->Cluster::timecode(); + const int64 rel_timecode = + static_cast<int64>(abs_timecode) - cluster_timecode; + + if (rel_timecode < 0 || rel_timecode > kMaxBlockTimecode) + return -1; + + return rel_timecode; +} + +bool Cluster::DoWriteBlock( + const uint8* frame, + uint64 length, + uint64 track_number, + uint64 abs_timecode, + uint64 generic_arg, + WriteBlock write_block) { + if (frame == NULL || length == 0) + return false; + + if 
(!IsValidTrackNumber(track_number)) + return false; + + const int64 rel_timecode = GetRelativeTimecode(abs_timecode); + if (rel_timecode < 0) + return false; + + if (!PreWriteBlock(write_block)) + return false; + + const uint64 element_size = (*write_block)(writer_, + frame, + length, + track_number, + rel_timecode, + generic_arg); + if (element_size == 0) + return false; + + PostWriteBlock(element_size); + return true; +} + +bool Cluster::DoWriteBlockWithAdditional( + const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 abs_timecode, + uint64 generic_arg, + WriteBlockAdditional write_block) { + if (frame == NULL || length == 0 || + additional == NULL || additional_length == 0) + return false; + + if (!IsValidTrackNumber(track_number)) + return false; + + const int64 rel_timecode = GetRelativeTimecode(abs_timecode); + if (rel_timecode < 0) + return false; + + if (!PreWriteBlock(write_block)) + return false; + + const uint64 element_size = (*write_block)(writer_, + frame, + length, + additional, + additional_length, + add_id, + track_number, + rel_timecode, + generic_arg); + if (element_size == 0) + return false; + + PostWriteBlock(element_size); + return true; +} + +bool Cluster::DoWriteBlockWithDiscardPadding( + const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 abs_timecode, + uint64 generic_arg, + WriteBlockDiscardPadding write_block) { + if (frame == NULL || length == 0 || discard_padding <= 0) + return false; + + if (!IsValidTrackNumber(track_number)) + return false; + + const int64 rel_timecode = GetRelativeTimecode(abs_timecode); + if (rel_timecode < 0) + return false; + + if (!PreWriteBlock(write_block)) + return false; + + const uint64 element_size = (*write_block)(writer_, + frame, + length, + discard_padding, + track_number, + rel_timecode, + generic_arg); + if (element_size == 0) + return false; + + 
PostWriteBlock(element_size); + return true; +} + +bool Cluster::WriteClusterHeader() { + if (finalized_) + return false; + + if (WriteID(writer_, kMkvCluster)) + return false; + + // Save for later. + size_position_ = writer_->Position(); + + // Write "unknown" (EBML coded -1) as cluster size value. We need to write 8 + // bytes because we do not know how big our cluster will be. + if (SerializeInt(writer_, kEbmlUnknownValue, 8)) + return false; + + if (!WriteEbmlElement(writer_, kMkvTimecode, timecode())) + return false; + AddPayloadSize(EbmlElementSize(kMkvTimecode, timecode())); + header_written_ = true; + + return true; +} + +/////////////////////////////////////////////////////////////// +// +// SeekHead Class + +SeekHead::SeekHead() : start_pos_(0ULL) { + for (int32 i = 0; i < kSeekEntryCount; ++i) { + seek_entry_id_[i] = 0; + seek_entry_pos_[i] = 0; + } +} + +SeekHead::~SeekHead() { +} + +bool SeekHead::Finalize(IMkvWriter* writer) const { + if (writer->Seekable()) { + if (start_pos_ == -1) + return false; + + uint64 payload_size = 0; + uint64 entry_size[kSeekEntryCount]; + + for (int32 i = 0; i < kSeekEntryCount; ++i) { + if (seek_entry_id_[i] != 0) { + entry_size[i] = EbmlElementSize( + kMkvSeekID, + static_cast<uint64>(seek_entry_id_[i])); + entry_size[i] += EbmlElementSize(kMkvSeekPosition, seek_entry_pos_[i]); + + payload_size += EbmlMasterElementSize(kMkvSeek, entry_size[i]) + + entry_size[i]; + } + } + + // No SeekHead elements + if (payload_size == 0) + return true; + + const int64 pos = writer->Position(); + if (writer->Position(start_pos_)) + return false; + + if (!WriteEbmlMasterElement(writer, kMkvSeekHead, payload_size)) + return false; + + for (int32 i = 0; i < kSeekEntryCount; ++i) { + if (seek_entry_id_[i] != 0) { + if (!WriteEbmlMasterElement(writer, kMkvSeek, entry_size[i])) + return false; + + if (!WriteEbmlElement(writer, + kMkvSeekID, + static_cast<uint64>(seek_entry_id_[i]))) + return false; + + if (!WriteEbmlElement(writer, 
kMkvSeekPosition, seek_entry_pos_[i])) + return false; + } + } + + const uint64 total_entry_size = kSeekEntryCount * MaxEntrySize(); + const uint64 total_size = + EbmlMasterElementSize(kMkvSeekHead, + total_entry_size) + total_entry_size; + const int64 size_left = total_size - (writer->Position() - start_pos_); + + const uint64 bytes_written = WriteVoidElement(writer, size_left); + if (!bytes_written) + return false; + + if (writer->Position(pos)) + return false; + } + + return true; +} + +bool SeekHead::Write(IMkvWriter* writer) { + const uint64 entry_size = kSeekEntryCount * MaxEntrySize(); + const uint64 size = EbmlMasterElementSize(kMkvSeekHead, entry_size); + + start_pos_ = writer->Position(); + + const uint64 bytes_written = WriteVoidElement(writer, size + entry_size); + if (!bytes_written) + return false; + + return true; +} + +bool SeekHead::AddSeekEntry(uint32 id, uint64 pos) { + for (int32 i = 0; i < kSeekEntryCount; ++i) { + if (seek_entry_id_[i] == 0) { + seek_entry_id_[i] = id; + seek_entry_pos_[i] = pos; + return true; + } + } + return false; +} + +uint32 SeekHead::GetId(int index) const { + if (index < 0 || index >= kSeekEntryCount) + return UINT_MAX; + return seek_entry_id_[index]; +} + +uint64 SeekHead::GetPosition(int index) const { + if (index < 0 || index >= kSeekEntryCount) + return ULLONG_MAX; + return seek_entry_pos_[index]; +} + +bool SeekHead::SetSeekEntry(int index, uint32 id, uint64 position) { + if (index < 0 || index >= kSeekEntryCount) + return false; + seek_entry_id_[index] = id; + seek_entry_pos_[index] = position; + return true; +} + +uint64 SeekHead::MaxEntrySize() const { + const uint64 max_entry_payload_size = + EbmlElementSize(kMkvSeekID, 0xffffffffULL) + + EbmlElementSize(kMkvSeekPosition, 0xffffffffffffffffULL); + const uint64 max_entry_size = + EbmlMasterElementSize(kMkvSeek, max_entry_payload_size) + + max_entry_payload_size; + + return max_entry_size; +} + +/////////////////////////////////////////////////////////////// +// 
+// SegmentInfo Class + +SegmentInfo::SegmentInfo() + : duration_(-1.0), + muxing_app_(NULL), + timecode_scale_(1000000ULL), + writing_app_(NULL), + duration_pos_(-1) { +} + +SegmentInfo::~SegmentInfo() { + delete [] muxing_app_; + delete [] writing_app_; +} + +bool SegmentInfo::Init() { + int32 major; + int32 minor; + int32 build; + int32 revision; + GetVersion(&major, &minor, &build, &revision); + char temp[256]; +#ifdef _MSC_VER + sprintf_s(temp, + sizeof(temp)/sizeof(temp[0]), + "libwebm-%d.%d.%d.%d", + major, + minor, + build, + revision); +#else + snprintf(temp, + sizeof(temp)/sizeof(temp[0]), + "libwebm-%d.%d.%d.%d", + major, + minor, + build, + revision); +#endif + + const size_t app_len = strlen(temp) + 1; + + delete [] muxing_app_; + + muxing_app_ = new (std::nothrow) char[app_len]; // NOLINT + if (!muxing_app_) + return false; + +#ifdef _MSC_VER + strcpy_s(muxing_app_, app_len, temp); +#else + strcpy(muxing_app_, temp); +#endif + + set_writing_app(temp); + if (!writing_app_) + return false; + return true; +} + +bool SegmentInfo::Finalize(IMkvWriter* writer) const { + if (!writer) + return false; + + if (duration_ > 0.0) { + if (writer->Seekable()) { + if (duration_pos_ == -1) + return false; + + const int64 pos = writer->Position(); + + if (writer->Position(duration_pos_)) + return false; + + if (!WriteEbmlElement(writer, + kMkvDuration, + static_cast<float>(duration_))) + return false; + + if (writer->Position(pos)) + return false; + } + } + + return true; +} + +bool SegmentInfo::Write(IMkvWriter* writer) { + if (!writer || !muxing_app_ || !writing_app_) + return false; + + uint64 size = EbmlElementSize(kMkvTimecodeScale, timecode_scale_); + if (duration_ > 0.0) + size += EbmlElementSize(kMkvDuration, static_cast<float>(duration_)); + size += EbmlElementSize(kMkvMuxingApp, muxing_app_); + size += EbmlElementSize(kMkvWritingApp, writing_app_); + + if (!WriteEbmlMasterElement(writer, kMkvInfo, size)) + return false; + + const int64 payload_position = 
writer->Position(); + if (payload_position < 0) + return false; + + if (!WriteEbmlElement(writer, kMkvTimecodeScale, timecode_scale_)) + return false; + + if (duration_ > 0.0) { + // Save for later + duration_pos_ = writer->Position(); + + if (!WriteEbmlElement(writer, kMkvDuration, static_cast<float>(duration_))) + return false; + } + + if (!WriteEbmlElement(writer, kMkvMuxingApp, muxing_app_)) + return false; + if (!WriteEbmlElement(writer, kMkvWritingApp, writing_app_)) + return false; + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(size)) + return false; + + return true; +} + +void SegmentInfo::set_muxing_app(const char* app) { + if (app) { + const size_t length = strlen(app) + 1; + char* temp_str = new (std::nothrow) char[length]; // NOLINT + if (!temp_str) + return; + +#ifdef _MSC_VER + strcpy_s(temp_str, length, app); +#else + strcpy(temp_str, app); +#endif + + delete [] muxing_app_; + muxing_app_ = temp_str; + } +} + +void SegmentInfo::set_writing_app(const char* app) { + if (app) { + const size_t length = strlen(app) + 1; + char* temp_str = new (std::nothrow) char[length]; // NOLINT + if (!temp_str) + return; + +#ifdef _MSC_VER + strcpy_s(temp_str, length, app); +#else + strcpy(temp_str, app); +#endif + + delete [] writing_app_; + writing_app_ = temp_str; + } +} + +/////////////////////////////////////////////////////////////// +// +// Segment Class + +Segment::Segment() + : chunk_count_(0), + chunk_name_(NULL), + chunk_writer_cluster_(NULL), + chunk_writer_cues_(NULL), + chunk_writer_header_(NULL), + chunking_(false), + chunking_base_name_(NULL), + cluster_list_(NULL), + cluster_list_capacity_(0), + cluster_list_size_(0), + cues_position_(kAfterClusters), + cues_track_(0), + force_new_cluster_(false), + frames_(NULL), + frames_capacity_(0), + frames_size_(0), + has_video_(false), + header_written_(false), + last_block_duration_(0), + last_timestamp_(0), + 
max_cluster_duration_(kDefaultMaxClusterDuration), + max_cluster_size_(0), + mode_(kFile), + new_cuepoint_(false), + output_cues_(true), + payload_pos_(0), + size_position_(0), + writer_cluster_(NULL), + writer_cues_(NULL), + writer_header_(NULL) { + const time_t curr_time = time(NULL); + seed_ = static_cast<unsigned int>(curr_time); +#ifdef _WIN32 + srand(seed_); +#endif +} + +Segment::~Segment() { + if (cluster_list_) { + for (int32 i = 0; i < cluster_list_size_; ++i) { + Cluster* const cluster = cluster_list_[i]; + delete cluster; + } + delete [] cluster_list_; + } + + if (frames_) { + for (int32 i = 0; i < frames_size_; ++i) { + Frame* const frame = frames_[i]; + delete frame; + } + delete [] frames_; + } + + delete [] chunk_name_; + delete [] chunking_base_name_; + + if (chunk_writer_cluster_) { + chunk_writer_cluster_->Close(); + delete chunk_writer_cluster_; + } + if (chunk_writer_cues_) { + chunk_writer_cues_->Close(); + delete chunk_writer_cues_; + } + if (chunk_writer_header_) { + chunk_writer_header_->Close(); + delete chunk_writer_header_; + } +} + +void Segment::MoveCuesBeforeClustersHelper(uint64 diff, + int32 index, + uint64* cues_size) { + const uint64 old_cues_size = *cues_size; + CuePoint* const cue_point = cues_.GetCueByIndex(index); + if (cue_point == NULL) + return; + const uint64 old_cue_point_size = cue_point->Size(); + const uint64 cluster_pos = cue_point->cluster_pos() + diff; + cue_point->set_cluster_pos(cluster_pos); // update the new cluster position + // New size of the cue is computed as follows + // Let a = current size of Cues Element + // Let b = Difference in Cue Point's size after this pass + // Let c = Difference in length of Cues Element's size + // (This is computed as CodedSize(a + b) - CodedSize(a) + // Let d = a + b + c. Now d is the new size of the Cues element which is + // passed on to the next recursive call. 
+ const uint64 cue_point_size_diff = cue_point->Size() - old_cue_point_size; + const uint64 cue_size_diff = GetCodedUIntSize(*cues_size + + cue_point_size_diff) - + GetCodedUIntSize(*cues_size); + *cues_size += cue_point_size_diff + cue_size_diff; + diff = *cues_size - old_cues_size; + if (diff > 0) { + for (int32 i = 0; i < cues_.cue_entries_size(); ++i) { + MoveCuesBeforeClustersHelper(diff, i, cues_size); + } + } +} + +void Segment::MoveCuesBeforeClusters() { + const uint64 current_cue_size = cues_.Size(); + uint64 cue_size = current_cue_size; + for (int32 i = 0; i < cues_.cue_entries_size(); i++) + MoveCuesBeforeClustersHelper(current_cue_size, i, &cue_size); + + // Adjust the Seek Entry to reflect the change in position + // of Cluster and Cues + int32 cluster_index = 0; + int32 cues_index = 0; + for (int32 i = 0; i < SeekHead::kSeekEntryCount; ++i) { + if (seek_head_.GetId(i) == kMkvCluster) + cluster_index = i; + if (seek_head_.GetId(i) == kMkvCues) + cues_index = i; + } + seek_head_.SetSeekEntry(cues_index, kMkvCues, + seek_head_.GetPosition(cluster_index)); + seek_head_.SetSeekEntry(cluster_index, kMkvCluster, + cues_.Size() + seek_head_.GetPosition(cues_index)); +} + +bool Segment::Init(IMkvWriter* ptr_writer) { + if (!ptr_writer) { + return false; + } + writer_cluster_ = ptr_writer; + writer_cues_ = ptr_writer; + writer_header_ = ptr_writer; + return segment_info_.Init(); +} + +bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, + IMkvWriter* writer) { + if (!writer->Seekable() || chunking_) + return false; + const int64 cluster_offset = cluster_list_[0]->size_position() - + GetUIntSize(kMkvCluster); + + // Copy the headers. + if (!ChunkedCopy(reader, writer, 0, cluster_offset)) + return false; + + // Recompute cue positions and seek entries. + MoveCuesBeforeClusters(); + + // Write cues and seek entries. + // TODO(vigneshv): As of now, it's safe to call seek_head_.Finalize() for the + // second time with a different writer object. 
But the name Finalize() doesn't + // indicate something we want to call more than once. So consider renaming it + // to write() or some such. + if (!cues_.Write(writer) || !seek_head_.Finalize(writer)) + return false; + + // Copy the Clusters. + if (!ChunkedCopy(reader, writer, cluster_offset, + cluster_end_offset_ - cluster_offset)) + return false; + + // Update the Segment size in case the Cues size has changed. + const int64 pos = writer->Position(); + const int64 segment_size = writer->Position() - payload_pos_; + if (writer->Position(size_position_) || + WriteUIntSize(writer, segment_size, 8) || + writer->Position(pos)) + return false; + return true; +} + +bool Segment::Finalize() { + if (WriteFramesAll() < 0) + return false; + + if (mode_ == kFile) { + if (cluster_list_size_ > 0) { + // Update last cluster's size + Cluster* const old_cluster = cluster_list_[cluster_list_size_-1]; + + if (!old_cluster || !old_cluster->Finalize()) + return false; + } + + if (chunking_ && chunk_writer_cluster_) { + chunk_writer_cluster_->Close(); + chunk_count_++; + } + + const double duration = + (static_cast<double>(last_timestamp_) + last_block_duration_) / + segment_info_.timecode_scale(); + segment_info_.set_duration(duration); + if (!segment_info_.Finalize(writer_header_)) + return false; + + if (output_cues_) + if (!seek_head_.AddSeekEntry(kMkvCues, MaxOffset())) + return false; + + if (chunking_) { + if (!chunk_writer_cues_) + return false; + + char* name = NULL; + if (!UpdateChunkName("cues", &name)) + return false; + + const bool cues_open = chunk_writer_cues_->Open(name); + delete [] name; + if (!cues_open) + return false; + } + + cluster_end_offset_ = writer_cluster_->Position(); + + // Write the seek headers and cues + if (output_cues_) + if (!cues_.Write(writer_cues_)) + return false; + + if (!seek_head_.Finalize(writer_header_)) + return false; + + if (writer_header_->Seekable()) { + if (size_position_ == -1) + return false; + + const int64 pos = 
writer_header_->Position(); + const int64 segment_size = MaxOffset(); + + if (segment_size < 1) + return false; + + if (writer_header_->Position(size_position_)) + return false; + + if (WriteUIntSize(writer_header_, segment_size, 8)) + return false; + + if (writer_header_->Position(pos)) + return false; + } + + if (chunking_) { + // Do not close any writers until the segment size has been written, + // otherwise the size may be off. + if (!chunk_writer_cues_ || !chunk_writer_header_) + return false; + + chunk_writer_cues_->Close(); + chunk_writer_header_->Close(); + } + } + + return true; +} + +Track* Segment::AddTrack(int32 number) { + Track* const track = new (std::nothrow) Track(&seed_); // NOLINT + + if (!track) + return NULL; + + if (!tracks_.AddTrack(track, number)) { + delete track; + return NULL; + } + + return track; +} + +Chapter* Segment::AddChapter() { + return chapters_.AddChapter(&seed_); +} + +uint64 Segment::AddVideoTrack(int32 width, int32 height, int32 number) { + VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_); // NOLINT + if (!track) + return 0; + + track->set_type(Tracks::kVideo); + track->set_codec_id(Tracks::kVp8CodecId); + track->set_width(width); + track->set_height(height); + + tracks_.AddTrack(track, number); + has_video_ = true; + + return track->number(); +} + +bool Segment::AddCuePoint(uint64 timestamp, uint64 track) { + if (cluster_list_size_ < 1) + return false; + + const Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + if (!cluster) + return false; + + CuePoint* const cue = new (std::nothrow) CuePoint(); // NOLINT + if (!cue) + return false; + + cue->set_time(timestamp / segment_info_.timecode_scale()); + cue->set_block_number(cluster->blocks_added()); + cue->set_cluster_pos(cluster->position_for_cues()); + cue->set_track(track); + if (!cues_.AddCue(cue)) + return false; + + new_cuepoint_ = false; + return true; +} + +uint64 Segment::AddAudioTrack(int32 sample_rate, + int32 channels, + int32 number) { 
+ AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_); // NOLINT + if (!track) + return 0; + + track->set_type(Tracks::kAudio); + track->set_codec_id(Tracks::kVorbisCodecId); + track->set_sample_rate(sample_rate); + track->set_channels(channels); + + tracks_.AddTrack(track, number); + + return track->number(); +} + +bool Segment::AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp, + bool is_key) { + if (!frame) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. + if (timestamp < last_timestamp_) + return false; + + // If the segment has a video track hold onto audio frames to make sure the + // audio that is associated with the start time of a video key-frame is + // muxed into the same cluster. + if (has_video_ && tracks_.TrackIsAudio(track_number) && !force_new_cluster_) { + Frame* const new_frame = new (std::nothrow) Frame(); + if (new_frame == NULL || !new_frame->Init(frame, length)) + return false; + new_frame->set_track_number(track_number); + new_frame->set_timestamp(timestamp); + new_frame->set_is_key(is_key); + + if (!QueueFrame(new_frame)) + return false; + + return true; + } + + if (!DoNewClusterProcessing(track_number, timestamp, is_key)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp / timecode_scale; + + if (!cluster->AddFrame(frame, + length, + track_number, + abs_timecode, + is_key)) + return false; + + if (new_cuepoint_ && cues_track_ == track_number) { + if (!AddCuePoint(timestamp, cues_track_)) + return false; + } + + if (timestamp > last_timestamp_) + last_timestamp_ = timestamp; + + return true; +} + +bool Segment::AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 
additional_length, + uint64 add_id, + uint64 track_number, + uint64 timestamp, + bool is_key) { + if (frame == NULL || additional == NULL) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. + if (timestamp < last_timestamp_) + return false; + + // If the segment has a video track hold onto audio frames to make sure the + // audio that is associated with the start time of a video key-frame is + // muxed into the same cluster. + if (has_video_ && tracks_.TrackIsAudio(track_number) && !force_new_cluster_) { + Frame* const new_frame = new (std::nothrow) Frame(); + if (new_frame == NULL || !new_frame->Init(frame, length)) + return false; + new_frame->set_track_number(track_number); + new_frame->set_timestamp(timestamp); + new_frame->set_is_key(is_key); + + if (!QueueFrame(new_frame)) + return false; + + return true; + } + + if (!DoNewClusterProcessing(track_number, timestamp, is_key)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; + if (cluster == NULL) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp / timecode_scale; + + if (!cluster->AddFrameWithAdditional(frame, + length, + additional, + additional_length, + add_id, + track_number, + abs_timecode, + is_key)) + return false; + + if (new_cuepoint_ && cues_track_ == track_number) { + if (!AddCuePoint(timestamp, cues_track_)) + return false; + } + + if (timestamp > last_timestamp_) + last_timestamp_ = timestamp; + + return true; +} + +bool Segment::AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 timestamp, + bool is_key) { + if (frame == NULL || discard_padding <= 0) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. 
+ if (timestamp < last_timestamp_) + return false; + + // If the segment has a video track hold onto audio frames to make sure the + // audio that is associated with the start time of a video key-frame is + // muxed into the same cluster. + if (has_video_ && tracks_.TrackIsAudio(track_number) && !force_new_cluster_) { + Frame* const new_frame = new (std::nothrow) Frame(); + if (new_frame == NULL || !new_frame->Init(frame, length)) + return false; + new_frame->set_track_number(track_number); + new_frame->set_timestamp(timestamp); + new_frame->set_is_key(is_key); + new_frame->set_discard_padding(discard_padding); + + if (!QueueFrame(new_frame)) + return false; + + return true; + } + + if (!DoNewClusterProcessing(track_number, timestamp, is_key)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp / timecode_scale; + + if (!cluster->AddFrameWithDiscardPadding(frame, length, + discard_padding, + track_number, + abs_timecode, + is_key)) { + return false; + } + + if (new_cuepoint_ && cues_track_ == track_number) { + if (!AddCuePoint(timestamp, cues_track_)) + return false; + } + + if (timestamp > last_timestamp_) + last_timestamp_ = timestamp; + + return true; +} + +bool Segment::AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp_ns, + uint64 duration_ns) { + if (!frame) + return false; + + if (!CheckHeaderInfo()) + return false; + + // Check for non-monotonically increasing timestamps. 
+ if (timestamp_ns < last_timestamp_) + return false; + + if (!DoNewClusterProcessing(track_number, timestamp_ns, true)) + return false; + + if (cluster_list_size_ < 1) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 abs_timecode = timestamp_ns / timecode_scale; + const uint64 duration_timecode = duration_ns / timecode_scale; + + if (!cluster->AddMetadata(frame, + length, + track_number, + abs_timecode, + duration_timecode)) + return false; + + if (timestamp_ns > last_timestamp_) + last_timestamp_ = timestamp_ns; + + return true; +} + +bool Segment::AddGenericFrame(const Frame* frame) { + last_block_duration_ = frame->duration(); + if (!tracks_.TrackIsAudio(frame->track_number()) && + !tracks_.TrackIsVideo(frame->track_number()) && + frame->duration() > 0) { + return AddMetadata(frame->frame(), + frame->length(), + frame->track_number(), + frame->timestamp(), + frame->duration()); + } else if (frame->additional() && frame->additional_length() > 0) { + return AddFrameWithAdditional(frame->frame(), + frame->length(), + frame->additional(), + frame->additional_length(), + frame->add_id(), + frame->track_number(), + frame->timestamp(), + frame->is_key()); + } else if (frame->discard_padding() > 0) { + return AddFrameWithDiscardPadding(frame->frame(), frame->length(), + frame->discard_padding(), + frame->track_number(), + frame->timestamp(), + frame->is_key()); + } else { + return AddFrame(frame->frame(), + frame->length(), + frame->track_number(), + frame->timestamp(), + frame->is_key()); + } +} + +void Segment::OutputCues(bool output_cues) { + output_cues_ = output_cues; +} + +bool Segment::SetChunking(bool chunking, const char* filename) { + if (chunk_count_ > 0) + return false; + + if (chunking) { + if (!filename) + return false; + + // Check if we are being set to what is already set. 
+ if (chunking_ && !strcmp(filename, chunking_base_name_)) + return true; + + const size_t name_length = strlen(filename) + 1; + char* const temp = new (std::nothrow) char[name_length]; // NOLINT + if (!temp) + return false; + +#ifdef _MSC_VER + strcpy_s(temp, name_length, filename); +#else + strcpy(temp, filename); +#endif + + delete [] chunking_base_name_; + chunking_base_name_ = temp; + + if (!UpdateChunkName("chk", &chunk_name_)) + return false; + + if (!chunk_writer_cluster_) { + chunk_writer_cluster_ = new (std::nothrow) MkvWriter(); // NOLINT + if (!chunk_writer_cluster_) + return false; + } + + if (!chunk_writer_cues_) { + chunk_writer_cues_ = new (std::nothrow) MkvWriter(); // NOLINT + if (!chunk_writer_cues_) + return false; + } + + if (!chunk_writer_header_) { + chunk_writer_header_ = new (std::nothrow) MkvWriter(); // NOLINT + if (!chunk_writer_header_) + return false; + } + + if (!chunk_writer_cluster_->Open(chunk_name_)) + return false; + + const size_t header_length = strlen(filename) + strlen(".hdr") + 1; + char* const header = new (std::nothrow) char[header_length]; // NOLINT + if (!header) + return false; + +#ifdef _MSC_VER + strcpy_s(header, header_length - strlen(".hdr"), chunking_base_name_); + strcat_s(header, header_length, ".hdr"); +#else + strcpy(header, chunking_base_name_); + strcat(header, ".hdr"); +#endif + if (!chunk_writer_header_->Open(header)) { + delete [] header; + return false; + } + + writer_cluster_ = chunk_writer_cluster_; + writer_cues_ = chunk_writer_cues_; + writer_header_ = chunk_writer_header_; + + delete [] header; + } + + chunking_ = chunking; + + return true; +} + +bool Segment::CuesTrack(uint64 track_number) { + const Track* const track = GetTrackByNumber(track_number); + if (!track) + return false; + + cues_track_ = track_number; + return true; +} + +void Segment::ForceNewClusterOnNextFrame() { + force_new_cluster_ = true; +} + +Track* Segment::GetTrackByNumber(uint64 track_number) const { + return 
tracks_.GetTrackByNumber(track_number); +} + +bool Segment::WriteSegmentHeader() { + // TODO(fgalligan): Support more than one segment. + if (!WriteEbmlHeader(writer_header_)) + return false; + + // Write "unknown" (-1) as segment size value. If mode is kFile, Segment + // will write over duration when the file is finalized. + if (WriteID(writer_header_, kMkvSegment)) + return false; + + // Save for later. + size_position_ = writer_header_->Position(); + + // Write "unknown" (EBML coded -1) as segment size value. We need to write 8 + // bytes because if we are going to overwrite the segment size later we do + // not know how big our segment will be. + if (SerializeInt(writer_header_, kEbmlUnknownValue, 8)) + return false; + + payload_pos_ = writer_header_->Position(); + + if (mode_ == kFile && writer_header_->Seekable()) { + // Set the duration > 0.0 so SegmentInfo will write out the duration. When + // the muxer is done writing we will set the correct duration and have + // SegmentInfo upadte it. + segment_info_.set_duration(1.0); + + if (!seek_head_.Write(writer_header_)) + return false; + } + + if (!seek_head_.AddSeekEntry(kMkvInfo, MaxOffset())) + return false; + if (!segment_info_.Write(writer_header_)) + return false; + + if (!seek_head_.AddSeekEntry(kMkvTracks, MaxOffset())) + return false; + if (!tracks_.Write(writer_header_)) + return false; + + if (chapters_.Count() > 0) { + if (!seek_head_.AddSeekEntry(kMkvChapters, MaxOffset())) + return false; + if (!chapters_.Write(writer_header_)) + return false; + } + + if (chunking_ && (mode_ == kLive || !writer_header_->Seekable())) { + if (!chunk_writer_header_) + return false; + + chunk_writer_header_->Close(); + } + + header_written_ = true; + + return true; +} + +// Here we are testing whether to create a new cluster, given a frame +// having time frame_timestamp_ns. 
+// +int Segment::TestFrame(uint64 track_number, + uint64 frame_timestamp_ns, + bool is_key) const { + if (force_new_cluster_) + return 1; + + // If no clusters have been created yet, then create a new cluster + // and write this frame immediately, in the new cluster. This path + // should only be followed once, the first time we attempt to write + // a frame. + + if (cluster_list_size_ <= 0) + return 1; + + // There exists at least one cluster. We must compare the frame to + // the last cluster, in order to determine whether the frame is + // written to the existing cluster, or that a new cluster should be + // created. + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 frame_timecode = frame_timestamp_ns / timecode_scale; + + const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1]; + const uint64 last_cluster_timecode = last_cluster->timecode(); + + // For completeness we test for the case when the frame's timecode + // is less than the cluster's timecode. Although in principle that + // is allowed, this muxer doesn't actually write clusters like that, + // so this indicates a bug somewhere in our algorithm. + + if (frame_timecode < last_cluster_timecode) // should never happen + return -1; // error + + // If the frame has a timestamp significantly larger than the last + // cluster (in Matroska, cluster-relative timestamps are serialized + // using a 16-bit signed integer), then we cannot write this frame + // to that cluster, and so we must create a new cluster. + + const int64 delta_timecode = frame_timecode - last_cluster_timecode; + + if (delta_timecode > kMaxBlockTimecode) + return 2; + + // We decide to create a new cluster when we have a video keyframe. + // This will flush queued (audio) frames, and write the keyframe + // immediately, in the newly-created cluster. 
+ + if (is_key && tracks_.TrackIsVideo(track_number)) + return 1; + + // Create a new cluster if we have accumulated too many frames + // already, where "too many" is defined as "the total time of frames + // in the cluster exceeds a threshold". + + const uint64 delta_ns = delta_timecode * timecode_scale; + + if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_) + return 1; + + // This is similar to the case above, with the difference that a new + // cluster is created when the size of the current cluster exceeds a + // threshold. + + const uint64 cluster_size = last_cluster->payload_size(); + + if (max_cluster_size_ > 0 && cluster_size >= max_cluster_size_) + return 1; + + // There's no need to create a new cluster, so emit this frame now. + + return 0; +} + +bool Segment::MakeNewCluster(uint64 frame_timestamp_ns) { + const int32 new_size = cluster_list_size_ + 1; + + if (new_size > cluster_list_capacity_) { + // Add more clusters. + const int32 new_capacity = + (cluster_list_capacity_ <= 0) ? 
1 : cluster_list_capacity_ * 2; + Cluster** const clusters = + new (std::nothrow) Cluster*[new_capacity]; // NOLINT + if (!clusters) + return false; + + for (int32 i = 0; i < cluster_list_size_; ++i) { + clusters[i] = cluster_list_[i]; + } + + delete [] cluster_list_; + + cluster_list_ = clusters; + cluster_list_capacity_ = new_capacity; + } + + if (!WriteFramesLessThan(frame_timestamp_ns)) + return false; + + if (mode_ == kFile) { + if (cluster_list_size_ > 0) { + // Update old cluster's size + Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; + + if (!old_cluster || !old_cluster->Finalize()) + return false; + } + + if (output_cues_) + new_cuepoint_ = true; + } + + if (chunking_ && cluster_list_size_ > 0) { + chunk_writer_cluster_->Close(); + chunk_count_++; + + if (!UpdateChunkName("chk", &chunk_name_)) + return false; + if (!chunk_writer_cluster_->Open(chunk_name_)) + return false; + } + + const uint64 timecode_scale = segment_info_.timecode_scale(); + const uint64 frame_timecode = frame_timestamp_ns / timecode_scale; + + uint64 cluster_timecode = frame_timecode; + + if (frames_size_ > 0) { + const Frame* const f = frames_[0]; // earliest queued frame + const uint64 ns = f->timestamp(); + const uint64 tc = ns / timecode_scale; + + if (tc < cluster_timecode) + cluster_timecode = tc; + } + + Cluster*& cluster = cluster_list_[cluster_list_size_]; + const int64 offset = MaxOffset(); + cluster = new (std::nothrow) Cluster(cluster_timecode, offset); // NOLINT + if (!cluster) + return false; + + if (!cluster->Init(writer_cluster_)) + return false; + + cluster_list_size_ = new_size; + return true; +} + +bool Segment::DoNewClusterProcessing(uint64 track_number, + uint64 frame_timestamp_ns, + bool is_key) { + for (;;) { + // Based on the characteristics of the current frame and current + // cluster, decide whether to create a new cluster. 
+ const int result = TestFrame(track_number, frame_timestamp_ns, is_key); + if (result < 0) // error + return false; + + // Always set force_new_cluster_ to false after TestFrame. + force_new_cluster_ = false; + + // A non-zero result means create a new cluster. + if (result > 0 && !MakeNewCluster(frame_timestamp_ns)) + return false; + + // Write queued (audio) frames. + const int frame_count = WriteFramesAll(); + if (frame_count < 0) // error + return false; + + // Write the current frame to the current cluster (if TestFrame + // returns 0) or to a newly created cluster (TestFrame returns 1). + if (result <= 1) + return true; + + // TestFrame returned 2, which means there was a large time + // difference between the cluster and the frame itself. Do the + // test again, comparing the frame to the new cluster. + } +} + +bool Segment::CheckHeaderInfo() { + if (!header_written_) { + if (!WriteSegmentHeader()) + return false; + + if (!seek_head_.AddSeekEntry(kMkvCluster, MaxOffset())) + return false; + + if (output_cues_ && cues_track_ == 0) { + // Check for a video track + for (uint32 i = 0; i < tracks_.track_entries_size(); ++i) { + const Track* const track = tracks_.GetTrackByIndex(i); + if (!track) + return false; + + if (tracks_.TrackIsVideo(track->number())) { + cues_track_ = track->number(); + break; + } + } + + // Set first track found + if (cues_track_ == 0) { + const Track* const track = tracks_.GetTrackByIndex(0); + if (!track) + return false; + + cues_track_ = track->number(); + } + } + } + return true; +} + +bool Segment::UpdateChunkName(const char* ext, char** name) const { + if (!name || !ext) + return false; + + char ext_chk[64]; +#ifdef _MSC_VER + sprintf_s(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); +#else + snprintf(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); +#endif + + const size_t length = strlen(chunking_base_name_) + strlen(ext_chk) + 1; + char* const str = new (std::nothrow) char[length]; // NOLINT + if (!str) + 
return false; + +#ifdef _MSC_VER + strcpy_s(str, length-strlen(ext_chk), chunking_base_name_); + strcat_s(str, length, ext_chk); +#else + strcpy(str, chunking_base_name_); + strcat(str, ext_chk); +#endif + + delete [] *name; + *name = str; + + return true; +} + +int64 Segment::MaxOffset() { + if (!writer_header_) + return -1; + + int64 offset = writer_header_->Position() - payload_pos_; + + if (chunking_) { + for (int32 i = 0; i < cluster_list_size_; ++i) { + Cluster* const cluster = cluster_list_[i]; + offset += cluster->Size(); + } + + if (writer_cues_) + offset += writer_cues_->Position(); + } + + return offset; +} + +bool Segment::QueueFrame(Frame* frame) { + const int32 new_size = frames_size_ + 1; + + if (new_size > frames_capacity_) { + // Add more frames. + const int32 new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2; + + if (new_capacity < 1) + return false; + + Frame** const frames = new (std::nothrow) Frame*[new_capacity]; // NOLINT + if (!frames) + return false; + + for (int32 i = 0; i < frames_size_; ++i) { + frames[i] = frames_[i]; + } + + delete [] frames_; + frames_ = frames; + frames_capacity_ = new_capacity; + } + + frames_[frames_size_++] = frame; + + return true; +} + +int Segment::WriteFramesAll() { + if (frames_ == NULL) + return 0; + + if (cluster_list_size_ < 1) + return -1; + + Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + + if (!cluster) + return -1; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + + for (int32 i = 0; i < frames_size_; ++i) { + Frame*& frame = frames_[i]; + const uint64 frame_timestamp = frame->timestamp(); // ns + const uint64 frame_timecode = frame_timestamp / timecode_scale; + + if (frame->discard_padding() > 0) { + if (!cluster->AddFrameWithDiscardPadding(frame->frame(), + frame->length(), + frame->discard_padding(), + frame->track_number(), + frame_timecode, + frame->is_key())) { + return -1; + } + } else { + if (!cluster->AddFrame(frame->frame(), + frame->length(), + 
frame->track_number(), + frame_timecode, + frame->is_key())) { + return -1; + } + } + + if (new_cuepoint_ && cues_track_ == frame->track_number()) { + if (!AddCuePoint(frame_timestamp, cues_track_)) + return -1; + } + + if (frame_timestamp > last_timestamp_) + last_timestamp_ = frame_timestamp; + + delete frame; + frame = NULL; + } + + const int result = frames_size_; + frames_size_ = 0; + + return result; +} + +bool Segment::WriteFramesLessThan(uint64 timestamp) { + // Check |cluster_list_size_| to see if this is the first cluster. If it is + // the first cluster the audio frames that are less than the first video + // timesatmp will be written in a later step. + if (frames_size_ > 0 && cluster_list_size_ > 0) { + if (!frames_) + return false; + + Cluster* const cluster = cluster_list_[cluster_list_size_-1]; + if (!cluster) + return false; + + const uint64 timecode_scale = segment_info_.timecode_scale(); + int32 shift_left = 0; + + // TODO(fgalligan): Change this to use the durations of frames instead of + // the next frame's start time if the duration is accurate. 
+ for (int32 i = 1; i < frames_size_; ++i) { + const Frame* const frame_curr = frames_[i]; + + if (frame_curr->timestamp() > timestamp) + break; + + const Frame* const frame_prev = frames_[i-1]; + const uint64 frame_timestamp = frame_prev->timestamp(); + const uint64 frame_timecode = frame_timestamp / timecode_scale; + const int64 discard_padding = frame_prev->discard_padding(); + + if (discard_padding > 0) { + if (!cluster->AddFrameWithDiscardPadding(frame_prev->frame(), + frame_prev->length(), + discard_padding, + frame_prev->track_number(), + frame_timecode, + frame_prev->is_key())) { + return false; + } + } else { + if (!cluster->AddFrame(frame_prev->frame(), + frame_prev->length(), + frame_prev->track_number(), + frame_timecode, + frame_prev->is_key())) { + return false; + } + } + + if (new_cuepoint_ && cues_track_ == frame_prev->track_number()) { + if (!AddCuePoint(frame_timestamp, cues_track_)) + return false; + } + + ++shift_left; + if (frame_timestamp > last_timestamp_) + last_timestamp_ = frame_timestamp; + + delete frame_prev; + } + + if (shift_left > 0) { + if (shift_left >= frames_size_) + return false; + + const int32 new_frames_size = frames_size_ - shift_left; + for (int32 i = 0; i < new_frames_size; ++i) { + frames_[i] = frames_[i+shift_left]; + } + + frames_size_ = new_frames_size; + } + } + + return true; +} + +} // namespace mkvmuxer diff --git a/source/libvpx/third_party/libwebm/mkvmuxer.hpp b/source/libvpx/third_party/libwebm/mkvmuxer.hpp new file mode 100644 index 0000000..63a315e --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxer.hpp @@ -0,0 +1,1403 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVMUXER_HPP +#define MKVMUXER_HPP + +#include "mkvmuxertypes.hpp" + +// For a description of the WebM elements see +// http://www.webmproject.org/code/specs/container/. + +namespace mkvparser { + class IMkvReader; +} // end namespace + +namespace mkvmuxer { + +class MkvWriter; +class Segment; + +/////////////////////////////////////////////////////////////// +// Interface used by the mkvmuxer to write out the Mkv data. +class IMkvWriter { + public: + // Writes out |len| bytes of |buf|. Returns 0 on success. + virtual int32 Write(const void* buf, uint32 len) = 0; + + // Returns the offset of the output position from the beginning of the + // output. + virtual int64 Position() const = 0; + + // Set the current File position. Returns 0 on success. + virtual int32 Position(int64 position) = 0; + + // Returns true if the writer is seekable. + virtual bool Seekable() const = 0; + + // Element start notification. Called whenever an element identifier is about + // to be written to the stream. |element_id| is the element identifier, and + // |position| is the location in the WebM stream where the first octet of the + // element identifier will be written. + // Note: the |MkvId| enumeration in webmids.hpp defines element values. + virtual void ElementStartNotify(uint64 element_id, int64 position) = 0; + + protected: + IMkvWriter(); + virtual ~IMkvWriter(); + + private: + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter); +}; + +// Writes out the EBML header for a WebM file. This function must be called +// before any other libwebm writing functions are called. 
+bool WriteEbmlHeader(IMkvWriter* writer); + +// Copies in Chunk from source to destination between the given byte positions +bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, + int64 start, int64 size); + +/////////////////////////////////////////////////////////////// +// Class to hold data the will be written to a block. +class Frame { + public: + Frame(); + ~Frame(); + + // Copies |frame| data into |frame_|. Returns true on success. + bool Init(const uint8* frame, uint64 length); + + // Copies |additional| data into |additional_|. Returns true on success. + bool AddAdditionalData(const uint8* additional, uint64 length, + uint64 add_id); + + uint64 add_id() const { return add_id_; } + const uint8* additional() const { return additional_; } + uint64 additional_length() const { return additional_length_; } + void set_duration(uint64 duration) { duration_ = duration; } + uint64 duration() const { return duration_; } + const uint8* frame() const { return frame_; } + void set_is_key(bool key) { is_key_ = key; } + bool is_key() const { return is_key_; } + uint64 length() const { return length_; } + void set_track_number(uint64 track_number) { track_number_ = track_number; } + uint64 track_number() const { return track_number_; } + void set_timestamp(uint64 timestamp) { timestamp_ = timestamp; } + uint64 timestamp() const { return timestamp_; } + void set_discard_padding(uint64 discard_padding) { + discard_padding_ = discard_padding; + } + uint64 discard_padding() const { return discard_padding_; } + + private: + // Id of the Additional data. + uint64 add_id_; + + // Pointer to additional data. Owned by this class. + uint8* additional_; + + // Length of the additional data. + uint64 additional_length_; + + // Duration of the frame in nanoseconds. + uint64 duration_; + + // Pointer to the data. Owned by this class. + uint8* frame_; + + // Flag telling if the data should set the key flag of a block. + bool is_key_; + + // Length of the data. 
+ uint64 length_; + + // Mkv track number the data is associated with. + uint64 track_number_; + + // Timestamp of the data in nanoseconds. + uint64 timestamp_; + + // Discard padding for the frame. + int64 discard_padding_; +}; + +/////////////////////////////////////////////////////////////// +// Class to hold one cue point in a Cues element. +class CuePoint { + public: + CuePoint(); + ~CuePoint(); + + // Returns the size in bytes for the entire CuePoint element. + uint64 Size() const; + + // Output the CuePoint element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + void set_time(uint64 time) { time_ = time; } + uint64 time() const { return time_; } + void set_track(uint64 track) { track_ = track; } + uint64 track() const { return track_; } + void set_cluster_pos(uint64 cluster_pos) { cluster_pos_ = cluster_pos; } + uint64 cluster_pos() const { return cluster_pos_; } + void set_block_number(uint64 block_number) { block_number_ = block_number; } + uint64 block_number() const { return block_number_; } + void set_output_block_number(bool output_block_number) { + output_block_number_ = output_block_number; + } + bool output_block_number() const { return output_block_number_; } + + private: + // Returns the size in bytes for the payload of the CuePoint element. + uint64 PayloadSize() const; + + // Absolute timecode according to the segment time base. + uint64 time_; + + // The Track element associated with the CuePoint. + uint64 track_; + + // The position of the Cluster containing the Block. + uint64 cluster_pos_; + + // Number of the Block within the Cluster, starting from 1. + uint64 block_number_; + + // If true the muxer will write out the block number for the cue if the + // block number is different than the default of 1. Default is set to true. + bool output_block_number_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(CuePoint); +}; + +/////////////////////////////////////////////////////////////// +// Cues element. 
+class Cues { + public: + Cues(); + ~Cues(); + + // Adds a cue point to the Cues element. Returns true on success. + bool AddCue(CuePoint* cue); + + // Returns the cue point by index. Returns NULL if there is no cue point + // match. + CuePoint* GetCueByIndex(int32 index) const; + + // Returns the total size of the Cues element + uint64 Size(); + + // Output the Cues element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + int32 cue_entries_size() const { return cue_entries_size_; } + void set_output_block_number(bool output_block_number) { + output_block_number_ = output_block_number; + } + bool output_block_number() const { return output_block_number_; } + + private: + // Number of allocated elements in |cue_entries_|. + int32 cue_entries_capacity_; + + // Number of CuePoints in |cue_entries_|. + int32 cue_entries_size_; + + // CuePoint list. + CuePoint** cue_entries_; + + // If true the muxer will write out the block number for the cue if the + // block number is different than the default of 1. Default is set to true. + bool output_block_number_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cues); +}; + +/////////////////////////////////////////////////////////////// +// ContentEncAESSettings element +class ContentEncAESSettings { + public: + enum { + kCTR = 1 + }; + + ContentEncAESSettings(); + ~ContentEncAESSettings() {} + + // Returns the size in bytes for the ContentEncAESSettings element. + uint64 Size() const; + + // Writes out the ContentEncAESSettings element to |writer|. Returns true on + // success. + bool Write(IMkvWriter* writer) const; + + uint64 cipher_mode() const { return cipher_mode_; } + + private: + // Returns the size in bytes for the payload of the ContentEncAESSettings + // element. 
+ uint64 PayloadSize() const; + + // Sub elements + uint64 cipher_mode_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings); +}; + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +// Elements used to describe if the track data has been encrypted or +// compressed with zlib or header stripping. +// Currently only whole frames can be encrypted with AES. This dictates that +// ContentEncodingOrder will be 0, ContentEncodingScope will be 1, +// ContentEncodingType will be 1, and ContentEncAlgo will be 5. +class ContentEncoding { + public: + ContentEncoding(); + ~ContentEncoding(); + + // Sets the content encryption id. Copies |length| bytes from |id| to + // |enc_key_id_|. Returns true on success. + bool SetEncryptionID(const uint8* id, uint64 length); + + // Returns the size in bytes for the ContentEncoding element. + uint64 Size() const; + + // Writes out the ContentEncoding element to |writer|. Returns true on + // success. + bool Write(IMkvWriter* writer) const; + + uint64 enc_algo() const { return enc_algo_; } + uint64 encoding_order() const { return encoding_order_; } + uint64 encoding_scope() const { return encoding_scope_; } + uint64 encoding_type() const { return encoding_type_; } + ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; } + + private: + // Returns the size in bytes for the encoding elements. + uint64 EncodingSize(uint64 compresion_size, uint64 encryption_size) const; + + // Returns the size in bytes for the encryption elements. + uint64 EncryptionSize() const; + + // Track element names + uint64 enc_algo_; + uint8* enc_key_id_; + uint64 encoding_order_; + uint64 encoding_scope_; + uint64 encoding_type_; + + // ContentEncAESSettings element. + ContentEncAESSettings enc_aes_settings_; + + // Size of the ContentEncKeyID data in bytes. 
+ uint64 enc_key_id_length_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); +}; + +/////////////////////////////////////////////////////////////// +// Track element. +class Track { + public: + // The |seed| parameter is used to synthesize a UID for the track. + explicit Track(unsigned int* seed); + virtual ~Track(); + + // Adds a ContentEncoding element to the Track. Returns true on success. + virtual bool AddContentEncoding(); + + // Returns the ContentEncoding by index. Returns NULL if there is no + // ContentEncoding match. + ContentEncoding* GetContentEncodingByIndex(uint32 index) const; + + // Returns the size in bytes for the payload of the Track element. + virtual uint64 PayloadSize() const; + + // Returns the size in bytes of the Track element. + virtual uint64 Size() const; + + // Output the Track element to the writer. Returns true on success. + virtual bool Write(IMkvWriter* writer) const; + + // Sets the CodecPrivate element of the Track element. Copies |length| + // bytes from |codec_private| to |codec_private_|. Returns true on success. 
+ bool SetCodecPrivate(const uint8* codec_private, uint64 length); + + void set_codec_id(const char* codec_id); + const char* codec_id() const { return codec_id_; } + const uint8* codec_private() const { return codec_private_; } + void set_language(const char* language); + const char* language() const { return language_; } + void set_max_block_additional_id(uint64 max_block_additional_id) { + max_block_additional_id_ = max_block_additional_id; + } + uint64 max_block_additional_id() const { return max_block_additional_id_; } + void set_name(const char* name); + const char* name() const { return name_; } + void set_number(uint64 number) { number_ = number; } + uint64 number() const { return number_; } + void set_type(uint64 type) { type_ = type; } + uint64 type() const { return type_; } + void set_uid(uint64 uid) { uid_ = uid; } + uint64 uid() const { return uid_; } + void set_codec_delay(uint64 codec_delay) { codec_delay_ = codec_delay; } + uint64 codec_delay() const { return codec_delay_; } + void set_seek_pre_roll(uint64 seek_pre_roll) { + seek_pre_roll_ = seek_pre_roll; + } + uint64 seek_pre_roll() const { return seek_pre_roll_; } + + uint64 codec_private_length() const { return codec_private_length_; } + uint32 content_encoding_entries_size() const { + return content_encoding_entries_size_; + } + + private: + // Track element names + char* codec_id_; + uint8* codec_private_; + char* language_; + uint64 max_block_additional_id_; + char* name_; + uint64 number_; + uint64 type_; + uint64 uid_; + uint64 codec_delay_; + uint64 seek_pre_roll_; + + // Size of the CodecPrivate data in bytes. + uint64 codec_private_length_; + + // ContentEncoding element list. + ContentEncoding** content_encoding_entries_; + + // Number of ContentEncoding elements added. + uint32 content_encoding_entries_size_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track); +}; + +/////////////////////////////////////////////////////////////// +// Track that has video specific elements. 
+class VideoTrack : public Track { + public: + // Supported modes for stereo 3D. + enum StereoMode { + kMono = 0, + kSideBySideLeftIsFirst = 1, + kTopBottomRightIsFirst = 2, + kTopBottomLeftIsFirst = 3, + kSideBySideRightIsFirst = 11 + }; + + enum AlphaMode { + kNoAlpha = 0, + kAlpha = 1 + }; + + // The |seed| parameter is used to synthesize a UID for the track. + explicit VideoTrack(unsigned int* seed); + virtual ~VideoTrack(); + + // Returns the size in bytes for the payload of the Track element plus the + // video specific elements. + virtual uint64 PayloadSize() const; + + // Output the VideoTrack element to the writer. Returns true on success. + virtual bool Write(IMkvWriter* writer) const; + + // Sets the video's stereo mode. Returns true on success. + bool SetStereoMode(uint64 stereo_mode); + + // Sets the video's alpha mode. Returns true on success. + bool SetAlphaMode(uint64 alpha_mode); + + void set_display_height(uint64 height) { display_height_ = height; } + uint64 display_height() const { return display_height_; } + void set_display_width(uint64 width) { display_width_ = width; } + uint64 display_width() const { return display_width_; } + void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; } + double frame_rate() const { return frame_rate_; } + void set_height(uint64 height) { height_ = height; } + uint64 height() const { return height_; } + uint64 stereo_mode() { return stereo_mode_; } + uint64 alpha_mode() { return alpha_mode_; } + void set_width(uint64 width) { width_ = width; } + uint64 width() const { return width_; } + + private: + // Returns the size in bytes of the Video element. + uint64 VideoPayloadSize() const; + + // Video track element names. 
+ uint64 display_height_; + uint64 display_width_; + double frame_rate_; + uint64 height_; + uint64 stereo_mode_; + uint64 alpha_mode_; + uint64 width_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack); +}; + +/////////////////////////////////////////////////////////////// +// Track that has audio specific elements. +class AudioTrack : public Track { + public: + // The |seed| parameter is used to synthesize a UID for the track. + explicit AudioTrack(unsigned int* seed); + virtual ~AudioTrack(); + + // Returns the size in bytes for the payload of the Track element plus the + // audio specific elements. + virtual uint64 PayloadSize() const; + + // Output the AudioTrack element to the writer. Returns true on success. + virtual bool Write(IMkvWriter* writer) const; + + void set_bit_depth(uint64 bit_depth) { bit_depth_ = bit_depth; } + uint64 bit_depth() const { return bit_depth_; } + void set_channels(uint64 channels) { channels_ = channels; } + uint64 channels() const { return channels_; } + void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; } + double sample_rate() const { return sample_rate_; } + + private: + // Audio track element names. + uint64 bit_depth_; + uint64 channels_; + double sample_rate_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack); +}; + +/////////////////////////////////////////////////////////////// +// Tracks element +class Tracks { + public: + // Audio and video type defined by the Matroska specs. + enum { + kVideo = 0x1, + kAudio = 0x2 + }; + // Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs. + static const char kOpusCodecId[]; + static const char kVorbisCodecId[]; + static const char kVp8CodecId[]; + static const char kVp9CodecId[]; + + Tracks(); + ~Tracks(); + + // Adds a Track element to the Tracks object. |track| will be owned and + // deleted by the Tracks object. Returns true on success. |number| is the + // number to use for the track. |number| must be >= 0. 
If |number| == 0 + // then the muxer will decide on the track number. + bool AddTrack(Track* track, int32 number); + + // Returns the track by index. Returns NULL if there is no track match. + const Track* GetTrackByIndex(uint32 idx) const; + + // Search the Tracks and return the track that matches |tn|. Returns NULL + // if there is no track match. + Track* GetTrackByNumber(uint64 track_number) const; + + // Returns true if the track number is an audio track. + bool TrackIsAudio(uint64 track_number) const; + + // Returns true if the track number is a video track. + bool TrackIsVideo(uint64 track_number) const; + + // Output the Tracks element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + uint32 track_entries_size() const { return track_entries_size_; } + + private: + // Track element list. + Track** track_entries_; + + // Number of Track elements added. + uint32 track_entries_size_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks); +}; + +/////////////////////////////////////////////////////////////// +// Chapter element +// +class Chapter { + public: + // Set the identifier for this chapter. (This corresponds to the + // Cue Identifier line in WebVTT.) + // TODO(matthewjheaney): the actual serialization of this item in + // MKV is pending. + bool set_id(const char* id); + + // Converts the nanosecond start and stop times of this chapter to + // their corresponding timecode values, and stores them that way. + void set_time(const Segment& segment, + uint64 start_time_ns, + uint64 end_time_ns); + + // Sets the uid for this chapter. Primarily used to enable + // deterministic output from the muxer. + void set_uid(const uint64 uid) { uid_ = uid; } + + // Add a title string to this chapter, per the semantics described + // here: + // http://www.matroska.org/technical/specs/index.html + // + // The title ("chapter string") is a UTF-8 string. 
+ // + // The language has ISO 639-2 representation, described here: + // http://www.loc.gov/standards/iso639-2/englangn.html + // http://www.loc.gov/standards/iso639-2/php/English_list.php + // If you specify NULL as the language value, this implies + // English ("eng"). + // + // The country value corresponds to the codes listed here: + // http://www.iana.org/domains/root/db/ + // + // The function returns false if the string could not be allocated. + bool add_string(const char* title, + const char* language, + const char* country); + + private: + friend class Chapters; + + // For storage of chapter titles that differ by language. + class Display { + public: + // Establish representation invariant for new Display object. + void Init(); + + // Reclaim resources, in anticipation of destruction. + void Clear(); + + // Copies the title to the |title_| member. Returns false on + // error. + bool set_title(const char* title); + + // Copies the language to the |language_| member. Returns false + // on error. + bool set_language(const char* language); + + // Copies the country to the |country_| member. Returns false on + // error. + bool set_country(const char* country); + + // If |writer| is non-NULL, serialize the Display sub-element of + // the Atom into the stream. Returns the Display element size on + // success, 0 if error. + uint64 WriteDisplay(IMkvWriter* writer) const; + + private: + char* title_; + char* language_; + char* country_; + }; + + Chapter(); + ~Chapter(); + + // Establish the representation invariant for a newly-created + // Chapter object. The |seed| parameter is used to create the UID + // for this chapter atom. + void Init(unsigned int* seed); + + // Copies this Chapter object to a different one. This is used when + // expanding a plain array of Chapter objects (see Chapters). + void ShallowCopy(Chapter* dst) const; + + // Reclaim resources used by this Chapter object, pending its + // destruction. 
+ void Clear(); + + // If there is no storage remaining on the |displays_| array for a + // new display object, creates a new, longer array and copies the + // existing Display objects to the new array. Returns false if the + // array cannot be expanded. + bool ExpandDisplaysArray(); + + // If |writer| is non-NULL, serialize the Atom sub-element into the + // stream. Returns the total size of the element on success, 0 if + // error. + uint64 WriteAtom(IMkvWriter* writer) const; + + // The string identifier for this chapter (corresponds to WebVTT cue + // identifier). + char* id_; + + // Start timecode of the chapter. + uint64 start_timecode_; + + // Stop timecode of the chapter. + uint64 end_timecode_; + + // The binary identifier for this chapter. + uint64 uid_; + + // The Atom element can contain multiple Display sub-elements, as + // the same logical title can be rendered in different languages. + Display* displays_; + + // The physical length (total size) of the |displays_| array. + int displays_size_; + + // The logical length (number of active elements) on the |displays_| + // array. + int displays_count_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapter); +}; + +/////////////////////////////////////////////////////////////// +// Chapters element +// +class Chapters { + public: + Chapters(); + ~Chapters(); + + Chapter* AddChapter(unsigned int* seed); + + // Returns the number of chapters that have been added. + int Count() const; + + // Output the Chapters element to the writer. Returns true on success. + bool Write(IMkvWriter* writer) const; + + private: + // Expands the chapters_ array if there is not enough space to contain + // another chapter object. Returns true on success. + bool ExpandChaptersArray(); + + // If |writer| is non-NULL, serialize the Edition sub-element of the + // Chapters element into the stream. Returns the Edition element + // size on success, 0 if error. 
+ uint64 WriteEdition(IMkvWriter* writer) const; + + // Total length of the chapters_ array. + int chapters_size_; + + // Number of active chapters on the chapters_ array. + int chapters_count_; + + // Array for storage of chapter objects. + Chapter* chapters_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapters); +}; + +/////////////////////////////////////////////////////////////// +// Cluster element +// +// Notes: +// |Init| must be called before any other method in this class. +class Cluster { + public: + Cluster(uint64 timecode, int64 cues_pos); + ~Cluster(); + + // |timecode| is the absolute timecode of the cluster. |cues_pos| is the + // position for the cluster within the segment that should be written in + // the cues element. + bool Init(IMkvWriter* ptr_writer); + + // Adds a frame to be output in the file. The frame is written out through + // |writer_| if successful. Returns true on success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. + // timecode: Absolute (not relative to cluster) timestamp of the + // frame, expressed in timecode units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timecode, // timecode units (absolute) + bool is_key); + + // Adds a frame to be output in the file. The frame is written out through + // |writer_| if successful. Returns true on success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // additional: Pointer to the additional data + // additional_length: Length of the additional data + // add_id: Value of BlockAddID element + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. 
+ // abs_timecode: Absolute (not relative to cluster) timestamp of the + // frame, expressed in timecode units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 abs_timecode, + bool is_key); + + // Adds a frame to be output in the file. The frame is written out through + // |writer_| if successful. Returns true on success. + // Inputs: + // frame: Pointer to the data. + // length: Length of the data. + // discard_padding: DiscardPadding element value. + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. + // abs_timecode: Absolute (not relative to cluster) timestamp of the + // frame, expressed in timecode units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 abs_timecode, + bool is_key); + + // Writes a frame of metadata to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. The range of allowed values is [1, 126]. + // timecode: Absolute (not relative to cluster) timestamp of the + // metadata frame, expressed in timecode units. + // duration: Duration of metadata frame, in timecode units. + // + // The metadata frame is written as a block group, with a duration + // sub-element but no reference time sub-elements (indicating that + // it is considered a keyframe, per Matroska semantics). 
+ bool AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timecode, // timecode units (absolute) + uint64 duration); // timecode units + + // Increments the size of the cluster's data in bytes. + void AddPayloadSize(uint64 size); + + // Closes the cluster so no more data can be written to it. Will update the + // cluster's size if |writer_| is seekable. Returns true on success. + bool Finalize(); + + // Returns the size in bytes for the entire Cluster element. + uint64 Size() const; + + int64 size_position() const { return size_position_; } + int32 blocks_added() const { return blocks_added_; } + uint64 payload_size() const { return payload_size_; } + int64 position_for_cues() const { return position_for_cues_; } + uint64 timecode() const { return timecode_; } + + private: + // Signature that matches either of WriteSimpleBlock or WriteMetadataBlock + // in the muxer utilities package. + typedef uint64 (*WriteBlock)(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 generic_arg); + + // Signature that matches WriteBlockWithAdditional + // in the muxer utilities package. + typedef uint64 (*WriteBlockAdditional)(IMkvWriter* writer, + const uint8* data, + uint64 length, + const uint8* additional, + uint64 add_id, + uint64 additional_length, + uint64 track_number, + int64 timecode, + uint64 is_key); + + // Signature that matches WriteBlockWithDiscardPadding + // in the muxer utilities package. + typedef uint64 (*WriteBlockDiscardPadding)(IMkvWriter* writer, + const uint8* data, + uint64 length, + int64 discard_padding, + uint64 track_number, + int64 timecode, + uint64 is_key); + + // Utility method that confirms that blocks can still be added, and that the + // cluster header has been written. Used by |DoWriteBlock*|. Returns true + // when successful. 
+ template <typename Type> + bool PreWriteBlock(Type* write_function); + + // Utility method used by the |DoWriteBlock*| methods that handles the book + // keeping required after each block is written. + void PostWriteBlock(uint64 element_size); + + // To simplify things, we require that there be fewer than 127 + // tracks -- this allows us to serialize the track number value for + // a stream using a single byte, per the Matroska encoding. + bool IsValidTrackNumber(uint64 track_number) const; + + // Given |abs_timecode|, calculates timecode relative to most recent timecode. + // Returns -1 on failure, or a relative timecode. + int64 GetRelativeTimecode(int64 abs_timecode) const; + + // Used to implement AddFrame and AddMetadata. + bool DoWriteBlock(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 absolute_timecode, + uint64 generic_arg, + WriteBlock write_block); + + // Used to implement AddFrameWithAdditional + bool DoWriteBlockWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 absolute_timecode, + uint64 generic_arg, + WriteBlockAdditional write_block); + + // Used to implement AddFrameWithDiscardPadding + bool DoWriteBlockWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 absolute_timecode, + uint64 generic_arg, + WriteBlockDiscardPadding write_block); + + // Outputs the Cluster header to |writer_|. Returns true on success. + bool WriteClusterHeader(); + + // Number of blocks added to the cluster. + int32 blocks_added_; + + // Flag telling if the cluster has been closed. + bool finalized_; + + // Flag telling if the cluster's header has been written. + bool header_written_; + + // The size of the cluster elements in bytes. + uint64 payload_size_; + + // The file position used for cue points. 
+ const int64 position_for_cues_; + + // The file position of the cluster's size element. + int64 size_position_; + + // The absolute timecode of the cluster. + const uint64 timecode_; + + // Pointer to the writer object. Not owned by this class. + IMkvWriter* writer_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cluster); +}; + +/////////////////////////////////////////////////////////////// +// SeekHead element +class SeekHead { + public: + SeekHead(); + ~SeekHead(); + + // TODO(fgalligan): Change this to reserve a certain size. Then check how + // big the seek entry to be added is as not every seek entry will be the + // maximum size it could be. + // Adds a seek entry to be written out when the element is finalized. |id| + // must be the coded mkv element id. |pos| is the file position of the + // element. Returns true on success. + bool AddSeekEntry(uint32 id, uint64 pos); + + // Writes out SeekHead and SeekEntry elements. Returns true on success. + bool Finalize(IMkvWriter* writer) const; + + // Returns the id of the Seek Entry at the given index. Returns -1 if index is + // out of range. + uint32 GetId(int index) const; + + // Returns the position of the Seek Entry at the given index. Returns -1 if + // index is out of range. + uint64 GetPosition(int index) const; + + // Sets the Seek Entry id and position at given index. + // Returns true on success. + bool SetSeekEntry(int index, uint32 id, uint64 position); + + // Reserves space by writing out a Void element which will be updated with + // a SeekHead element later. Returns true on success. + bool Write(IMkvWriter* writer); + + // We are going to put a cap on the number of Seek Entries. + const static int32 kSeekEntryCount = 5; + + private: + // Returns the maximum size in bytes of one seek entry. + uint64 MaxEntrySize() const; + + // Seek entry id element list. + uint32 seek_entry_id_[kSeekEntryCount]; + + // Seek entry pos element list. 
+ uint64 seek_entry_pos_[kSeekEntryCount]; + + // The file position of SeekHead element. + int64 start_pos_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead); +}; + +/////////////////////////////////////////////////////////////// +// Segment Information element +class SegmentInfo { + public: + SegmentInfo(); + ~SegmentInfo(); + + // Will update the duration if |duration_| is > 0.0. Returns true on success. + bool Finalize(IMkvWriter* writer) const; + + // Sets |muxing_app_| and |writing_app_|. + bool Init(); + + // Output the Segment Information element to the writer. Returns true on + // success. + bool Write(IMkvWriter* writer); + + void set_duration(double duration) { duration_ = duration; } + double duration() const { return duration_; } + void set_muxing_app(const char* app); + const char* muxing_app() const { return muxing_app_; } + void set_timecode_scale(uint64 scale) { timecode_scale_ = scale; } + uint64 timecode_scale() const { return timecode_scale_; } + void set_writing_app(const char* app); + const char* writing_app() const { return writing_app_; } + + private: + // Segment Information element names. + // Initially set to -1 to signify that a duration has not been set and should + // not be written out. + double duration_; + // Set to libwebm-%d.%d.%d.%d, major, minor, build, revision. + char* muxing_app_; + uint64 timecode_scale_; + // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision. + char* writing_app_; + + // The file position of the duration element. + int64 duration_pos_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo); +}; + +/////////////////////////////////////////////////////////////// +// This class represents the main segment in a WebM file. Currently only +// supports one Segment element. +// +// Notes: +// |Init| must be called before any other method in this class. 
+class Segment { + public: + enum Mode { + kLive = 0x1, + kFile = 0x2 + }; + + enum CuesPosition { + kAfterClusters = 0x0, // Position Cues after Clusters - Default + kBeforeClusters = 0x1 // Position Cues before Clusters + }; + + const static uint64 kDefaultMaxClusterDuration = 30000000000ULL; + + Segment(); + ~Segment(); + + // Initializes |SegmentInfo| and returns result. Always returns false when + // |ptr_writer| is NULL. + bool Init(IMkvWriter* ptr_writer); + + // Adds a generic track to the segment. Returns the newly-allocated + // track object (which is owned by the segment) on success, NULL on + // error. |number| is the number to use for the track. |number| + // must be >= 0. If |number| == 0 then the muxer will decide on the + // track number. + Track* AddTrack(int32 number); + + // Adds a Vorbis audio track to the segment. Returns the number of the track + // on success, 0 on error. |number| is the number to use for the audio track. + // |number| must be >= 0. If |number| == 0 then the muxer will decide on + // the track number. + uint64 AddAudioTrack(int32 sample_rate, int32 channels, int32 number); + + // Adds an empty chapter to the chapters of this segment. Returns + // non-NULL on success. After adding the chapter, the caller should + // populate its fields via the Chapter member functions. + Chapter* AddChapter(); + + // Adds a cue point to the Cues element. |timestamp| is the time in + // nanoseconds of the cue's time. |track| is the Track of the Cue. This + // function must be called after AddFrame to calculate the correct + // BlockNumber for the CuePoint. Returns true on success. + bool AddCuePoint(uint64 timestamp, uint64 track); + + // Adds a frame to be output in the file. Returns true on success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. + // timestamp: Timestamp of the frame in nanoseconds from 0. 
+ // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrame(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp_ns, + bool is_key); + + // Writes a frame of metadata to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data + // length: Length of the data + // track_number: Track to add the data to. Value returned by Add track + // functions. + // timecode: Absolute timestamp of the metadata frame, expressed + // in nanosecond units. + // duration: Duration of metadata frame, in nanosecond units. + // + // The metadata frame is written as a block group, with a duration + // sub-element but no reference time sub-elements (indicating that + // it is considered a keyframe, per Matroska semantics). + bool AddMetadata(const uint8* frame, + uint64 length, + uint64 track_number, + uint64 timestamp_ns, + uint64 duration_ns); + + // Writes a frame with additional data to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data. + // length: Length of the data. + // additional: Pointer to additional data. + // additional_length: Length of additional data. + // add_id: Additional ID which identifies the type of additional data. + // track_number: Track to add the data to. Value returned by Add track + // functions. + // timestamp: Absolute timestamp of the frame, expressed in nanosecond + // units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithAdditional(const uint8* frame, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + uint64 timestamp, + bool is_key); + + // Writes a frame with DiscardPadding to the output medium; returns true on + // success. + // Inputs: + // frame: Pointer to the data. + // length: Length of the data. + // discard_padding: DiscardPadding element value. + // track_number: Track to add the data to. 
Value returned by Add track + // functions. + // timestamp: Absolute timestamp of the frame, expressed in nanosecond + // units. + // is_key: Flag telling whether or not this frame is a key frame. + bool AddFrameWithDiscardPadding(const uint8* frame, + uint64 length, + int64 discard_padding, + uint64 track_number, + uint64 timestamp, + bool is_key); + + // Writes a Frame to the output medium. Chooses the correct way of writing + // the frame (Block vs SimpleBlock) based on the parameters passed. + // Inputs: + // frame: frame object + bool AddGenericFrame(const Frame* frame); + + // Adds a VP8 video track to the segment. Returns the number of the track on + // success, 0 on error. |number| is the number to use for the video track. + // |number| must be >= 0. If |number| == 0 then the muxer will decide on + // the track number. + uint64 AddVideoTrack(int32 width, int32 height, int32 number); + + // This function must be called after Finalize() if you need a copy of the + // output with Cues written before the Clusters. It will return false if the + // writer is not seekable of if chunking is set to true. + // Input parameters: + // reader - an IMkvReader object created with the same underlying file of the + // current writer object. Make sure to close the existing writer + // object before creating this so that all the data is properly + // flushed and available for reading. + // writer - an IMkvWriter object pointing to a *different* file than the one + // pointed by the current writer object. This file will contain the + // Cues element before the Clusters. + bool CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, + IMkvWriter* writer); + + // Sets which track to use for the Cues element. Must have added the track + // before calling this function. Returns true on success. |track_number| is + // returned by the Add track functions. 
+ bool CuesTrack(uint64 track_number); + + // This will force the muxer to create a new Cluster when the next frame is + // added. + void ForceNewClusterOnNextFrame(); + + // Writes out any frames that have not been written out. Finalizes the last + // cluster. May update the size and duration of the segment. May output the + // Cues element. May finalize the SeekHead element. Returns true on success. + bool Finalize(); + + // Returns the Cues object. + Cues* GetCues() { return &cues_; } + + // Returns the Segment Information object. + const SegmentInfo* GetSegmentInfo() const { return &segment_info_; } + SegmentInfo* GetSegmentInfo() { return &segment_info_; } + + // Search the Tracks and return the track that matches |track_number|. + // Returns NULL if there is no track match. + Track* GetTrackByNumber(uint64 track_number) const; + + // Toggles whether to output a cues element. + void OutputCues(bool output_cues); + + // Sets if the muxer will output files in chunks or not. |chunking| is a + // flag telling whether or not to turn on chunking. |filename| is the base + // filename for the chunk files. The header chunk file will be named + // |filename|.hdr and the data chunks will be named + // |filename|_XXXXXX.chk. Chunking implies that the muxer will be writing + // to files so the muxer will use the default MkvWriter class to control + // what data is written to what files. Returns true on success. + // TODO: Should we change the IMkvWriter Interface to add Open and Close? + // That will force the interface to be dependent on files. 
+ bool SetChunking(bool chunking, const char* filename); + + bool chunking() const { return chunking_; } + uint64 cues_track() const { return cues_track_; } + void set_max_cluster_duration(uint64 max_cluster_duration) { + max_cluster_duration_ = max_cluster_duration; + } + uint64 max_cluster_duration() const { return max_cluster_duration_; } + void set_max_cluster_size(uint64 max_cluster_size) { + max_cluster_size_ = max_cluster_size; + } + uint64 max_cluster_size() const { return max_cluster_size_; } + void set_mode(Mode mode) { mode_ = mode; } + Mode mode() const { return mode_; } + CuesPosition cues_position() const { return cues_position_; } + bool output_cues() const { return output_cues_; } + const SegmentInfo* segment_info() const { return &segment_info_; } + + private: + // Checks if header information has been output and initialized. If not it + // will output the Segment element and initialize the SeekHead elment and + // Cues elements. + bool CheckHeaderInfo(); + + // Sets |name| according to how many chunks have been written. |ext| is the + // file extension. |name| must be deleted by the calling app. Returns true + // on success. + bool UpdateChunkName(const char* ext, char** name) const; + + // Returns the maximum offset within the segment's payload. When chunking + // this function is needed to determine offsets of elements within the + // chunked files. Returns -1 on error. + int64 MaxOffset(); + + // Adds the frame to our frame array. + bool QueueFrame(Frame* frame); + + // Output all frames that are queued. Returns -1 on error, otherwise + // it returns the number of frames written. + int WriteFramesAll(); + + // Output all frames that are queued that have an end time that is less + // then |timestamp|. Returns true on success and if there are no frames + // queued. + bool WriteFramesLessThan(uint64 timestamp); + + // Outputs the segment header, Segment Information element, SeekHead element, + // and Tracks element to |writer_|. 
+ bool WriteSegmentHeader(); + + // Given a frame with the specified timestamp (nanosecond units) and + // keyframe status, determine whether a new cluster should be + // created, before writing enqueued frames and the frame itself. The + // function returns one of the following values: + // -1 = error: an out-of-order frame was detected + // 0 = do not create a new cluster, and write frame to the existing cluster + // 1 = create a new cluster, and write frame to that new cluster + // 2 = create a new cluster, and re-run test + int TestFrame(uint64 track_num, uint64 timestamp_ns, bool key) const; + + // Create a new cluster, using the earlier of the first enqueued + // frame, or the indicated time. Returns true on success. + bool MakeNewCluster(uint64 timestamp_ns); + + // Checks whether a new cluster needs to be created, and if so + // creates a new cluster. Returns false if creation of a new cluster + // was necessary but creation was not successful. + bool DoNewClusterProcessing(uint64 track_num, uint64 timestamp_ns, bool key); + + + // Adjusts Cue Point values (to place Cues before Clusters) so that they + // reflect the correct offsets. + void MoveCuesBeforeClusters(); + + // This function recursively computes the correct cluster offsets (this is + // done to move the Cues before Clusters). It recursively updates the change + // in size (which indicates a change in cluster offset) until no sizes change. + // Parameters: + // diff - indicates the difference in size of the Cues element that needs to + // accounted for. + // index - index in the list of Cues which is currently being adjusted. + // cue_size - size of the Cues element. + void MoveCuesBeforeClustersHelper(uint64 diff, int index, uint64* cue_size); + + // Seeds the random number generator used to make UIDs. + unsigned int seed_; + + // WebM elements + Cues cues_; + SeekHead seek_head_; + SegmentInfo segment_info_; + Tracks tracks_; + Chapters chapters_; + + // Number of chunks written. 
+ int chunk_count_; + + // Current chunk filename. + char* chunk_name_; + + // Default MkvWriter object created by this class used for writing clusters + // out in separate files. + MkvWriter* chunk_writer_cluster_; + + // Default MkvWriter object created by this class used for writing Cues + // element out to a file. + MkvWriter* chunk_writer_cues_; + + // Default MkvWriter object created by this class used for writing the + // Matroska header out to a file. + MkvWriter* chunk_writer_header_; + + // Flag telling whether or not the muxer is chunking output to multiple + // files. + bool chunking_; + + // Base filename for the chunked files. + char* chunking_base_name_; + + // File position offset where the Clusters end. + int64 cluster_end_offset_; + + // List of clusters. + Cluster** cluster_list_; + + // Number of cluster pointers allocated in the cluster list. + int32 cluster_list_capacity_; + + // Number of clusters in the cluster list. + int32 cluster_list_size_; + + // Indicates whether Cues should be written before or after Clusters + CuesPosition cues_position_; + + // Track number that is associated with the cues element for this segment. + uint64 cues_track_; + + // Tells the muxer to force a new cluster on the next Block. + bool force_new_cluster_; + + // List of stored audio frames. These variables are used to store frames so + // the muxer can follow the guideline "Audio blocks that contain the video + // key frame's timecode should be in the same cluster as the video key frame + // block." + Frame** frames_; + + // Number of frame pointers allocated in the frame list. + int32 frames_capacity_; + + // Number of frames in the frame list. + int32 frames_size_; + + // Flag telling if a video track has been added to the segment. + bool has_video_; + + // Flag telling if the segment's header has been written. + bool header_written_; + + // Duration of the last block in nanoseconds. 
+ uint64 last_block_duration_; + + // Last timestamp in nanoseconds added to a cluster. + uint64 last_timestamp_; + + // Maximum time in nanoseconds for a cluster duration. This variable is a + // guideline and some clusters may have a longer duration. Default is 30 + // seconds. + uint64 max_cluster_duration_; + + // Maximum size in bytes for a cluster. This variable is a guideline and + // some clusters may have a larger size. Default is 0 which signifies that + // the muxer will decide the size. + uint64 max_cluster_size_; + + // The mode that segment is in. If set to |kLive| the writer must not + // seek backwards. + Mode mode_; + + // Flag telling the muxer that a new cue point should be added. + bool new_cuepoint_; + + // TODO(fgalligan): Should we add support for more than one Cues element? + // Flag whether or not the muxer should output a Cues element. + bool output_cues_; + + // The file position of the segment's payload. + int64 payload_pos_; + + // The file position of the element's size. + int64 size_position_; + + // Pointer to the writer objects. Not owned by this class. + IMkvWriter* writer_cluster_; + IMkvWriter* writer_cues_; + IMkvWriter* writer_header_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment); +}; + +} //end namespace mkvmuxer + +#endif //MKVMUXER_HPP diff --git a/source/libvpx/third_party/libwebm/mkvmuxertypes.hpp b/source/libvpx/third_party/libwebm/mkvmuxertypes.hpp new file mode 100644 index 0000000..2c66fd2 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxertypes.hpp @@ -0,0 +1,30 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MKVMUXERTYPES_HPP
+#define MKVMUXERTYPES_HPP
+
+// Copied from Chromium basictypes.h
+// A macro to disallow the copy constructor and operator= functions.
+// This should be used in the "private:" declarations for a class.
+#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+
+namespace mkvmuxer {
+
+typedef unsigned char uint8;
+typedef short int16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+}  // end namespace mkvmuxer
+
+#endif // MKVMUXERTYPES_HPP
diff --git a/source/libvpx/third_party/libwebm/mkvmuxerutil.cpp b/source/libvpx/third_party/libwebm/mkvmuxerutil.cpp new file mode 100644 index 0000000..18060e9 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxerutil.cpp @@ -0,0 +1,713 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvmuxerutil.hpp" + +#ifdef __ANDROID__ +#include <fcntl.h> +#endif + +#include <cassert> +#include <cmath> +#include <cstdio> +#ifdef _MSC_VER +#define _CRT_RAND_S +#endif +#include <cstdlib> +#include <cstring> +#include <ctime> + +#include <new> + +#include "mkvwriter.hpp" +#include "webmids.hpp" + +namespace mkvmuxer { + +int32 GetCodedUIntSize(uint64 value) { + if (value < 0x000000000000007FULL) + return 1; + else if (value < 0x0000000000003FFFULL) + return 2; + else if (value < 0x00000000001FFFFFULL) + return 3; + else if (value < 0x000000000FFFFFFFULL) + return 4; + else if (value < 0x00000007FFFFFFFFULL) + return 5; + else if (value < 0x000003FFFFFFFFFFULL) + return 6; + else if (value < 0x0001FFFFFFFFFFFFULL) + return 7; + return 8; +} + +int32 GetUIntSize(uint64 value) { + if (value < 0x0000000000000100ULL) + return 1; + else if (value < 0x0000000000010000ULL) + return 2; + else if (value < 0x0000000001000000ULL) + return 3; + else if (value < 0x0000000100000000ULL) + return 4; + else if (value < 0x0000010000000000ULL) + return 5; + else if (value < 0x0001000000000000ULL) + return 6; + else if (value < 0x0100000000000000ULL) + return 7; + return 8; +} + +uint64 EbmlMasterElementSize(uint64 type, uint64 value) { + // Size of EBML ID + int32 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += 
GetCodedUIntSize(value); + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, int64 value) { + return EbmlElementSize(type, static_cast<uint64>(value)); +} + +uint64 EbmlElementSize(uint64 type, uint64 value) { + // Size of EBML ID + int32 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += GetUIntSize(value); + + // Size of Datasize + ebml_size++; + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, float /* value */ ) { + // Size of EBML ID + uint64 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += sizeof(float); + + // Size of Datasize + ebml_size++; + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, const char* value) { + if (!value) + return 0; + + // Size of EBML ID + uint64 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += strlen(value); + + // Size of Datasize + ebml_size++; + + return ebml_size; +} + +uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) { + if (!value) + return 0; + + // Size of EBML ID + uint64 ebml_size = GetUIntSize(type); + + // Datasize + ebml_size += size; + + // Size of Datasize + ebml_size += GetCodedUIntSize(size); + + return ebml_size; +} + +int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) { + if (!writer || size < 1 || size > 8) + return -1; + + for (int32 i = 1; i <= size; ++i) { + const int32 byte_count = size - i; + const int32 bit_count = byte_count * 8; + + const int64 bb = value >> bit_count; + const uint8 b = static_cast<uint8>(bb); + + const int32 status = writer->Write(&b, 1); + + if (status < 0) + return status; + } + + return 0; +} + +int32 SerializeFloat(IMkvWriter* writer, float f) { + if (!writer) + return -1; + + assert(sizeof(uint32) == sizeof(float)); + // This union is merely used to avoid a reinterpret_cast from float& to + // uint32& which will result in violation of strict aliasing. 
+ union U32 { + uint32 u32; + float f; + } value; + value.f = f; + + for (int32 i = 1; i <= 4; ++i) { + const int32 byte_count = 4 - i; + const int32 bit_count = byte_count * 8; + + const uint8 byte = static_cast<uint8>(value.u32 >> bit_count); + + const int32 status = writer->Write(&byte, 1); + + if (status < 0) + return status; + } + + return 0; +} + +int32 WriteUInt(IMkvWriter* writer, uint64 value) { + if (!writer) + return -1; + + int32 size = GetCodedUIntSize(value); + + return WriteUIntSize(writer, value, size); +} + +int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { + if (!writer || size < 0 || size > 8) + return -1; + + if (size > 0) { + const uint64 bit = 1LL << (size * 7); + + if (value > (bit - 2)) + return -1; + + value |= bit; + } else { + size = 1; + int64 bit; + + for (;;) { + bit = 1LL << (size * 7); + const uint64 max = bit - 2; + + if (value <= max) + break; + + ++size; + } + + if (size > 8) + return false; + + value |= bit; + } + + return SerializeInt(writer, value, size); +} + +int32 WriteID(IMkvWriter* writer, uint64 type) { + if (!writer) + return -1; + + writer->ElementStartNotify(type, writer->Position()); + + const int32 size = GetUIntSize(type); + + return SerializeInt(writer, type, size); +} + +bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) { + if (!writer) + return false; + + if (WriteID(writer, type)) + return false; + + if (WriteUInt(writer, size)) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) { + if (!writer) + return false; + + if (WriteID(writer, type)) + return false; + + const uint64 size = GetUIntSize(value); + if (WriteUInt(writer, size)) + return false; + + if (SerializeInt(writer, value, static_cast<int32>(size))) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) { + if (!writer) + return false; + + if (WriteID(writer, type)) + return false; + + if 
(WriteUInt(writer, 4)) + return false; + + if (SerializeFloat(writer, value)) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { + if (!writer || !value) + return false; + + if (WriteID(writer, type)) + return false; + + const uint64 length = strlen(value); + if (WriteUInt(writer, length)) + return false; + + if (writer->Write(value, static_cast<const uint32>(length))) + return false; + + return true; +} + +bool WriteEbmlElement(IMkvWriter* writer, + uint64 type, + const uint8* value, + uint64 size) { + if (!writer || !value || size < 1) + return false; + + if (WriteID(writer, type)) + return false; + + if (WriteUInt(writer, size)) + return false; + + if (writer->Write(value, static_cast<uint32>(size))) + return false; + + return true; +} + +uint64 WriteSimpleBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 is_key) { + if (!writer) + return false; + + if (!data || length < 1) + return false; + + // Here we only permit track number values to be no greater than + // 126, which the largest value we can store having a Matroska + // integer representation of only 1 byte. + + if (track_number < 1 || track_number > 126) + return false; + + // Technically the timestamp for a block can be less than the + // timestamp for the cluster itself (remember that block timestamp + // is a signed, 16-bit integer). However, as a simplification we + // only permit non-negative cluster-relative timestamps for blocks. 
+ + if (timecode < 0 || timecode > kMaxBlockTimecode) + return false; + + if (WriteID(writer, kMkvSimpleBlock)) + return 0; + + const int32 size = static_cast<int32>(length) + 4; + if (WriteUInt(writer, size)) + return 0; + + if (WriteUInt(writer, static_cast<uint64>(track_number))) + return 0; + + if (SerializeInt(writer, timecode, 2)) + return 0; + + uint64 flags = 0; + if (is_key) + flags |= 0x80; + + if (SerializeInt(writer, flags, 1)) + return 0; + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + const uint64 element_size = + GetUIntSize(kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + length; + + return element_size; +} + +// We must write the metadata (key)frame as a BlockGroup element, +// because we need to specify a duration for the frame. The +// BlockGroup element comprises the frame itself and its duration, +// and is laid out as follows: +// +// BlockGroup tag +// BlockGroup size +// Block tag +// Block size +// (the frame is the block payload) +// Duration tag +// Duration size +// (duration payload) +// +uint64 WriteMetadataBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 duration) { + // We don't backtrack when writing to the stream, so we must + // pre-compute the BlockGroup size, by summing the sizes of each + // sub-element (the block and the duration). + + // We use a single byte for the track number of the block, which + // means the block header is exactly 4 bytes. 
+ + // TODO(matthewjheaney): use EbmlMasterElementSize and WriteEbmlMasterElement + + const uint64 block_payload_size = 4 + length; + const int32 block_size = GetCodedUIntSize(block_payload_size); + const uint64 block_elem_size = 1 + block_size + block_payload_size; + + const int32 duration_payload_size = GetUIntSize(duration); + const int32 duration_size = GetCodedUIntSize(duration_payload_size); + const uint64 duration_elem_size = 1 + duration_size + duration_payload_size; + + const uint64 blockg_payload_size = block_elem_size + duration_elem_size; + const int32 blockg_size = GetCodedUIntSize(blockg_payload_size); + const uint64 blockg_elem_size = 1 + blockg_size + blockg_payload_size; + + if (WriteID(writer, kMkvBlockGroup)) // 1-byte ID size + return 0; + + if (WriteUInt(writer, blockg_payload_size)) + return 0; + + // Write Block element + + if (WriteID(writer, kMkvBlock)) // 1-byte ID size + return 0; + + if (WriteUInt(writer, block_payload_size)) + return 0; + + // Byte 1 of 4 + + if (WriteUInt(writer, track_number)) + return 0; + + // Bytes 2 & 3 of 4 + + if (SerializeInt(writer, timecode, 2)) + return 0; + + // Byte 4 of 4 + + const uint64 flags = 0; + + if (SerializeInt(writer, flags, 1)) + return 0; + + // Now write the actual frame (of metadata) + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + // Write Duration element + + if (WriteID(writer, kMkvBlockDuration)) // 1-byte ID size + return 0; + + if (WriteUInt(writer, duration_payload_size)) + return 0; + + if (SerializeInt(writer, duration, duration_payload_size)) + return 0; + + // Note that we don't write a reference time as part of the block + // group; no reference time(s) indicates that this block is a + // keyframe. (Unlike the case for a SimpleBlock element, the header + // bits of the Block sub-element of a BlockGroup element do not + // indicate keyframe status. The keyframe status is inferred from + // the absence of reference time sub-elements.) 
+ + return blockg_elem_size; +} + +// Writes a WebM BlockGroup with BlockAdditional data. The structure is as +// follows: +// Indentation shows sub-levels +// BlockGroup +// Block +// Data +// BlockAdditions +// BlockMore +// BlockAddID +// 1 (Denotes Alpha) +// BlockAdditional +// Data +uint64 WriteBlockWithAdditional(IMkvWriter* writer, + const uint8* data, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + int64 timecode, + uint64 is_key) { + if (!data || !additional || length < 1 || additional_length < 1) + return 0; + + const uint64 block_payload_size = 4 + length; + const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock, + block_payload_size) + + block_payload_size; + const uint64 block_additional_elem_size = EbmlElementSize(kMkvBlockAdditional, + additional, + additional_length); + const uint64 block_addid_elem_size = EbmlElementSize(kMkvBlockAddID, add_id); + + const uint64 block_more_payload_size = block_addid_elem_size + + block_additional_elem_size; + const uint64 block_more_elem_size = EbmlMasterElementSize( + kMkvBlockMore, + block_more_payload_size) + + block_more_payload_size; + const uint64 block_additions_payload_size = block_more_elem_size; + const uint64 block_additions_elem_size = EbmlMasterElementSize( + kMkvBlockAdditions, + block_additions_payload_size) + + block_additions_payload_size; + const uint64 block_group_payload_size = block_elem_size + + block_additions_elem_size; + const uint64 block_group_elem_size = EbmlMasterElementSize( + kMkvBlockGroup, + block_group_payload_size) + + block_group_payload_size; + + if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, + block_group_payload_size)) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size)) + return 0; + + if (WriteUInt(writer, track_number)) + return 0; + + if (SerializeInt(writer, timecode, 2)) + return 0; + + uint64 flags = 0; + if (is_key) + flags |= 0x80; + if (SerializeInt(writer, 
flags, 1)) + return 0; + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlockAdditions, + block_additions_payload_size)) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlockMore, block_more_payload_size)) + return 0; + + if (!WriteEbmlElement(writer, kMkvBlockAddID, add_id)) + return 0; + + if (!WriteEbmlElement(writer, kMkvBlockAdditional, + additional, additional_length)) + return 0; + + return block_group_elem_size; +} + +// Writes a WebM BlockGroup with DiscardPadding. The structure is as follows: +// Indentation shows sub-levels +// BlockGroup +// Block +// Data +// DiscardPadding +uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, + const uint8* data, + uint64 length, + int64 discard_padding, + uint64 track_number, + int64 timecode, + uint64 is_key) { + if (!data || length < 1 || discard_padding <= 0) + return 0; + + const uint64 block_payload_size = 4 + length; + const uint64 block_elem_size = EbmlMasterElementSize(kMkvBlock, + block_payload_size) + + block_payload_size; + const uint64 discard_padding_elem_size = EbmlElementSize(kMkvDiscardPadding, + discard_padding); + const uint64 block_group_payload_size = block_elem_size + + discard_padding_elem_size; + const uint64 block_group_elem_size = EbmlMasterElementSize( + kMkvBlockGroup, + block_group_payload_size) + + block_group_payload_size; + + if (!WriteEbmlMasterElement(writer, kMkvBlockGroup, + block_group_payload_size)) + return 0; + + if (!WriteEbmlMasterElement(writer, kMkvBlock, block_payload_size)) + return 0; + + if (WriteUInt(writer, track_number)) + return 0; + + if (SerializeInt(writer, timecode, 2)) + return 0; + + uint64 flags = 0; + if (is_key) + flags |= 0x80; + if (SerializeInt(writer, flags, 1)) + return 0; + + if (writer->Write(data, static_cast<uint32>(length))) + return 0; + + if (WriteID(writer, kMkvDiscardPadding)) + return 0; + + const uint64 size = GetUIntSize(discard_padding); + if (WriteUInt(writer, 
size)) + return false; + + if (SerializeInt(writer, discard_padding, static_cast<int32>(size))) + return false; + + return block_group_elem_size; +} + +uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) { + if (!writer) + return false; + + // Subtract one for the void ID and the coded size. + uint64 void_entry_size = size - 1 - GetCodedUIntSize(size-1); + uint64 void_size = EbmlMasterElementSize(kMkvVoid, void_entry_size) + + void_entry_size; + + if (void_size != size) + return 0; + + const int64 payload_position = writer->Position(); + if (payload_position < 0) + return 0; + + if (WriteID(writer, kMkvVoid)) + return 0; + + if (WriteUInt(writer, void_entry_size)) + return 0; + + const uint8 value = 0; + for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) { + if (writer->Write(&value, 1)) + return 0; + } + + const int64 stop_position = writer->Position(); + if (stop_position < 0 || + stop_position - payload_position != static_cast<int64>(void_size)) + return 0; + + return void_size; +} + +void GetVersion(int32* major, int32* minor, int32* build, int32* revision) { + *major = 0; + *minor = 2; + *build = 1; + *revision = 0; +} + +} // namespace mkvmuxer + +mkvmuxer::uint64 mkvmuxer::MakeUID(unsigned int* seed) { + uint64 uid = 0; + +#ifdef __MINGW32__ + srand(*seed); +#endif + + for (int i = 0; i < 7; ++i) { // avoid problems with 8-byte values + uid <<= 8; + + // TODO(fgalligan): Move random number generation to platform specific code. 
+#ifdef _MSC_VER + (void)seed; + unsigned int random_value; + const errno_t e = rand_s(&random_value); + (void)e; + const int32 nn = random_value; +#elif __ANDROID__ + int32 temp_num = 1; + int fd = open("/dev/urandom", O_RDONLY); + if (fd != -1) { + read(fd, &temp_num, sizeof(int32)); + close(fd); + } + const int32 nn = temp_num; +#elif defined __MINGW32__ + const int32 nn = rand(); +#else + const int32 nn = rand_r(seed); +#endif + const int32 n = 0xFF & (nn >> 4); // throw away low-order bits + + uid |= n; + } + + return uid; +} diff --git a/source/libvpx/third_party/libwebm/mkvmuxerutil.hpp b/source/libvpx/third_party/libwebm/mkvmuxerutil.hpp new file mode 100644 index 0000000..d196ad3 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvmuxerutil.hpp @@ -0,0 +1,151 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVMUXERUTIL_HPP +#define MKVMUXERUTIL_HPP + +#include "mkvmuxertypes.hpp" + +namespace mkvmuxer { + +class IMkvWriter; + +const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL; +const int64 kMaxBlockTimecode = 0x07FFFLL; + +// Writes out |value| in Big Endian order. Returns 0 on success. +int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size); + +// Returns the size in bytes of the element. 
+int32 GetUIntSize(uint64 value); +int32 GetCodedUIntSize(uint64 value); +uint64 EbmlMasterElementSize(uint64 type, uint64 value); +uint64 EbmlElementSize(uint64 type, int64 value); +uint64 EbmlElementSize(uint64 type, uint64 value); +uint64 EbmlElementSize(uint64 type, float value); +uint64 EbmlElementSize(uint64 type, const char* value); +uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size); + +// Creates an EBML coded number from |value| and writes it out. The size of +// the coded number is determined by the value of |value|. |value| must not +// be in a coded form. Returns 0 on success. +int32 WriteUInt(IMkvWriter* writer, uint64 value); + +// Creates an EBML coded number from |value| and writes it out. The size of +// the coded number is determined by the value of |size|. |value| must not +// be in a coded form. Returns 0 on success. +int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size); + +// Output an Mkv master element. Returns true if the element was written. +bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size); + +// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the +// ID to |SerializeInt|. Returns 0 on success. +int32 WriteID(IMkvWriter* writer, uint64 type); + +// Output an Mkv non-master element. Returns true if the element was written. +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value); +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value); +bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value); +bool WriteEbmlElement(IMkvWriter* writer, + uint64 type, + const uint8* value, + uint64 size); + +// Output an Mkv Simple Block. +// Inputs: +// data: Pointer to the data. +// length: Length of the data. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. +// timecode: Relative timecode of the Block. 
Only values in the +// range [0, 2^15) are permitted. +// is_key: Non-zero value specifies that frame is a key frame. +uint64 WriteSimpleBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 is_key); + +// Output a metadata keyframe, using a Block Group element. +// Inputs: +// data: Pointer to the (meta)data. +// length: Length of the (meta)data. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. +// timecode Timecode of frame, relative to cluster timecode. Only +// values in the range [0, 2^15) are permitted. +// duration_timecode Duration of frame, using timecode units. +uint64 WriteMetadataBlock(IMkvWriter* writer, + const uint8* data, + uint64 length, + uint64 track_number, + int64 timecode, + uint64 duration_timecode); + +// Output an Mkv Block with BlockAdditional data. +// Inputs: +// data: Pointer to the data. +// length: Length of the data. +// additional: Pointer to the additional data +// additional_length: Length of the additional data. +// add_id: Value of BlockAddID element. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. +// timecode: Relative timecode of the Block. Only values in the +// range [0, 2^15) are permitted. +// is_key: Non-zero value specifies that frame is a key frame. +uint64 WriteBlockWithAdditional(IMkvWriter* writer, + const uint8* data, + uint64 length, + const uint8* additional, + uint64 additional_length, + uint64 add_id, + uint64 track_number, + int64 timecode, + uint64 is_key); + +// Output an Mkv Block with a DiscardPadding element. +// Inputs: +// data: Pointer to the data. +// length: Length of the data. +// discard_padding: DiscardPadding value. +// track_number: Track to add the data to. Value returned by Add track +// functions. Only values in the range [1, 126] are +// permitted. 
+// timecode: Relative timecode of the Block. Only values in the +// range [0, 2^15) are permitted. +// is_key: Non-zero value specifies that frame is a key frame. +uint64 WriteBlockWithDiscardPadding(IMkvWriter* writer, + const uint8* data, + uint64 length, + int64 discard_padding, + uint64 track_number, + int64 timecode, + uint64 is_key); + +// Output a void element. |size| must be the entire size in bytes that will be +// void. The function will calculate the size of the void header and subtract +// it from |size|. +uint64 WriteVoidElement(IMkvWriter* writer, uint64 size); + +// Returns the version number of the muxer in |major|, |minor|, |build|, +// and |revision|. +void GetVersion(int32* major, int32* minor, int32* build, int32* revision); + +// Returns a random number to be used for UID, using |seed| to seed +// the random-number generator (see POSIX rand_r() for semantics). +uint64 MakeUID(unsigned int* seed); + +} //end namespace mkvmuxer + +#endif // MKVMUXERUTIL_HPP diff --git a/source/libvpx/third_party/libwebm/mkvparser.cpp b/source/libvpx/third_party/libwebm/mkvparser.cpp new file mode 100644 index 0000000..b41456a --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvparser.cpp @@ -0,0 +1,9617 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvparser.hpp" +#include <cassert> +#include <cstring> +#include <new> +#include <climits> + +#ifdef _MSC_VER +// Disable MSVC warnings that suggest making code non-portable. 
+#pragma warning(disable:4996) +#endif + +mkvparser::IMkvReader::~IMkvReader() +{ +} + +void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision) +{ + major = 1; + minor = 0; + build = 0; + revision = 27; +} + +long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) +{ + assert(pReader); + assert(pos >= 0); + + int status; + +//#ifdef _DEBUG +// long long total, available; +// status = pReader->Length(&total, &available); +// assert(status >= 0); +// assert((total < 0) || (available <= total)); +// assert(pos < available); +// assert((available - pos) >= 1); //assume here max u-int len is 8 +//#endif + + len = 1; + + unsigned char b; + + status = pReader->Read(pos, 1, &b); + + if (status < 0) //error or underflow + return status; + + if (status > 0) //interpreted as "underflow" + return E_BUFFER_NOT_FULL; + + if (b == 0) //we can't handle u-int values larger than 8 bytes + return E_FILE_FORMAT_INVALID; + + unsigned char m = 0x80; + + while (!(b & m)) + { + m >>= 1; + ++len; + } + +//#ifdef _DEBUG +// assert((available - pos) >= len); +//#endif + + long long result = b & (~m); + ++pos; + + for (int i = 1; i < len; ++i) + { + status = pReader->Read(pos, 1, &b); + + if (status < 0) + { + len = 1; + return status; + } + + if (status > 0) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result <<= 8; + result |= b; + + ++pos; + } + + return result; +} + +long long mkvparser::GetUIntLength( + IMkvReader* pReader, + long long pos, + long& len) +{ + assert(pReader); + assert(pos >= 0); + + long long total, available; + + int status = pReader->Length(&total, &available); + assert(status >= 0); + assert((total < 0) || (available <= total)); + + len = 1; + + if (pos >= available) + return pos; //too few bytes available + + unsigned char b; + + status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + assert(status == 0); + + if (b == 0) //we can't handle u-int values larger than 8 bytes + return 
E_FILE_FORMAT_INVALID; + + unsigned char m = 0x80; + + while (!(b & m)) + { + m >>= 1; + ++len; + } + + return 0; //success +} + + +long long mkvparser::UnserializeUInt( + IMkvReader* pReader, + long long pos, + long long size) +{ + assert(pReader); + assert(pos >= 0); + + if ((size <= 0) || (size > 8)) + return E_FILE_FORMAT_INVALID; + + long long result = 0; + + for (long long i = 0; i < size; ++i) + { + unsigned char b; + + const long status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + result <<= 8; + result |= b; + + ++pos; + } + + return result; +} + + +long mkvparser::UnserializeFloat( + IMkvReader* pReader, + long long pos, + long long size_, + double& result) +{ + assert(pReader); + assert(pos >= 0); + + if ((size_ != 4) && (size_ != 8)) + return E_FILE_FORMAT_INVALID; + + const long size = static_cast<long>(size_); + + unsigned char buf[8]; + + const int status = pReader->Read(pos, size, buf); + + if (status < 0) //error + return status; + + if (size == 4) + { + union + { + float f; + unsigned long ff; + }; + + ff = 0; + + for (int i = 0;;) + { + ff |= buf[i]; + + if (++i >= 4) + break; + + ff <<= 8; + } + + result = f; + } + else + { + assert(size == 8); + + union + { + double d; + unsigned long long dd; + }; + + dd = 0; + + for (int i = 0;;) + { + dd |= buf[i]; + + if (++i >= 8) + break; + + dd <<= 8; + } + + result = d; + } + + return 0; +} + + +long mkvparser::UnserializeInt( + IMkvReader* pReader, + long long pos, + long size, + long long& result) +{ + assert(pReader); + assert(pos >= 0); + assert(size > 0); + assert(size <= 8); + + { + signed char b; + + const long status = pReader->Read(pos, 1, (unsigned char*)&b); + + if (status < 0) + return status; + + result = b; + + ++pos; + } + + for (long i = 1; i < size; ++i) + { + unsigned char b; + + const long status = pReader->Read(pos, 1, &b); + + if (status < 0) + return status; + + result <<= 8; + result |= b; + + ++pos; + } + + return 0; //success +} + + +long 
mkvparser::UnserializeString( + IMkvReader* pReader, + long long pos, + long long size_, + char*& str) +{ + delete[] str; + str = NULL; + + if (size_ >= LONG_MAX) //we need (size+1) chars + return E_FILE_FORMAT_INVALID; + + const long size = static_cast<long>(size_); + + str = new (std::nothrow) char[size+1]; + + if (str == NULL) + return -1; + + unsigned char* const buf = reinterpret_cast<unsigned char*>(str); + + const long status = pReader->Read(pos, size, buf); + + if (status) + { + delete[] str; + str = NULL; + + return status; + } + + str[size] = '\0'; + + return 0; //success +} + + +long mkvparser::ParseElementHeader( + IMkvReader* pReader, + long long& pos, + long long stop, + long long& id, + long long& size) +{ + if ((stop >= 0) && (pos >= stop)) + return E_FILE_FORMAT_INVALID; + + long len; + + id = ReadUInt(pReader, pos, len); + + if (id < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume id + + if ((stop >= 0) && (pos >= stop)) + return E_FILE_FORMAT_INVALID; + + size = ReadUInt(pReader, pos, len); + + if (size < 0) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume length of size + + //pos now designates payload + + if ((stop >= 0) && ((pos + size) > stop)) + return E_FILE_FORMAT_INVALID; + + return 0; //success +} + + +bool mkvparser::Match( + IMkvReader* pReader, + long long& pos, + unsigned long id_, + long long& val) +{ + assert(pReader); + assert(pos >= 0); + + long long total, available; + + const long status = pReader->Length(&total, &available); + assert(status >= 0); + assert((total < 0) || (available <= total)); + if (status < 0) + return false; + + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); + assert(len > 0); + assert(len <= 8); + assert((pos + len) <= available); + + if ((unsigned long)id != id_) + return false; + + pos += len; //consume id + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert(size <= 8); + assert(len > 0); + assert(len <= 8); + 
assert((pos + len) <= available); + + pos += len; //consume length of size of payload + + val = UnserializeUInt(pReader, pos, size); + assert(val >= 0); + + pos += size; //consume size of payload + + return true; +} + +bool mkvparser::Match( + IMkvReader* pReader, + long long& pos, + unsigned long id_, + unsigned char*& buf, + size_t& buflen) +{ + assert(pReader); + assert(pos >= 0); + + long long total, available; + + long status = pReader->Length(&total, &available); + assert(status >= 0); + assert((total < 0) || (available <= total)); + if (status < 0) + return false; + + long len; + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); + assert(len > 0); + assert(len <= 8); + assert((pos + len) <= available); + + if ((unsigned long)id != id_) + return false; + + pos += len; //consume id + + const long long size_ = ReadUInt(pReader, pos, len); + assert(size_ >= 0); + assert(len > 0); + assert(len <= 8); + assert((pos + len) <= available); + + pos += len; //consume length of size of payload + assert((pos + size_) <= available); + + const long buflen_ = static_cast<long>(size_); + + buf = new (std::nothrow) unsigned char[buflen_]; + assert(buf); //TODO + + status = pReader->Read(pos, buflen_, buf); + assert(status == 0); //TODO + + buflen = buflen_; + + pos += size_; //consume size of payload + return true; +} + + +namespace mkvparser +{ + +EBMLHeader::EBMLHeader() : + m_docType(NULL) +{ + Init(); +} + +EBMLHeader::~EBMLHeader() +{ + delete[] m_docType; +} + +void EBMLHeader::Init() +{ + m_version = 1; + m_readVersion = 1; + m_maxIdLength = 4; + m_maxSizeLength = 8; + + if (m_docType) + { + delete[] m_docType; + m_docType = NULL; + } + + m_docTypeVersion = 1; + m_docTypeReadVersion = 1; +} + +long long EBMLHeader::Parse( + IMkvReader* pReader, + long long& pos) +{ + assert(pReader); + + long long total, available; + + long status = pReader->Length(&total, &available); + + if (status < 0) //error + return status; + + pos = 0; + long long end = 
(available >= 1024) ? 1024 : available; + + for (;;) + { + unsigned char b = 0; + + while (pos < end) + { + status = pReader->Read(pos, 1, &b); + + if (status < 0) //error + return status; + + if (b == 0x1A) + break; + + ++pos; + } + + if (b != 0x1A) + { + if (pos >= 1024) + return E_FILE_FORMAT_INVALID; //don't bother looking anymore + + if ((total >= 0) && ((total - available) < 5)) + return E_FILE_FORMAT_INVALID; + + return available + 5; //5 = 4-byte ID + 1st byte of size + } + + if ((total >= 0) && ((total - pos) < 5)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < 5) + return pos + 5; //try again later + + long len; + + const long long result = ReadUInt(pReader, pos, len); + + if (result < 0) //error + return result; + + if (result == 0x0A45DFA3) //EBML Header ID + { + pos += len; //consume ID + break; + } + + ++pos; //throw away just the 0x1A byte, and try again + } + + //pos designates start of size field + + //get length of size field + + long len; + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return result; + + if (result > 0) //need more data + return result; + + assert(len > 0); + assert(len <= 8); + + if ((total >= 0) && ((total - pos) < len)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < len) + return pos + len; //try again later + + //get the EBML header size + + result = ReadUInt(pReader, pos, len); + + if (result < 0) //error + return result; + + pos += len; //consume size field + + //pos now designates start of payload + + if ((total >= 0) && ((total - pos) < result)) + return E_FILE_FORMAT_INVALID; + + if ((available - pos) < result) + return pos + result; + + end = pos + result; + + Init(); + + while (pos < end) + { + long long id, size; + + status = ParseElementHeader( + pReader, + pos, + end, + id, + size); + + if (status < 0) //error + return status; + + if (size == 0) //weird + return E_FILE_FORMAT_INVALID; + + if (id == 0x0286) //version + { + m_version = 
UnserializeUInt(pReader, pos, size); + + if (m_version <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x02F7) //read version + { + m_readVersion = UnserializeUInt(pReader, pos, size); + + if (m_readVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x02F2) //max id length + { + m_maxIdLength = UnserializeUInt(pReader, pos, size); + + if (m_maxIdLength <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x02F3) //max size length + { + m_maxSizeLength = UnserializeUInt(pReader, pos, size); + + if (m_maxSizeLength <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0282) //doctype + { + if (m_docType) + return E_FILE_FORMAT_INVALID; + + status = UnserializeString(pReader, pos, size, m_docType); + + if (status) //error + return status; + } + else if (id == 0x0287) //doctype version + { + m_docTypeVersion = UnserializeUInt(pReader, pos, size); + + if (m_docTypeVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0285) //doctype read version + { + m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); + + if (m_docTypeReadVersion <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; + } + + assert(pos == end); + return 0; +} + + +Segment::Segment( + IMkvReader* pReader, + long long elem_start, + //long long elem_size, + long long start, + long long size) : + m_pReader(pReader), + m_element_start(elem_start), + //m_element_size(elem_size), + m_start(start), + m_size(size), + m_pos(start), + m_pUnknownSize(0), + m_pSeekHead(NULL), + m_pInfo(NULL), + m_pTracks(NULL), + m_pCues(NULL), + m_pChapters(NULL), + m_clusters(NULL), + m_clusterCount(0), + m_clusterPreloadCount(0), + m_clusterSize(0) +{ +} + + +Segment::~Segment() +{ + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** i = m_clusters; + Cluster** j = m_clusters + count; + + while (i != j) + { + Cluster* const p = *i++; + assert(p); + + delete p; + } + + delete[] m_clusters; + + delete m_pTracks; + delete m_pInfo; + delete 
m_pCues; + delete m_pChapters; + delete m_pSeekHead; +} + + +long long Segment::CreateInstance( + IMkvReader* pReader, + long long pos, + Segment*& pSegment) +{ + assert(pReader); + assert(pos >= 0); + + pSegment = NULL; + + long long total, available; + + const long status = pReader->Length(&total, &available); + + if (status < 0) //error + return status; + + if (available < 0) + return -1; + + if ((total >= 0) && (available > total)) + return -1; + + //I would assume that in practice this loop would execute + //exactly once, but we allow for other elements (e.g. Void) + //to immediately follow the EBML header. This is fine for + //the source filter case (since the entire file is available), + //but in the splitter case over a network we should probably + //just give up early. We could for example decide only to + //execute this loop a maximum of, say, 10 times. + //TODO: + //There is an implied "give up early" by only parsing up + //to the available limit. We do do that, but only if the + //total file size is unknown. We could decide to always + //use what's available as our limit (irrespective of whether + //we happen to know the total file length). This would have + //as its sense "parse this much of the file before giving up", + //which a slightly different sense from "try to parse up to + //10 EMBL elements before giving up". 
+ + for (;;) + { + if ((total >= 0) && (pos >= total)) + return E_FILE_FORMAT_INVALID; + + //Read ID + long len; + long long result = GetUIntLength(pReader, pos, len); + + if (result) //error, or too few available bytes + return result; + + if ((total >= 0) && ((pos + len) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long idpos = pos; + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return id; + + pos += len; //consume ID + + //Read Size + + result = GetUIntLength(pReader, pos, len); + + if (result) //error, or too few available bytes + return result; + + if ((total >= 0) && ((pos + len) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return size; + + pos += len; //consume length of size of element + + //Pos now points to start of payload + + //Handle "unknown size" for live streaming of webm files. + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (id == 0x08538067) //Segment ID + { + if (size == unknown_size) + size = -1; + + else if (total < 0) + size = -1; + + else if ((pos + size) > total) + size = -1; + + pSegment = new (std::nothrow) Segment( + pReader, + idpos, + //elem_size + pos, + size); + + if (pSegment == 0) + return -1; //generic error + + return 0; //success + } + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + size) > total)) + return E_FILE_FORMAT_INVALID; + + if ((pos + size) > available) + return pos + size; + + pos += size; //consume payload + } +} + + +long long Segment::ParseHeaders() +{ + //Outermost (level 0) segment object has been constructed, + //and pos designates start of payload. We need to find the + //inner (level 1) elements. 
+ long long total, available; + + const int status = m_pReader->Length(&total, &available); + + if (status < 0) //error + return status; + + assert((total < 0) || (available <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + assert((segment_stop < 0) || (total < 0) || (segment_stop <= total)); + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + for (;;) + { + if ((total >= 0) && (m_pos >= total)) + break; + + if ((segment_stop >= 0) && (m_pos >= segment_stop)) + break; + + long long pos = m_pos; + const long long element_start = pos; + + if ((pos + 1) > available) + return (pos + 1); + + long len; + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return result; + + if (result > 0) //underflow (weird) + return (pos + 1); + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error + return id; + + if (id == 0x0F43B675) //Cluster ID + break; + + pos += len; //consume ID + + if ((pos + 1) > available) + return (pos + 1); + + //Read Size + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return result; + + if (result > 0) //underflow (weird) + return (pos + 1); + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > available) + return pos + len; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return size; + + pos += len; //consume length of size of element + + const long long element_size = size + pos - element_start; + + //Pos now points to start of payload + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + //We read EBML elements either in total or nothing at all. 
+ + if ((pos + size) > available) + return pos + size; + + if (id == 0x0549A966) //Segment Info ID + { + if (m_pInfo) + return E_FILE_FORMAT_INVALID; + + m_pInfo = new (std::nothrow) SegmentInfo( + this, + pos, + size, + element_start, + element_size); + + if (m_pInfo == NULL) + return -1; + + const long status = m_pInfo->Parse(); + + if (status) + return status; + } + else if (id == 0x0654AE6B) //Tracks ID + { + if (m_pTracks) + return E_FILE_FORMAT_INVALID; + + m_pTracks = new (std::nothrow) Tracks(this, + pos, + size, + element_start, + element_size); + + if (m_pTracks == NULL) + return -1; + + const long status = m_pTracks->Parse(); + + if (status) + return status; + } + else if (id == 0x0C53BB6B) //Cues ID + { + if (m_pCues == NULL) + { + m_pCues = new (std::nothrow) Cues( + this, + pos, + size, + element_start, + element_size); + + if (m_pCues == NULL) + return -1; + } + } + else if (id == 0x014D9B74) //SeekHead ID + { + if (m_pSeekHead == NULL) + { + m_pSeekHead = new (std::nothrow) SeekHead( + this, + pos, + size, + element_start, + element_size); + + if (m_pSeekHead == NULL) + return -1; + + const long status = m_pSeekHead->Parse(); + + if (status) + return status; + } + } + else if (id == 0x0043A770) //Chapters ID + { + if (m_pChapters == NULL) + { + m_pChapters = new (std::nothrow) Chapters( + this, + pos, + size, + element_start, + element_size); + + if (m_pChapters == NULL) + return -1; + + const long status = m_pChapters->Parse(); + + if (status) + return status; + } + } + + m_pos = pos + size; //consume payload + } + + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + if (m_pInfo == NULL) //TODO: liberalize this behavior + return E_FILE_FORMAT_INVALID; + + if (m_pTracks == NULL) + return E_FILE_FORMAT_INVALID; + + return 0; //success +} + + +long Segment::LoadCluster( + long long& pos, + long& len) +{ + for (;;) + { + const long result = DoLoadCluster(pos, len); + + if (result <= 1) + return result; + } +} + + +long Segment::DoLoadCluster( + 
long long& pos, + long& len) +{ + if (m_pos < 0) + return DoLoadClusterUnknownSize(pos, len); + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + long long cluster_off = -1; //offset relative to start of segment + long long cluster_size = -1; //size of cluster payload + + for (;;) + { + if ((total >= 0) && (m_pos >= total)) + return 1; //no more clusters + + if ((segment_stop >= 0) && (m_pos >= segment_stop)) + return 1; //no more clusters + + pos = m_pos; + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + pos += len; //consume ID + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume length of size of element + + //pos now points to start of payload + + if (size == 0) //weird + { + m_pos = pos; + continue; + } + + const long long unknown_size = (1LL << (7 * len)) 
- 1; + +#if 0 //we must handle this to support live webm + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //TODO: allow this +#endif + + if ((segment_stop >= 0) && + (size != unknown_size) && + ((pos + size) > segment_stop)) + { + return E_FILE_FORMAT_INVALID; + } + +#if 0 //commented-out, to support incremental cluster parsing + len = static_cast<long>(size); + + if ((pos + size) > avail) + return E_BUFFER_NOT_FULL; +#endif + + if (id == 0x0C53BB6B) //Cues ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //TODO: liberalize + + if (m_pCues == NULL) + { + const long long element_size = (pos - idpos) + size; + + m_pCues = new Cues(this, + pos, + size, + idpos, + element_size); + assert(m_pCues); //TODO + } + + m_pos = pos + size; //consume payload + continue; + } + + if (id != 0x0F43B675) //Cluster ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //TODO: liberalize + + m_pos = pos + size; //consume payload + continue; + } + + //We have a cluster. + + cluster_off = idpos - m_start; //relative pos + + if (size != unknown_size) + cluster_size = size; + + break; + } + + assert(cluster_off >= 0); //have cluster + + long long pos_; + long len_; + + status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); + + if (status < 0) //error, or underflow + { + pos = pos_; + len = len_; + + return status; + } + + //status == 0 means "no block entries found" + //status > 0 means "found at least one block entry" + + //TODO: + //The issue here is that the segment increments its own + //pos ptr past the most recent cluster parsed, and then + //starts from there to parse the next cluster. If we + //don't know the size of the current cluster, then we + //must either parse its payload (as we do below), looking + //for the cluster (or cues) ID to terminate the parse. + //This isn't really what we want: rather, we really need + //a way to create the curr cluster object immediately. 
+ //The pity is that cluster::parse can determine its own + //boundary, and we largely duplicate that same logic here. + // + //Maybe we need to get rid of our look-ahead preloading + //in source::parse??? + // + //As we're parsing the blocks in the curr cluster + //(in cluster::parse), we should have some way to signal + //to the segment that we have determined the boundary, + //so it can adjust its own segment::m_pos member. + // + //The problem is that we're asserting in asyncreadinit, + //because we adjust the pos down to the curr seek pos, + //and the resulting adjusted len is > 2GB. I'm suspicious + //that this is even correct, but even if it is, we can't + //be loading that much data in the cache anyway. + + const long idx = m_clusterCount; + + if (m_clusterPreloadCount > 0) + { + assert(idx < m_clusterSize); + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + assert(pCluster->m_index < 0); + + const long long off = pCluster->GetPosition(); + assert(off >= 0); + + if (off == cluster_off) //preloaded already + { + if (status == 0) //no entries found + return E_FILE_FORMAT_INVALID; + + if (cluster_size >= 0) + pos += cluster_size; + else + { + const long long element_size = pCluster->GetElementSize(); + + if (element_size <= 0) + return E_FILE_FORMAT_INVALID; //TODO: handle this case + + pos = pCluster->m_element_start + element_size; + } + + pCluster->m_index = idx; //move from preloaded to loaded + ++m_clusterCount; + --m_clusterPreloadCount; + + m_pos = pos; //consume payload + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + return 0; //success + } + } + + if (status == 0) //no entries found + { + if (cluster_size < 0) + return E_FILE_FORMAT_INVALID; //TODO: handle this + + pos += cluster_size; + + if ((total >= 0) && (pos >= total)) + { + m_pos = total; + return 1; //no more clusters + } + + if ((segment_stop >= 0) && (pos >= segment_stop)) + { + m_pos = segment_stop; + return 1; //no more clusters + } + + m_pos = pos; + return 
2; //try again + } + + //status > 0 means we have an entry + + Cluster* const pCluster = Cluster::Create(this, + idx, + cluster_off); + //element_size); + assert(pCluster); + + AppendCluster(pCluster); + assert(m_clusters); + assert(idx < m_clusterSize); + assert(m_clusters[idx] == pCluster); + + if (cluster_size >= 0) + { + pos += cluster_size; + + m_pos = pos; + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + return 0; + } + + m_pUnknownSize = pCluster; + m_pos = -pos; + + return 0; //partial success, since we have a new cluster + + //status == 0 means "no block entries found" + + //pos designates start of payload + //m_pos has NOT been adjusted yet (in case we need to come back here) + +#if 0 + + if (cluster_size < 0) //unknown size + { + const long long payload_pos = pos; //absolute pos of cluster payload + + for (;;) //determine cluster size + { + if ((total >= 0) && (pos >= total)) + break; + + if ((segment_stop >= 0) && (pos >= segment_stop)) + break; //no more clusters + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. 
+ + if (id == 0x0F43B675) //Cluster ID + break; + + if (id == 0x0C53BB6B) //Cues ID + break; + + switch (id) + { + case 0x20: //BlockGroup + case 0x23: //Simple Block + case 0x67: //TimeCode + case 0x2B: //PrevSize + break; + + default: + assert(false); + break; + } + + pos += len; //consume ID (of sub-element) + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field of element + + //pos now points to start of sub-element's payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload of sub-element + assert((segment_stop < 0) || (pos <= segment_stop)); + } //determine cluster size + + cluster_size = pos - payload_pos; + assert(cluster_size >= 0); + + pos = payload_pos; //reset and re-parse original cluster + } + + if (m_clusterPreloadCount > 0) + { + assert(idx < m_clusterSize); + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + assert(pCluster->m_index < 0); + + const long long off = pCluster->GetPosition(); + assert(off >= 0); + + if (off == cluster_off) //preloaded already + return E_FILE_FORMAT_INVALID; //subtle + } + + m_pos = pos + cluster_size; //consume payload + assert((segment_stop < 0) || (m_pos <= segment_stop)); + + return 2; //try to find another cluster + +#endif + 
+} + + +long Segment::DoLoadClusterUnknownSize( + long long& pos, + long& len) +{ + assert(m_pos < 0); + assert(m_pUnknownSize); + +#if 0 + assert(m_pUnknownSize->GetElementSize() < 0); //TODO: verify this + + const long long element_start = m_pUnknownSize->m_element_start; + + pos = -m_pos; + assert(pos > element_start); + + //We have already consumed the (cluster) ID and size fields. + //We just need to consume the blocks and other sub-elements + //of this cluster, until we discover the boundary. + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + long long element_size = -1; + + for (;;) //determine cluster size + { + if ((total >= 0) && (pos >= total)) + { + element_size = total - element_start; + assert(element_size > 0); + + break; + } + + if ((segment_stop >= 0) && (pos >= segment_stop)) + { + element_size = segment_stop - element_start; + assert(element_size > 0); + + break; + } + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. 
+ + if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster ID or Cues ID + { + element_size = pos - element_start; + assert(element_size > 0); + + break; + } + +#ifdef _DEBUG + switch (id) + { + case 0x20: //BlockGroup + case 0x23: //Simple Block + case 0x67: //TimeCode + case 0x2B: //PrevSize + break; + + default: + assert(false); + break; + } +#endif + + pos += len; //consume ID (of sub-element) + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field of element + + //pos now points to start of sub-element's payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload of sub-element + assert((segment_stop < 0) || (pos <= segment_stop)); + } //determine cluster size + + assert(element_size >= 0); + + m_pos = element_start + element_size; + m_pUnknownSize = 0; + + return 2; //continue parsing +#else + const long status = m_pUnknownSize->Parse(pos, len); + + if (status < 0) //error or underflow + return status; + + if (status == 0) //parsed a block + return 2; //continue parsing + + assert(status > 0); //nothing left to parse of this cluster + + const long long start = m_pUnknownSize->m_element_start; + + const long long size = m_pUnknownSize->GetElementSize(); + assert(size >= 0); + + pos = start 
+ size; + m_pos = pos; + + m_pUnknownSize = 0; + + return 2; //continue parsing +#endif +} + + +void Segment::AppendCluster(Cluster* pCluster) +{ + assert(pCluster); + assert(pCluster->m_index >= 0); + + const long count = m_clusterCount + m_clusterPreloadCount; + + long& size = m_clusterSize; + assert(size >= count); + + const long idx = pCluster->m_index; + assert(idx == m_clusterCount); + + if (count >= size) + { + const long n = (size <= 0) ? 2048 : 2*size; + + Cluster** const qq = new Cluster*[n]; + Cluster** q = qq; + + Cluster** p = m_clusters; + Cluster** const pp = p + count; + + while (p != pp) + *q++ = *p++; + + delete[] m_clusters; + + m_clusters = qq; + size = n; + } + + if (m_clusterPreloadCount > 0) + { + assert(m_clusters); + + Cluster** const p = m_clusters + m_clusterCount; + assert(*p); + assert((*p)->m_index < 0); + + Cluster** q = p + m_clusterPreloadCount; + assert(q < (m_clusters + size)); + + for (;;) + { + Cluster** const qq = q - 1; + assert((*qq)->m_index < 0); + + *q = *qq; + q = qq; + + if (q == p) + break; + } + } + + m_clusters[idx] = pCluster; + ++m_clusterCount; +} + + +void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) +{ + assert(pCluster); + assert(pCluster->m_index < 0); + assert(idx >= m_clusterCount); + + const long count = m_clusterCount + m_clusterPreloadCount; + + long& size = m_clusterSize; + assert(size >= count); + + if (count >= size) + { + const long n = (size <= 0) ? 
2048 : 2*size; + + Cluster** const qq = new Cluster*[n]; + Cluster** q = qq; + + Cluster** p = m_clusters; + Cluster** const pp = p + count; + + while (p != pp) + *q++ = *p++; + + delete[] m_clusters; + + m_clusters = qq; + size = n; + } + + assert(m_clusters); + + Cluster** const p = m_clusters + idx; + + Cluster** q = m_clusters + count; + assert(q >= p); + assert(q < (m_clusters + size)); + + while (q > p) + { + Cluster** const qq = q - 1; + assert((*qq)->m_index < 0); + + *q = *qq; + q = qq; + } + + m_clusters[idx] = pCluster; + ++m_clusterPreloadCount; +} + + +long Segment::Load() +{ + assert(m_clusters == NULL); + assert(m_clusterSize == 0); + assert(m_clusterCount == 0); + //assert(m_size >= 0); + + //Outermost (level 0) segment object has been constructed, + //and pos designates start of payload. We need to find the + //inner (level 1) elements. + + const long long header_status = ParseHeaders(); + + if (header_status < 0) //error + return static_cast<long>(header_status); + + if (header_status > 0) //underflow + return E_BUFFER_NOT_FULL; + + assert(m_pInfo); + assert(m_pTracks); + + for (;;) + { + const int status = LoadCluster(); + + if (status < 0) //error + return status; + + if (status >= 1) //no more clusters + return 0; + } +} + + +SeekHead::SeekHead( + Segment* pSegment, + long long start, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_entries(0), + m_entry_count(0), + m_void_elements(0), + m_void_element_count(0) +{ +} + + +SeekHead::~SeekHead() +{ + delete[] m_entries; + delete[] m_void_elements; +} + + +long SeekHead::Parse() +{ + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; + const long long stop = m_start + m_size; + + //first count the seek head entries + + int entry_count = 0; + int void_element_count = 0; + + while (pos < stop) + { + long long id, size; 
+ + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x0DBB) //SeekEntry ID + ++entry_count; + else if (id == 0x6C) //Void ID + ++void_element_count; + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + m_entries = new (std::nothrow) Entry[entry_count]; + + if (m_entries == NULL) + return -1; + + m_void_elements = new (std::nothrow) VoidElement[void_element_count]; + + if (m_void_elements == NULL) + return -1; + + //now parse the entries and void elements + + Entry* pEntry = m_entries; + VoidElement* pVoidElement = m_void_elements; + + pos = m_start; + + while (pos < stop) + { + const long long idpos = pos; + + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x0DBB) //SeekEntry ID + { + if (ParseEntry(pReader, pos, size, pEntry)) + { + Entry& e = *pEntry++; + + e.element_start = idpos; + e.element_size = (pos + size) - idpos; + } + } + else if (id == 0x6C) //Void ID + { + VoidElement& e = *pVoidElement++; + + e.element_start = idpos; + e.element_size = (pos + size) - idpos; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); + assert(count_ >= 0); + assert(count_ <= entry_count); + + m_entry_count = static_cast<int>(count_); + + count_ = ptrdiff_t(pVoidElement - m_void_elements); + assert(count_ >= 0); + assert(count_ <= void_element_count); + + m_void_element_count = static_cast<int>(count_); + + return 0; +} + + +int SeekHead::GetCount() const +{ + return m_entry_count; +} + +const SeekHead::Entry* SeekHead::GetEntry(int idx) const +{ + if (idx < 0) + return 0; + + if (idx >= m_entry_count) + return 0; + + return m_entries + idx; +} + +int SeekHead::GetVoidElementCount() const +{ + return m_void_element_count; +} + +const 
SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const +{ + if (idx < 0) + return 0; + + if (idx >= m_void_element_count) + return 0; + + return m_void_elements + idx; +} + + +#if 0 +void Segment::ParseCues(long long off) +{ + if (m_pCues) + return; + + //odbgstream os; + //os << "Segment::ParseCues (begin)" << endl; + + long long pos = m_start + off; + const long long element_start = pos; + const long long stop = m_start + m_size; + + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); + + const long long idpos = pos; + + const long long id = ReadUInt(m_pReader, idpos, len); + assert(id == 0x0C53BB6B); //Cues ID + + pos += len; //consume ID + assert(pos < stop); + + //Read Size + + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size >= 0); + + pos += len; //consume length of size of element + assert((pos + size) <= stop); + + const long long element_size = size + pos - element_start; + + //Pos now points to start of payload + + m_pCues = new Cues(this, pos, size, element_start, element_size); + assert(m_pCues); //TODO + + //os << "Segment::ParseCues (end)" << endl; +} +#else +long Segment::ParseCues( + long long off, + long long& pos, + long& len) +{ + if (m_pCues) + return 0; //success + + if (off < 0) + return -1; + + long long total, avail; + + const int status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + pos = m_start + off; + + if ((total < 0) || (pos >= total)) + return 1; //don't bother parsing cues + + const long long element_start = pos; + const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //underflow (weird) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id != 0x0C53BB6B) //Cues ID + return E_FILE_FORMAT_INVALID; + + pos += len; //consume ID + assert((segment_stop < 0) || (pos <= segment_stop)); + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //underflow (weird) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + if (size == 0) //weird, although technically not illegal + return 1; //done + + pos += len; //consume length of size of element + assert((segment_stop < 0) || (pos <= segment_stop)); + + //Pos now points to start of payload + + const long long element_stop = pos + size; + + if ((segment_stop >= 0) && (element_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (element_stop > total)) + return 1; //don't bother parsing anymore + + len = static_cast<long>(size); + + if (element_stop > avail) + return E_BUFFER_NOT_FULL; + + const long long element_size = element_stop - element_start; + + m_pCues = new (std::nothrow) Cues( + this, + pos, + size, + element_start, + element_size); + assert(m_pCues); //TODO + 
+ return 0; //success +} +#endif + + +#if 0 +void Segment::ParseSeekEntry( + long long start, + long long size_) +{ + long long pos = start; + + const long long stop = start + size_; + + long len; + + const long long seekIdId = ReadUInt(m_pReader, pos, len); + //seekIdId; + assert(seekIdId == 0x13AB); //SeekID ID + assert((pos + len) <= stop); + + pos += len; //consume id + + const long long seekIdSize = ReadUInt(m_pReader, pos, len); + assert(seekIdSize >= 0); + assert((pos + len) <= stop); + + pos += len; //consume size + + const long long seekId = ReadUInt(m_pReader, pos, len); //payload + assert(seekId >= 0); + assert(len == seekIdSize); + assert((pos + len) <= stop); + + pos += seekIdSize; //consume payload + + const long long seekPosId = ReadUInt(m_pReader, pos, len); + //seekPosId; + assert(seekPosId == 0x13AC); //SeekPos ID + assert((pos + len) <= stop); + + pos += len; //consume id + + const long long seekPosSize = ReadUInt(m_pReader, pos, len); + assert(seekPosSize >= 0); + assert((pos + len) <= stop); + + pos += len; //consume size + assert((pos + seekPosSize) <= stop); + + const long long seekOff = UnserializeUInt(m_pReader, pos, seekPosSize); + assert(seekOff >= 0); + assert(seekOff < m_size); + + pos += seekPosSize; //consume payload + assert(pos == stop); + + const long long seekPos = m_start + seekOff; + assert(seekPos < (m_start + m_size)); + + if (seekId == 0x0C53BB6B) //Cues ID + ParseCues(seekOff); +} +#else +bool SeekHead::ParseEntry( + IMkvReader* pReader, + long long start, + long long size_, + Entry* pEntry) +{ + if (size_ <= 0) + return false; + + long long pos = start; + const long long stop = start + size_; + + long len; + + //parse the container for the level-1 element ID + + const long long seekIdId = ReadUInt(pReader, pos, len); + //seekIdId; + + if (seekIdId != 0x13AB) //SeekID ID + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume SeekID id + + const long long seekIdSize = ReadUInt(pReader, pos, 
len); + + if (seekIdSize <= 0) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume size of field + + if ((pos + seekIdSize) > stop) + return false; + + //Note that the SeekId payload really is serialized + //as a "Matroska integer", not as a plain binary value. + //In fact, Matroska requires that ID values in the + //stream exactly match the binary representation as listed + //in the Matroska specification. + // + //This parser is more liberal, and permits IDs to have + //any width. (This could make the representation in the stream + //different from what's in the spec, but it doesn't matter here, + //since we always normalize "Matroska integer" values.) + + pEntry->id = ReadUInt(pReader, pos, len); //payload + + if (pEntry->id <= 0) + return false; + + if (len != seekIdSize) + return false; + + pos += seekIdSize; //consume SeekID payload + + const long long seekPosId = ReadUInt(pReader, pos, len); + + if (seekPosId != 0x13AC) //SeekPos ID + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume id + + const long long seekPosSize = ReadUInt(pReader, pos, len); + + if (seekPosSize <= 0) + return false; + + if ((pos + len) > stop) + return false; + + pos += len; //consume size + + if ((pos + seekPosSize) > stop) + return false; + + pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); + + if (pEntry->pos < 0) + return false; + + pos += seekPosSize; //consume payload + + if (pos != stop) + return false; + + return true; +} +#endif + + +Cues::Cues( + Segment* pSegment, + long long start_, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start_), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_cue_points(NULL), + m_count(0), + m_preload_count(0), + m_pos(start_) +{ +} + + +Cues::~Cues() +{ + const long n = m_count + m_preload_count; + + CuePoint** p = m_cue_points; + CuePoint** const q = p + n; + + while (p 
!= q) + { + CuePoint* const pCP = *p++; + assert(pCP); + + delete pCP; + } + + delete[] m_cue_points; +} + + +long Cues::GetCount() const +{ + if (m_cue_points == NULL) + return -1; + + return m_count; //TODO: really ignore preload count? +} + + +bool Cues::DoneParsing() const +{ + const long long stop = m_start + m_size; + return (m_pos >= stop); +} + + +void Cues::Init() const +{ + if (m_cue_points) + return; + + assert(m_count == 0); + assert(m_preload_count == 0); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + const long long stop = m_start + m_size; + long long pos = m_start; + + long cue_points_size = 0; + + while (pos < stop) + { + const long long idpos = pos; + + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; //consume Size field + assert((pos + size) <= stop); + + if (id == 0x3B) //CuePoint ID + PreloadCuePoint(cue_points_size, idpos); + + pos += size; //consume payload + assert(pos <= stop); + } +} + + +void Cues::PreloadCuePoint( + long& cue_points_size, + long long pos) const +{ + assert(m_count == 0); + + if (m_preload_count >= cue_points_size) + { + const long n = (cue_points_size <= 0) ? 
2048 : 2*cue_points_size; + + CuePoint** const qq = new CuePoint*[n]; + CuePoint** q = qq; //beginning of target + + CuePoint** p = m_cue_points; //beginning of source + CuePoint** const pp = p + m_preload_count; //end of source + + while (p != pp) + *q++ = *p++; + + delete[] m_cue_points; + + m_cue_points = qq; + cue_points_size = n; + } + + CuePoint* const pCP = new CuePoint(m_preload_count, pos); + m_cue_points[m_preload_count++] = pCP; +} + + +bool Cues::LoadCuePoint() const +{ + //odbgstream os; + //os << "Cues::LoadCuePoint" << endl; + + const long long stop = m_start + m_size; + + if (m_pos >= stop) + return false; //nothing else to do + + Init(); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + while (m_pos < stop) + { + const long long idpos = m_pos; + + long len; + + const long long id = ReadUInt(pReader, m_pos, len); + assert(id >= 0); //TODO + assert((m_pos + len) <= stop); + + m_pos += len; //consume ID + + const long long size = ReadUInt(pReader, m_pos, len); + assert(size >= 0); + assert((m_pos + len) <= stop); + + m_pos += len; //consume Size field + assert((m_pos + size) <= stop); + + if (id != 0x3B) //CuePoint ID + { + m_pos += size; //consume payload + assert(m_pos <= stop); + + continue; + } + + assert(m_preload_count > 0); + + CuePoint* const pCP = m_cue_points[m_count]; + assert(pCP); + assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos)); + if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos)) + return false; + + pCP->Load(pReader); + ++m_count; + --m_preload_count; + + m_pos += size; //consume payload + assert(m_pos <= stop); + + return true; //yes, we loaded a cue point + } + + //return (m_pos < stop); + return false; //no, we did not load a cue point +} + + +bool Cues::Find( + long long time_ns, + const Track* pTrack, + const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) const +{ + assert(time_ns >= 0); + assert(pTrack); + +#if 0 + LoadCuePoint(); //establish invariant + + assert(m_cue_points); + 
assert(m_count > 0); + + CuePoint** const ii = m_cue_points; + CuePoint** i = ii; + + CuePoint** const jj = ii + m_count + m_preload_count; + CuePoint** j = jj; + + pCP = *i; + assert(pCP); + + if (time_ns <= pCP->GetTime(m_pSegment)) + { + pTP = pCP->Find(pTrack); + return (pTP != NULL); + } + + IMkvReader* const pReader = m_pSegment->m_pReader; + + while (i < j) + { + //INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + CuePoint** const k = i + (j - i) / 2; + assert(k < jj); + + CuePoint* const pCP = *k; + assert(pCP); + + pCP->Load(pReader); + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i <= jj); + assert(i > ii); + + pCP = *--i; + assert(pCP); + assert(pCP->GetTime(m_pSegment) <= time_ns); +#else + if (m_cue_points == NULL) + return false; + + if (m_count == 0) + return false; + + CuePoint** const ii = m_cue_points; + CuePoint** i = ii; + + CuePoint** const jj = ii + m_count; + CuePoint** j = jj; + + pCP = *i; + assert(pCP); + + if (time_ns <= pCP->GetTime(m_pSegment)) + { + pTP = pCP->Find(pTrack); + return (pTP != NULL); + } + + while (i < j) + { + //INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + CuePoint** const k = i + (j - i) / 2; + assert(k < jj); + + CuePoint* const pCP = *k; + assert(pCP); + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i <= jj); + assert(i > ii); + + pCP = *--i; + assert(pCP); + assert(pCP->GetTime(m_pSegment) <= time_ns); +#endif + + //TODO: here and elsewhere, it's probably not correct to search + //for the cue point with this time, and then search for a matching + //track. In principle, the matching track could be on some earlier + //cue point, and with our current algorithm, we'd miss it. 
To make + //this bullet-proof, we'd need to create a secondary structure, + //with a list of cue points that apply to a track, and then search + //that track-based structure for a matching cue point. + + pTP = pCP->Find(pTrack); + return (pTP != NULL); +} + + +#if 0 +bool Cues::FindNext( + long long time_ns, + const Track* pTrack, + const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) const +{ + pCP = 0; + pTP = 0; + + if (m_count == 0) + return false; + + assert(m_cue_points); + + const CuePoint* const* const ii = m_cue_points; + const CuePoint* const* i = ii; + + const CuePoint* const* const jj = ii + m_count; + const CuePoint* const* j = jj; + + while (i < j) + { + //INVARIANT: + //[ii, i) <= time_ns + //[i, j) ? + //[j, jj) > time_ns + + const CuePoint* const* const k = i + (j - i) / 2; + assert(k < jj); + + pCP = *k; + assert(pCP); + + const long long t = pCP->GetTime(m_pSegment); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i <= jj); + + if (i >= jj) //time_ns is greater than max cue point + return false; + + pCP = *i; + assert(pCP); + assert(pCP->GetTime(m_pSegment) > time_ns); + + pTP = pCP->Find(pTrack); + return (pTP != NULL); +} +#endif + + +const CuePoint* Cues::GetFirst() const +{ + if (m_cue_points == NULL) + return NULL; + + if (m_count == 0) + return NULL; + +#if 0 + LoadCuePoint(); //init cues + + const size_t count = m_count + m_preload_count; + + if (count == 0) //weird + return NULL; +#endif + + CuePoint* const* const pp = m_cue_points; + assert(pp); + + CuePoint* const pCP = pp[0]; + assert(pCP); + assert(pCP->GetTimeCode() >= 0); + + return pCP; +} + + +const CuePoint* Cues::GetLast() const +{ + if (m_cue_points == NULL) + return NULL; + + if (m_count <= 0) + return NULL; + +#if 0 + LoadCuePoint(); //init cues + + const size_t count = m_count + m_preload_count; + + if (count == 0) //weird + return NULL; + + const size_t index = count - 1; + + CuePoint* const* const pp = 
m_cue_points; + assert(pp); + + CuePoint* const pCP = pp[index]; + assert(pCP); + + pCP->Load(m_pSegment->m_pReader); + assert(pCP->GetTimeCode() >= 0); +#else + const long index = m_count - 1; + + CuePoint* const* const pp = m_cue_points; + assert(pp); + + CuePoint* const pCP = pp[index]; + assert(pCP); + assert(pCP->GetTimeCode() >= 0); +#endif + + return pCP; +} + + +const CuePoint* Cues::GetNext(const CuePoint* pCurr) const +{ + if (pCurr == NULL) + return NULL; + + assert(pCurr->GetTimeCode() >= 0); + assert(m_cue_points); + assert(m_count >= 1); + +#if 0 + const size_t count = m_count + m_preload_count; + + size_t index = pCurr->m_index; + assert(index < count); + + CuePoint* const* const pp = m_cue_points; + assert(pp); + assert(pp[index] == pCurr); + + ++index; + + if (index >= count) + return NULL; + + CuePoint* const pNext = pp[index]; + assert(pNext); + + pNext->Load(m_pSegment->m_pReader); +#else + long index = pCurr->m_index; + assert(index < m_count); + + CuePoint* const* const pp = m_cue_points; + assert(pp); + assert(pp[index] == pCurr); + + ++index; + + if (index >= m_count) + return NULL; + + CuePoint* const pNext = pp[index]; + assert(pNext); + assert(pNext->GetTimeCode() >= 0); +#endif + + return pNext; +} + + +const BlockEntry* Cues::GetBlock( + const CuePoint* pCP, + const CuePoint::TrackPosition* pTP) const +{ + if (pCP == NULL) + return NULL; + + if (pTP == NULL) + return NULL; + + return m_pSegment->GetBlock(*pCP, *pTP); +} + + +const BlockEntry* Segment::GetBlock( + const CuePoint& cp, + const CuePoint::TrackPosition& tp) +{ + Cluster** const ii = m_clusters; + Cluster** i = ii; + + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** const jj = ii + count; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[ii, i) < pTP->m_pos + //[i, j) ? 
+ //[j, jj) > pTP->m_pos + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pCluster = *k; + assert(pCluster); + + //const long long pos_ = pCluster->m_pos; + //assert(pos_); + //const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); + + const long long pos = pCluster->GetPosition(); + assert(pos >= 0); + + if (pos < tp.m_pos) + i = k + 1; + else if (pos > tp.m_pos) + j = k; + else + return pCluster->GetEntry(cp, tp); + } + + assert(i == j); + //assert(Cluster::HasBlockEntries(this, tp.m_pos)); + + Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); + assert(pCluster); + + const ptrdiff_t idx = i - m_clusters; + + PreloadCluster(pCluster, idx); + assert(m_clusters); + assert(m_clusterPreloadCount > 0); + assert(m_clusters[idx] == pCluster); + + return pCluster->GetEntry(cp, tp); +} + + +const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) +{ + if (requested_pos < 0) + return 0; + + Cluster** const ii = m_clusters; + Cluster** i = ii; + + const long count = m_clusterCount + m_clusterPreloadCount; + + Cluster** const jj = ii + count; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[ii, i) < pTP->m_pos + //[i, j) ? + //[j, jj) > pTP->m_pos + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pCluster = *k; + assert(pCluster); + + //const long long pos_ = pCluster->m_pos; + //assert(pos_); + //const long long pos = pos_ * ((pos_ < 0) ? 
-1 : 1); + + const long long pos = pCluster->GetPosition(); + assert(pos >= 0); + + if (pos < requested_pos) + i = k + 1; + else if (pos > requested_pos) + j = k; + else + return pCluster; + } + + assert(i == j); + //assert(Cluster::HasBlockEntries(this, tp.m_pos)); + + Cluster* const pCluster = Cluster::Create( + this, + -1, + requested_pos); + //-1); + assert(pCluster); + + const ptrdiff_t idx = i - m_clusters; + + PreloadCluster(pCluster, idx); + assert(m_clusters); + assert(m_clusterPreloadCount > 0); + assert(m_clusters[idx] == pCluster); + + return pCluster; +} + + +CuePoint::CuePoint(long idx, long long pos) : + m_element_start(0), + m_element_size(0), + m_index(idx), + m_timecode(-1 * pos), + m_track_positions(NULL), + m_track_positions_count(0) +{ + assert(pos > 0); +} + + +CuePoint::~CuePoint() +{ + delete[] m_track_positions; +} + + +void CuePoint::Load(IMkvReader* pReader) +{ + //odbgstream os; + //os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; + + if (m_timecode >= 0) //already loaded + return; + + assert(m_track_positions == NULL); + assert(m_track_positions_count == 0); + + long long pos_ = -m_timecode; + const long long element_start = pos_; + + long long stop; + + { + long len; + + const long long id = ReadUInt(pReader, pos_, len); + assert(id == 0x3B); //CuePoint ID + if (id != 0x3B) + return; + + pos_ += len; //consume ID + + const long long size = ReadUInt(pReader, pos_, len); + assert(size >= 0); + + pos_ += len; //consume Size field + //pos_ now points to start of payload + + stop = pos_ + size; + } + + const long long element_size = stop - element_start; + + long long pos = pos_; + + //First count number of track positions + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; 
//consume Size field + assert((pos + size) <= stop); + + if (id == 0x33) //CueTime ID + m_timecode = UnserializeUInt(pReader, pos, size); + + else if (id == 0x37) //CueTrackPosition(s) ID + ++m_track_positions_count; + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(m_timecode >= 0); + assert(m_track_positions_count > 0); + + //os << "CuePoint::Load(cont'd): idpos=" << idpos + // << " timecode=" << m_timecode + // << endl; + + m_track_positions = new TrackPosition[m_track_positions_count]; + + //Now parse track positions + + TrackPosition* p = m_track_positions; + pos = pos_; + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; //consume Size field + assert((pos + size) <= stop); + + if (id == 0x37) //CueTrackPosition(s) ID + { + TrackPosition& tp = *p++; + tp.Parse(pReader, pos, size); + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(size_t(p - m_track_positions) == m_track_positions_count); + + m_element_start = element_start; + m_element_size = element_size; +} + + + +void CuePoint::TrackPosition::Parse( + IMkvReader* pReader, + long long start_, + long long size_) +{ + const long long stop = start_ + size_; + long long pos = start_; + + m_track = -1; + m_pos = -1; + m_block = 1; //default + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); + assert((pos + len) <= stop); + + pos += len; //consume Size field + assert((pos + size) <= stop); + + if (id == 0x77) //CueTrack ID + m_track = UnserializeUInt(pReader, pos, size); + + else if (id == 0x71) //CueClusterPos ID + 
m_pos = UnserializeUInt(pReader, pos, size); + + else if (id == 0x1378) //CueBlockNumber + m_block = UnserializeUInt(pReader, pos, size); + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(m_pos >= 0); + assert(m_track > 0); + //assert(m_block > 0); +} + + +const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const +{ + assert(pTrack); + + const long long n = pTrack->GetNumber(); + + const TrackPosition* i = m_track_positions; + const TrackPosition* const j = i + m_track_positions_count; + + while (i != j) + { + const TrackPosition& p = *i++; + + if (p.m_track == n) + return &p; + } + + return NULL; //no matching track number found +} + + +long long CuePoint::GetTimeCode() const +{ + return m_timecode; +} + +long long CuePoint::GetTime(const Segment* pSegment) const +{ + assert(pSegment); + assert(m_timecode >= 0); + + const SegmentInfo* const pInfo = pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long time = scale * m_timecode; + + return time; +} + + +#if 0 +long long Segment::Unparsed() const +{ + if (m_size < 0) + return LLONG_MAX; + + const long long stop = m_start + m_size; + + const long long result = stop - m_pos; + assert(result >= 0); + + return result; +} +#else +bool Segment::DoneParsing() const +{ + if (m_size < 0) + { + long long total, avail; + + const int status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return true; //must assume done + + if (total < 0) + return false; //assume live stream + + return (m_pos >= total); + } + + const long long stop = m_start + m_size; + + return (m_pos >= stop); +} +#endif + + +const Cluster* Segment::GetFirst() const +{ + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + Cluster* const pCluster = m_clusters[0]; + assert(pCluster); + + return pCluster; +} + + +const Cluster* Segment::GetLast() const +{ + if ((m_clusters == NULL) || (m_clusterCount <= 0)) 
+ return &m_eos; + + const long idx = m_clusterCount - 1; + + Cluster* const pCluster = m_clusters[idx]; + assert(pCluster); + + return pCluster; +} + + +unsigned long Segment::GetCount() const +{ + return m_clusterCount; +} + + +const Cluster* Segment::GetNext(const Cluster* pCurr) +{ + assert(pCurr); + assert(pCurr != &m_eos); + assert(m_clusters); + + long idx = pCurr->m_index; + + if (idx >= 0) + { + assert(m_clusterCount > 0); + assert(idx < m_clusterCount); + assert(pCurr == m_clusters[idx]); + + ++idx; + + if (idx >= m_clusterCount) + return &m_eos; //caller will LoadCluster as desired + + Cluster* const pNext = m_clusters[idx]; + assert(pNext); + assert(pNext->m_index >= 0); + assert(pNext->m_index == idx); + + return pNext; + } + + assert(m_clusterPreloadCount > 0); + + long long pos = pCurr->m_element_start; + + assert(m_size >= 0); //TODO + const long long stop = m_start + m_size; //end of segment + + { + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); //TODO + if (result != 0) + return NULL; + + const long long id = ReadUInt(m_pReader, pos, len); + assert(id == 0x0F43B675); //Cluster ID + if (id != 0x0F43B675) + return NULL; + + pos += len; //consume ID + + //Read Size + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); //TODO + assert((pos + len) <= stop); //TODO + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size > 0); //TODO + //assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); + + pos += len; //consume length of size of element + assert((pos + size) <= stop); //TODO + + //Pos now points to start of payload + + pos += size; //consume payload + } + + long long off_next = 0; + + while (pos < stop) + { + long len; + + long long result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); + assert((pos + len) <= stop); //TODO + if (result != 0) + return NULL; + + const long long idpos = pos; //pos of next (potential) cluster + + 
const long long id = ReadUInt(m_pReader, idpos, len); + assert(id > 0); //TODO + + pos += len; //consume ID + + //Read Size + result = GetUIntLength(m_pReader, pos, len); + assert(result == 0); //TODO + assert((pos + len) <= stop); //TODO + + const long long size = ReadUInt(m_pReader, pos, len); + assert(size >= 0); //TODO + + pos += len; //consume length of size of element + assert((pos + size) <= stop); //TODO + + //Pos now points to start of payload + + if (size == 0) //weird + continue; + + if (id == 0x0F43B675) //Cluster ID + { + const long long off_next_ = idpos - m_start; + + long long pos_; + long len_; + + const long status = Cluster::HasBlockEntries( + this, + off_next_, + pos_, + len_); + + assert(status >= 0); + + if (status > 0) + { + off_next = off_next_; + break; + } + } + + pos += size; //consume payload + } + + if (off_next <= 0) + return 0; + + Cluster** const ii = m_clusters + m_clusterCount; + Cluster** i = ii; + + Cluster** const jj = ii + m_clusterPreloadCount; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[0, i) < pos_next + //[i, j) ? + //[j, jj) > pos_next + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + Cluster* const pNext = *k; + assert(pNext); + assert(pNext->m_index < 0); + + //const long long pos_ = pNext->m_pos; + //assert(pos_); + //pos = pos_ * ((pos_ < 0) ? 
-1 : 1); + + pos = pNext->GetPosition(); + + if (pos < off_next) + i = k + 1; + else if (pos > off_next) + j = k; + else + return pNext; + } + + assert(i == j); + + Cluster* const pNext = Cluster::Create(this, + -1, + off_next); + assert(pNext); + + const ptrdiff_t idx_next = i - m_clusters; //insertion position + + PreloadCluster(pNext, idx_next); + assert(m_clusters); + assert(idx_next < m_clusterSize); + assert(m_clusters[idx_next] == pNext); + + return pNext; +} + + +long Segment::ParseNext( + const Cluster* pCurr, + const Cluster*& pResult, + long long& pos, + long& len) +{ + assert(pCurr); + assert(!pCurr->EOS()); + assert(m_clusters); + + pResult = 0; + + if (pCurr->m_index >= 0) //loaded (not merely preloaded) + { + assert(m_clusters[pCurr->m_index] == pCurr); + + const long next_idx = pCurr->m_index + 1; + + if (next_idx < m_clusterCount) + { + pResult = m_clusters[next_idx]; + return 0; //success + } + + //curr cluster is last among loaded + + const long result = LoadCluster(pos, len); + + if (result < 0) //error or underflow + return result; + + if (result > 0) //no more clusters + { + //pResult = &m_eos; + return 1; + } + + pResult = GetLast(); + return 0; //success + } + + assert(m_pos > 0); + + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; + + //interrogate curr cluster + + pos = pCurr->m_element_start; + + if (pCurr->m_element_size >= 0) + pos += pCurr->m_element_size; + else + { + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(m_pReader, pos, len); + + if (id != 0x0F43B675) //weird: not Cluster ID + return -1; + + pos += len; //consume ID + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) //TODO: should never happen + return E_FILE_FORMAT_INVALID; //TODO: resolve this + + //assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + //Pos now points to start of payload + + pos += size; //consume payload (that is, the current cluster) + assert((segment_stop < 0) || (pos <= segment_stop)); + + //By consuming the payload, we are assuming that the curr + //cluster isn't interesting. 
That is, we don't bother checking + //whether the payload of the curr cluster is less than what + //happens to be available (obtained via IMkvReader::Length). + //Presumably the caller has already dispensed with the current + //cluster, and really does want the next cluster. + } + + //pos now points to just beyond the last fully-loaded cluster + + for (;;) + { + const long status = DoParseNext(pResult, pos, len); + + if (status <= 1) + return status; + } +} + + +long Segment::DoParseNext( + const Cluster*& pResult, + long long& pos, + long& len) +{ + long long total, avail; + + long status = m_pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; + + //Parse next cluster. This is strictly a parsing activity. + //Creation of a new cluster object happens later, after the + //parsing is done. + + long long off_next = 0; + long long cluster_size = -1; + + for (;;) + { + if ((total >= 0) && (pos >= total)) + return 1; //EOF + + if ((segment_stop >= 0) && (pos >= segment_stop)) + return 1; //EOF + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; //absolute + const long long idoff = pos - m_start; //relative + + const long long id = ReadUInt(m_pReader, idpos, len); //absolute + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) //weird + return -1; //generic error + + pos += len; //consume ID + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if 
(result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume length of size of element + + //Pos now points to start of payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if ((segment_stop >= 0) && + (size != unknown_size) && + ((pos + size) > segment_stop)) + { + return E_FILE_FORMAT_INVALID; + } + + if (id == 0x0C53BB6B) //Cues ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + const long long element_stop = pos + size; + + if ((segment_stop >= 0) && (element_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + const long long element_start = idpos; + const long long element_size = element_stop - element_start; + + if (m_pCues == NULL) + { + m_pCues = new Cues(this, + pos, + size, + element_start, + element_size); + assert(m_pCues); //TODO + } + + pos += size; //consume payload + assert((segment_stop < 0) || (pos <= segment_stop)); + + continue; + } + + if (id != 0x0F43B675) //not a Cluster ID + { + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload + assert((segment_stop < 0) || (pos <= segment_stop)); + + continue; + } + +#if 0 //this is commented-out to support incremental cluster parsing + len = static_cast<long>(size); + + if (element_stop > avail) + return E_BUFFER_NOT_FULL; +#endif + + //We have a cluster. + + off_next = idoff; + + if (size != unknown_size) + cluster_size = size; + + break; + } + + assert(off_next > 0); //have cluster + + //We have parsed the next cluster. + //We have not created a cluster object yet. 
What we need + //to do now is determine whether it has already be preloaded + //(in which case, an object for this cluster has already been + //created), and if not, create a new cluster object. + + Cluster** const ii = m_clusters + m_clusterCount; + Cluster** i = ii; + + Cluster** const jj = ii + m_clusterPreloadCount; + Cluster** j = jj; + + while (i < j) + { + //INVARIANT: + //[0, i) < pos_next + //[i, j) ? + //[j, jj) > pos_next + + Cluster** const k = i + (j - i) / 2; + assert(k < jj); + + const Cluster* const pNext = *k; + assert(pNext); + assert(pNext->m_index < 0); + + pos = pNext->GetPosition(); + assert(pos >= 0); + + if (pos < off_next) + i = k + 1; + else if (pos > off_next) + j = k; + else + { + pResult = pNext; + return 0; //success + } + } + + assert(i == j); + + long long pos_; + long len_; + + status = Cluster::HasBlockEntries(this, off_next, pos_, len_); + + if (status < 0) //error or underflow + { + pos = pos_; + len = len_; + + return status; + } + + if (status > 0) //means "found at least one block entry" + { + Cluster* const pNext = Cluster::Create(this, + -1, //preloaded + off_next); + //element_size); + assert(pNext); + + const ptrdiff_t idx_next = i - m_clusters; //insertion position + + PreloadCluster(pNext, idx_next); + assert(m_clusters); + assert(idx_next < m_clusterSize); + assert(m_clusters[idx_next] == pNext); + + pResult = pNext; + return 0; //success + } + + //status == 0 means "no block entries found" + + if (cluster_size < 0) //unknown size + { + const long long payload_pos = pos; //absolute pos of cluster payload + + for (;;) //determine cluster size + { + if ((total >= 0) && (pos >= total)) + break; + + if ((segment_stop >= 0) && (pos >= segment_stop)) + break; //no more clusters + + //Read ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return 
E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long idpos = pos; + const long long id = ReadUInt(m_pReader, idpos, len); + + if (id < 0) //error (or underflow) + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. + + if (id == 0x0F43B675) //Cluster ID + break; + + if (id == 0x0C53BB6B) //Cues ID + break; + + pos += len; //consume ID (of sub-element) + + //Read Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(m_pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(m_pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field of element + + //pos now points to start of sub-element's payload + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not allowed for sub-elements + + if ((segment_stop >= 0) && ((pos + size) > segment_stop)) //weird + return E_FILE_FORMAT_INVALID; + + pos += size; //consume payload of sub-element + assert((segment_stop < 0) || (pos <= segment_stop)); + } //determine cluster size + + cluster_size = pos - payload_pos; + assert(cluster_size >= 0); //TODO: handle cluster_size = 0 + + pos = payload_pos; //reset and re-parse original cluster + } + + pos += cluster_size; //consume payload + assert((segment_stop < 0) || (pos <= segment_stop)); + + return 2; //try to find a cluster that follows 
next +} + + +const Cluster* Segment::FindCluster(long long time_ns) const +{ + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return &m_eos; + + { + Cluster* const pCluster = m_clusters[0]; + assert(pCluster); + assert(pCluster->m_index == 0); + + if (time_ns <= pCluster->GetTime()) + return pCluster; + } + + //Binary search of cluster array + + long i = 0; + long j = m_clusterCount; + + while (i < j) + { + //INVARIANT: + //[0, i) <= time_ns + //[i, j) ? + //[j, m_clusterCount) > time_ns + + const long k = i + (j - i) / 2; + assert(k < m_clusterCount); + + Cluster* const pCluster = m_clusters[k]; + assert(pCluster); + assert(pCluster->m_index == k); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + i = k + 1; + else + j = k; + + assert(i <= j); + } + + assert(i == j); + assert(i > 0); + assert(i <= m_clusterCount); + + const long k = i - 1; + + Cluster* const pCluster = m_clusters[k]; + assert(pCluster); + assert(pCluster->m_index == k); + assert(pCluster->GetTime() <= time_ns); + + return pCluster; +} + + +#if 0 +const BlockEntry* Segment::Seek( + long long time_ns, + const Track* pTrack) const +{ + assert(pTrack); + + if ((m_clusters == NULL) || (m_clusterCount <= 0)) + return pTrack->GetEOS(); + + Cluster** const i = m_clusters; + assert(i); + + { + Cluster* const pCluster = *i; + assert(pCluster); + assert(pCluster->m_index == 0); //m_clusterCount > 0 + assert(pCluster->m_pSegment == this); + + if (time_ns <= pCluster->GetTime()) + return pCluster->GetEntry(pTrack); + } + + Cluster** const j = i + m_clusterCount; + + if (pTrack->GetType() == 2) //audio + { + //TODO: we could decide to use cues for this, as we do for video. + //But we only use it for video because looking around for a keyframe + //can get expensive. Audio doesn't require anything special so a + //straight cluster search is good enough (we assume). + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? 
+ //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + Cluster* const pCluster = *mid; + assert(pCluster); + assert(pCluster->m_index == long(mid - m_clusters)); + assert(pCluster->m_pSegment == this); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + while (lo > i) + { + Cluster* const pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + const BlockEntry* const pBE = pCluster->GetEntry(pTrack); + + if ((pBE != 0) && !pBE->EOS()) + return pBE; + + //landed on empty cluster (no entries) + } + + return pTrack->GetEOS(); //weird + } + + assert(pTrack->GetType() == 1); //video + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? + //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + Cluster* const pCluster = *mid; + assert(pCluster); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + Cluster* pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + { + const BlockEntry* const pBE = pCluster->GetEntry(pTrack, time_ns); + + if ((pBE != 0) && !pBE->EOS()) //found a keyframe + return pBE; + } + + const VideoTrack* const pVideo = static_cast<const VideoTrack*>(pTrack); + + while (lo != i) + { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + const BlockEntry* const pBlockEntry = pCluster->GetMaxKey(pVideo); + + if ((pBlockEntry != 0) && !pBlockEntry->EOS()) + return pBlockEntry; + } + + //weird: we're on the first cluster, but no keyframe found + //should never happen but we must return something anyway + + return pTrack->GetEOS(); +} +#endif + + +#if 0 +bool 
Segment::SearchCues( + long long time_ns, + Track* pTrack, + Cluster*& pCluster, + const BlockEntry*& pBlockEntry, + const CuePoint*& pCP, + const CuePoint::TrackPosition*& pTP) +{ + if (pTrack->GetType() != 1) //not video + return false; //TODO: for now, just handle video stream + + if (m_pCues == NULL) + return false; + + if (!m_pCues->Find(time_ns, pTrack, pCP, pTP)) + return false; //weird + + assert(pCP); + assert(pTP); + assert(pTP->m_track == pTrack->GetNumber()); + + //We have the cue point and track position we want, + //so we now need to search for the cluster having + //the indicated position. + + return GetCluster(pCP, pTP, pCluster, pBlockEntry); +} +#endif + + +const Tracks* Segment::GetTracks() const +{ + return m_pTracks; +} + + +const SegmentInfo* Segment::GetInfo() const +{ + return m_pInfo; +} + + +const Cues* Segment::GetCues() const +{ + return m_pCues; +} + + +const Chapters* Segment::GetChapters() const +{ + return m_pChapters; +} + + +const SeekHead* Segment::GetSeekHead() const +{ + return m_pSeekHead; +} + + +long long Segment::GetDuration() const +{ + assert(m_pInfo); + return m_pInfo->GetDuration(); +} + + +Chapters::Chapters( + Segment* pSegment, + long long payload_start, + long long payload_size, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(payload_start), + m_size(payload_size), + m_element_start(element_start), + m_element_size(element_size), + m_editions(NULL), + m_editions_size(0), + m_editions_count(0) +{ +} + + +Chapters::~Chapters() +{ + while (m_editions_count > 0) + { + Edition& e = m_editions[--m_editions_count]; + e.Clear(); + } +} + + +long Chapters::Parse() +{ + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; // payload start + const long long stop = pos + m_size; // payload stop + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return 
status; + + if (size == 0) // weird + continue; + + if (id == 0x05B9) // EditionEntry ID + { + status = ParseEdition(pos, size); + + if (status < 0) // error + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +int Chapters::GetEditionCount() const +{ + return m_editions_count; +} + + +const Chapters::Edition* Chapters::GetEdition(int idx) const +{ + if (idx < 0) + return NULL; + + if (idx >= m_editions_count) + return NULL; + + return m_editions + idx; +} + + +bool Chapters::ExpandEditionsArray() +{ + if (m_editions_size > m_editions_count) + return true; // nothing else to do + + const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size; + + Edition* const editions = new (std::nothrow) Edition[size]; + + if (editions == NULL) + return false; + + for (int idx = 0; idx < m_editions_count; ++idx) + { + m_editions[idx].ShallowCopy(editions[idx]); + } + + delete[] m_editions; + m_editions = editions; + + m_editions_size = size; + return true; +} + + +long Chapters::ParseEdition( + long long pos, + long long size) +{ + if (!ExpandEditionsArray()) + return -1; + + Edition& e = m_editions[m_editions_count++]; + e.Init(); + + return e.Parse(m_pSegment->m_pReader, pos, size); +} + + +Chapters::Edition::Edition() +{ +} + + +Chapters::Edition::~Edition() +{ +} + + +int Chapters::Edition::GetAtomCount() const +{ + return m_atoms_count; +} + + +const Chapters::Atom* Chapters::Edition::GetAtom(int index) const +{ + if (index < 0) + return NULL; + + if (index >= m_atoms_count) + return NULL; + + return m_atoms + index; +} + + +void Chapters::Edition::Init() +{ + m_atoms = NULL; + m_atoms_size = 0; + m_atoms_count = 0; +} + + +void Chapters::Edition::ShallowCopy(Edition& rhs) const +{ + rhs.m_atoms = m_atoms; + rhs.m_atoms_size = m_atoms_size; + rhs.m_atoms_count = m_atoms_count; +} + + +void Chapters::Edition::Clear() +{ + while (m_atoms_count > 0) + { + Atom& a = m_atoms[--m_atoms_count]; + a.Clear(); + } + + 
delete[] m_atoms; + m_atoms = NULL; + + m_atoms_size = 0; +} + + +long Chapters::Edition::Parse( + IMkvReader* pReader, + long long pos, + long long size) +{ + const long long stop = pos + size; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == 0x36) // Atom ID + { + status = ParseAtom(pReader, pos, size); + + if (status < 0) // error + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +long Chapters::Edition::ParseAtom( + IMkvReader* pReader, + long long pos, + long long size) +{ + if (!ExpandAtomsArray()) + return -1; + + Atom& a = m_atoms[m_atoms_count++]; + a.Init(); + + return a.Parse(pReader, pos, size); +} + + +bool Chapters::Edition::ExpandAtomsArray() +{ + if (m_atoms_size > m_atoms_count) + return true; // nothing else to do + + const int size = (m_atoms_size == 0) ? 
1 : 2 * m_atoms_size; + + Atom* const atoms = new (std::nothrow) Atom[size]; + + if (atoms == NULL) + return false; + + for (int idx = 0; idx < m_atoms_count; ++idx) + { + m_atoms[idx].ShallowCopy(atoms[idx]); + } + + delete[] m_atoms; + m_atoms = atoms; + + m_atoms_size = size; + return true; +} + + +Chapters::Atom::Atom() +{ +} + + +Chapters::Atom::~Atom() +{ +} + + +unsigned long long Chapters::Atom::GetUID() const +{ + return m_uid; +} + + +const char* Chapters::Atom::GetStringUID() const +{ + return m_string_uid; +} + + +long long Chapters::Atom::GetStartTimecode() const +{ + return m_start_timecode; +} + + +long long Chapters::Atom::GetStopTimecode() const +{ + return m_stop_timecode; +} + + +long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const +{ + return GetTime(pChapters, m_start_timecode); +} + + +long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const +{ + return GetTime(pChapters, m_stop_timecode); +} + + +int Chapters::Atom::GetDisplayCount() const +{ + return m_displays_count; +} + + +const Chapters::Display* Chapters::Atom::GetDisplay(int index) const +{ + if (index < 0) + return NULL; + + if (index >= m_displays_count) + return NULL; + + return m_displays + index; +} + + +void Chapters::Atom::Init() +{ + m_string_uid = NULL; + m_uid = 0; + m_start_timecode = -1; + m_stop_timecode = -1; + + m_displays = NULL; + m_displays_size = 0; + m_displays_count = 0; +} + + +void Chapters::Atom::ShallowCopy(Atom& rhs) const +{ + rhs.m_string_uid = m_string_uid; + rhs.m_uid = m_uid; + rhs.m_start_timecode = m_start_timecode; + rhs.m_stop_timecode = m_stop_timecode; + + rhs.m_displays = m_displays; + rhs.m_displays_size = m_displays_size; + rhs.m_displays_count = m_displays_count; +} + + +void Chapters::Atom::Clear() +{ + delete[] m_string_uid; + m_string_uid = NULL; + + while (m_displays_count > 0) + { + Display& d = m_displays[--m_displays_count]; + d.Clear(); + } + + delete[] m_displays; + m_displays = NULL; + + m_displays_size 
= 0; +} + + +long Chapters::Atom::Parse( + IMkvReader* pReader, + long long pos, + long long size) +{ + const long long stop = pos + size; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == 0x00) // Display ID + { + status = ParseDisplay(pReader, pos, size); + + if (status < 0) // error + return status; + } + else if (id == 0x1654) // StringUID ID + { + status = UnserializeString(pReader, pos, size, m_string_uid); + + if (status < 0) // error + return status; + } + else if (id == 0x33C4) // UID ID + { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast<long>(val); + + m_uid = val; + } + else if (id == 0x11) // TimeStart ID + { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast<long>(val); + + m_start_timecode = val; + } + else if (id == 0x12) // TimeEnd ID + { + const long long val = UnserializeUInt(pReader, pos, size); + + if (val < 0) // error + return static_cast<long>(val); + + m_stop_timecode = val; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +long long Chapters::Atom::GetTime( + const Chapters* pChapters, + long long timecode) +{ + if (pChapters == NULL) + return -1; + + Segment* const pSegment = pChapters->m_pSegment; + + if (pSegment == NULL) // weird + return -1; + + const SegmentInfo* const pInfo = pSegment->GetInfo(); + + if (pInfo == NULL) + return -1; + + const long long timecode_scale = pInfo->GetTimeCodeScale(); + + if (timecode_scale < 1) // weird + return -1; + + if (timecode < 0) + return -1; + + const long long result = timecode_scale * timecode; + + return result; +} + + +long Chapters::Atom::ParseDisplay( + IMkvReader* pReader, + long long pos, + long long size) +{ + if (!ExpandDisplaysArray()) + return -1; + + 
Display& d = m_displays[m_displays_count++]; + d.Init(); + + return d.Parse(pReader, pos, size); +} + + +bool Chapters::Atom::ExpandDisplaysArray() +{ + if (m_displays_size > m_displays_count) + return true; // nothing else to do + + const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; + + Display* const displays = new (std::nothrow) Display[size]; + + if (displays == NULL) + return false; + + for (int idx = 0; idx < m_displays_count; ++idx) + { + m_displays[idx].ShallowCopy(displays[idx]); + } + + delete[] m_displays; + m_displays = displays; + + m_displays_size = size; + return true; +} + + +Chapters::Display::Display() +{ +} + + +Chapters::Display::~Display() +{ +} + + +const char* Chapters::Display::GetString() const +{ + return m_string; +} + + +const char* Chapters::Display::GetLanguage() const +{ + return m_language; +} + + +const char* Chapters::Display::GetCountry() const +{ + return m_country; +} + + +void Chapters::Display::Init() +{ + m_string = NULL; + m_language = NULL; + m_country = NULL; +} + + +void Chapters::Display::ShallowCopy(Display& rhs) const +{ + rhs.m_string = m_string; + rhs.m_language = m_language; + rhs.m_country = m_country; +} + + +void Chapters::Display::Clear() +{ + delete[] m_string; + m_string = NULL; + + delete[] m_language; + m_language = NULL; + + delete[] m_country; + m_country = NULL; +} + + +long Chapters::Display::Parse( + IMkvReader* pReader, + long long pos, + long long size) +{ + const long long stop = pos + size; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) // error + return status; + + if (size == 0) // weird + continue; + + if (id == 0x05) // ChapterString ID + { + status = UnserializeString(pReader, pos, size, m_string); + + if (status) + return status; + } + else if (id == 0x037C) // ChapterLanguage ID + { + status = UnserializeString(pReader, pos, size, m_language); + + if (status) + return status; + 
} + else if (id == 0x037E) // ChapterCountry ID + { + status = UnserializeString(pReader, pos, size, m_country); + + if (status) + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + + +SegmentInfo::SegmentInfo( + Segment* pSegment, + long long start, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_pMuxingAppAsUTF8(NULL), + m_pWritingAppAsUTF8(NULL), + m_pTitleAsUTF8(NULL) +{ +} + +SegmentInfo::~SegmentInfo() +{ + delete[] m_pMuxingAppAsUTF8; + m_pMuxingAppAsUTF8 = NULL; + + delete[] m_pWritingAppAsUTF8; + m_pWritingAppAsUTF8 = NULL; + + delete[] m_pTitleAsUTF8; + m_pTitleAsUTF8 = NULL; +} + + +long SegmentInfo::Parse() +{ + assert(m_pMuxingAppAsUTF8 == NULL); + assert(m_pWritingAppAsUTF8 == NULL); + assert(m_pTitleAsUTF8 == NULL); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = m_start; + const long long stop = m_start + m_size; + + m_timecodeScale = 1000000; + m_duration = -1; + + while (pos < stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x0AD7B1) //Timecode Scale + { + m_timecodeScale = UnserializeUInt(pReader, pos, size); + + if (m_timecodeScale <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0489) //Segment duration + { + const long status = UnserializeFloat( + pReader, + pos, + size, + m_duration); + + if (status < 0) + return status; + + if (m_duration < 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0D80) //MuxingApp + { + const long status = UnserializeString( + pReader, + pos, + size, + m_pMuxingAppAsUTF8); + + if (status) + return status; + } + else if (id == 0x1741) //WritingApp + { + const long status = UnserializeString( + pReader, + pos, + size, + 
m_pWritingAppAsUTF8); + + if (status) + return status; + } + else if (id == 0x3BA9) //Title + { + const long status = UnserializeString( + pReader, + pos, + size, + m_pTitleAsUTF8); + + if (status) + return status; + } + + pos += size; + assert(pos <= stop); + } + + assert(pos == stop); + + return 0; +} + + +long long SegmentInfo::GetTimeCodeScale() const +{ + return m_timecodeScale; +} + + +long long SegmentInfo::GetDuration() const +{ + if (m_duration < 0) + return -1; + + assert(m_timecodeScale >= 1); + + const double dd = double(m_duration) * double(m_timecodeScale); + const long long d = static_cast<long long>(dd); + + return d; +} + +const char* SegmentInfo::GetMuxingAppAsUTF8() const +{ + return m_pMuxingAppAsUTF8; +} + + +const char* SegmentInfo::GetWritingAppAsUTF8() const +{ + return m_pWritingAppAsUTF8; +} + +const char* SegmentInfo::GetTitleAsUTF8() const +{ + return m_pTitleAsUTF8; +} + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +ContentEncoding::ContentCompression::ContentCompression() + : algo(0), + settings(NULL), + settings_len(0) { +} + +ContentEncoding::ContentCompression::~ContentCompression() { + delete [] settings; +} + +ContentEncoding::ContentEncryption::ContentEncryption() + : algo(0), + key_id(NULL), + key_id_len(0), + signature(NULL), + signature_len(0), + sig_key_id(NULL), + sig_key_id_len(0), + sig_algo(0), + sig_hash_algo(0) { +} + +ContentEncoding::ContentEncryption::~ContentEncryption() { + delete [] key_id; + delete [] signature; + delete [] sig_key_id; +} + +ContentEncoding::ContentEncoding() + : compression_entries_(NULL), + compression_entries_end_(NULL), + encryption_entries_(NULL), + encryption_entries_end_(NULL), + encoding_order_(0), + encoding_scope_(1), + encoding_type_(0) { +} + +ContentEncoding::~ContentEncoding() { + ContentCompression** comp_i = compression_entries_; + ContentCompression** const comp_j = compression_entries_end_; + + while (comp_i != comp_j) { + 
ContentCompression* const comp = *comp_i++; + delete comp; + } + + delete [] compression_entries_; + + ContentEncryption** enc_i = encryption_entries_; + ContentEncryption** const enc_j = encryption_entries_end_; + + while (enc_i != enc_j) { + ContentEncryption* const enc = *enc_i++; + delete enc; + } + + delete [] encryption_entries_; +} + + +const ContentEncoding::ContentCompression* +ContentEncoding::GetCompressionByIndex(unsigned long idx) const { + const ptrdiff_t count = compression_entries_end_ - compression_entries_; + assert(count >= 0); + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return compression_entries_[idx]; +} + +unsigned long ContentEncoding::GetCompressionCount() const { + const ptrdiff_t count = compression_entries_end_ - compression_entries_; + assert(count >= 0); + + return static_cast<unsigned long>(count); +} + +const ContentEncoding::ContentEncryption* +ContentEncoding::GetEncryptionByIndex(unsigned long idx) const { + const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; + assert(count >= 0); + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return encryption_entries_[idx]; +} + +unsigned long ContentEncoding::GetEncryptionCount() const { + const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; + assert(count >= 0); + + return static_cast<unsigned long>(count); +} + +long ContentEncoding::ParseContentEncAESSettingsEntry( + long long start, + long long size, + IMkvReader* pReader, + ContentEncAESSettings* aes) { + assert(pReader); + assert(aes); + + long long pos = start; + const long long stop = start + size; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x7E8) { + // AESSettingsCipherMode + aes->cipher_mode = UnserializeUInt(pReader, pos, size); + if (aes->cipher_mode != 1) + return E_FILE_FORMAT_INVALID; + } + + pos += size; 
//consume payload + assert(pos <= stop); + } + + return 0; +} + +long ContentEncoding::ParseContentEncodingEntry(long long start, + long long size, + IMkvReader* pReader) { + assert(pReader); + + long long pos = start; + const long long stop = start + size; + + // Count ContentCompression and ContentEncryption elements. + int compression_count = 0; + int encryption_count = 0; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x1034) // ContentCompression ID + ++compression_count; + + if (id == 0x1035) // ContentEncryption ID + ++encryption_count; + + pos += size; //consume payload + assert(pos <= stop); + } + + if (compression_count <= 0 && encryption_count <= 0) + return -1; + + if (compression_count > 0) { + compression_entries_ = + new (std::nothrow) ContentCompression*[compression_count]; + if (!compression_entries_) + return -1; + compression_entries_end_ = compression_entries_; + } + + if (encryption_count > 0) { + encryption_entries_ = + new (std::nothrow) ContentEncryption*[encryption_count]; + if (!encryption_entries_) { + delete [] compression_entries_; + return -1; + } + encryption_entries_end_ = encryption_entries_; + } + + pos = start; + while (pos < stop) { + long long id, size; + long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x1031) { + // ContentEncodingOrder + encoding_order_ = UnserializeUInt(pReader, pos, size); + } else if (id == 0x1032) { + // ContentEncodingScope + encoding_scope_ = UnserializeUInt(pReader, pos, size); + if (encoding_scope_ < 1) + return -1; + } else if (id == 0x1033) { + // ContentEncodingType + encoding_type_ = UnserializeUInt(pReader, pos, size); + } else if (id == 0x1034) { + // ContentCompression ID + ContentCompression* const compression = + new (std::nothrow) ContentCompression(); + if (!compression) + 
return -1; + + status = ParseCompressionEntry(pos, size, pReader, compression); + if (status) { + delete compression; + return status; + } + *compression_entries_end_++ = compression; + } else if (id == 0x1035) { + // ContentEncryption ID + ContentEncryption* const encryption = + new (std::nothrow) ContentEncryption(); + if (!encryption) + return -1; + + status = ParseEncryptionEntry(pos, size, pReader, encryption); + if (status) { + delete encryption; + return status; + } + *encryption_entries_end_++ = encryption; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + return 0; +} + +long ContentEncoding::ParseCompressionEntry( + long long start, + long long size, + IMkvReader* pReader, + ContentCompression* compression) { + assert(pReader); + assert(compression); + + long long pos = start; + const long long stop = start + size; + + bool valid = false; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x254) { + // ContentCompAlgo + long long algo = UnserializeUInt(pReader, pos, size); + if (algo < 0) + return E_FILE_FORMAT_INVALID; + compression->algo = algo; + valid = true; + } else if (id == 0x255) { + // ContentCompSettings + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; + + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + compression->settings = buf; + compression->settings_len = buflen; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + // ContentCompAlgo is mandatory + if (!valid) + return E_FILE_FORMAT_INVALID; + + return 0; +} + +long ContentEncoding::ParseEncryptionEntry( + long long start, + long long size, + 
IMkvReader* pReader, + ContentEncryption* encryption) { + assert(pReader); + assert(encryption); + + long long pos = start; + const long long stop = start + size; + + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + if (id == 0x7E1) { + // ContentEncAlgo + encryption->algo = UnserializeUInt(pReader, pos, size); + if (encryption->algo != 5) + return E_FILE_FORMAT_INVALID; + } else if (id == 0x7E2) { + // ContentEncKeyID + delete[] encryption->key_id; + encryption->key_id = NULL; + encryption->key_id_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; + + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + encryption->key_id = buf; + encryption->key_id_len = buflen; + } else if (id == 0x7E3) { + // ContentSignature + delete[] encryption->signature; + encryption->signature = NULL; + encryption->signature_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; + + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + encryption->signature = buf; + encryption->signature_len = buflen; + } else if (id == 0x7E4) { + // ContentSigKeyID + delete[] encryption->sig_key_id; + encryption->sig_key_id = NULL; + encryption->sig_key_id_len = 0; + + if (size <= 0) + return E_FILE_FORMAT_INVALID; + + const size_t buflen = static_cast<size_t>(size); + typedef unsigned char* buf_t; + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + if (buf == NULL) + return -1; 
+ + const int read_status = pReader->Read(pos, buflen, buf); + if (read_status) { + delete [] buf; + return status; + } + + encryption->sig_key_id = buf; + encryption->sig_key_id_len = buflen; + } else if (id == 0x7E5) { + // ContentSigAlgo + encryption->sig_algo = UnserializeUInt(pReader, pos, size); + } else if (id == 0x7E6) { + // ContentSigHashAlgo + encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size); + } else if (id == 0x7E7) { + // ContentEncAESSettings + const long status = ParseContentEncAESSettingsEntry( + pos, + size, + pReader, + &encryption->aes_settings); + if (status) + return status; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + return 0; +} + +Track::Track( + Segment* pSegment, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_element_start(element_start), + m_element_size(element_size), + content_encoding_entries_(NULL), + content_encoding_entries_end_(NULL) +{ +} + +Track::~Track() +{ + Info& info = const_cast<Info&>(m_info); + info.Clear(); + + ContentEncoding** i = content_encoding_entries_; + ContentEncoding** const j = content_encoding_entries_end_; + + while (i != j) { + ContentEncoding* const encoding = *i++; + delete encoding; + } + + delete [] content_encoding_entries_; +} + +long Track::Create( + Segment* pSegment, + const Info& info, + long long element_start, + long long element_size, + Track*& pResult) +{ + if (pResult) + return -1; + + Track* const pTrack = new (std::nothrow) Track(pSegment, + element_start, + element_size); + + if (pTrack == NULL) + return -1; //generic error + + const int status = info.Copy(pTrack->m_info); + + if (status) // error + { + delete pTrack; + return status; + } + + pResult = pTrack; + return 0; //success +} + +Track::Info::Info(): + uid(0), + defaultDuration(0), + codecDelay(0), + seekPreRoll(0), + nameAsUTF8(NULL), + language(NULL), + codecId(NULL), + codecNameAsUTF8(NULL), + codecPrivate(NULL), + codecPrivateSize(0), + lacing(false) 
+{ +} + +Track::Info::~Info() +{ + Clear(); +} + +void Track::Info::Clear() +{ + delete[] nameAsUTF8; + nameAsUTF8 = NULL; + + delete[] language; + language = NULL; + + delete[] codecId; + codecId = NULL; + + delete[] codecPrivate; + codecPrivate = NULL; + codecPrivateSize = 0; + + delete[] codecNameAsUTF8; + codecNameAsUTF8 = NULL; +} + +int Track::Info::CopyStr(char* Info::*str, Info& dst_) const +{ + if (str == static_cast<char* Info::*>(NULL)) + return -1; + + char*& dst = dst_.*str; + + if (dst) //should be NULL already + return -1; + + const char* const src = this->*str; + + if (src == NULL) + return 0; + + const size_t len = strlen(src); + + dst = new (std::nothrow) char[len+1]; + + if (dst == NULL) + return -1; + + strcpy(dst, src); + + return 0; +} + + +int Track::Info::Copy(Info& dst) const +{ + if (&dst == this) + return 0; + + dst.type = type; + dst.number = number; + dst.defaultDuration = defaultDuration; + dst.codecDelay = codecDelay; + dst.seekPreRoll = seekPreRoll; + dst.uid = uid; + dst.lacing = lacing; + dst.settings = settings; + + //We now copy the string member variables from src to dst. + //This involves memory allocation so in principle the operation + //can fail (indeed, that's why we have Info::Copy), so we must + //report this to the caller. An error return from this function + //therefore implies that the copy was only partially successful. 
+ + if (int status = CopyStr(&Info::nameAsUTF8, dst)) + return status; + + if (int status = CopyStr(&Info::language, dst)) + return status; + + if (int status = CopyStr(&Info::codecId, dst)) + return status; + + if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) + return status; + + if (codecPrivateSize > 0) + { + if (codecPrivate == NULL) + return -1; + + if (dst.codecPrivate) + return -1; + + if (dst.codecPrivateSize != 0) + return -1; + + dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize]; + + if (dst.codecPrivate == NULL) + return -1; + + memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); + dst.codecPrivateSize = codecPrivateSize; + } + + return 0; +} + +const BlockEntry* Track::GetEOS() const +{ + return &m_eos; +} + +long Track::GetType() const +{ + return m_info.type; +} + +long Track::GetNumber() const +{ + return m_info.number; +} + +unsigned long long Track::GetUid() const +{ + return m_info.uid; +} + +const char* Track::GetNameAsUTF8() const +{ + return m_info.nameAsUTF8; +} + +const char* Track::GetLanguage() const +{ + return m_info.language; +} + +const char* Track::GetCodecNameAsUTF8() const +{ + return m_info.codecNameAsUTF8; +} + + +const char* Track::GetCodecId() const +{ + return m_info.codecId; +} + +const unsigned char* Track::GetCodecPrivate(size_t& size) const +{ + size = m_info.codecPrivateSize; + return m_info.codecPrivate; +} + + +bool Track::GetLacing() const +{ + return m_info.lacing; +} + +unsigned long long Track::GetDefaultDuration() const +{ + return m_info.defaultDuration; +} + +unsigned long long Track::GetCodecDelay() const +{ + return m_info.codecDelay; +} + +unsigned long long Track::GetSeekPreRoll() const +{ + return m_info.seekPreRoll; +} + +long Track::GetFirst(const BlockEntry*& pBlockEntry) const +{ + const Cluster* pCluster = m_pSegment->GetFirst(); + + for (int i = 0; ; ) + { + if (pCluster == NULL) + { + pBlockEntry = GetEOS(); + return 1; + } + + if (pCluster->EOS()) + { +#if 0 + if 
(m_pSegment->Unparsed() <= 0) //all clusters have been loaded + { + pBlockEntry = GetEOS(); + return 1; + } +#else + if (m_pSegment->DoneParsing()) + { + pBlockEntry = GetEOS(); + return 1; + } +#endif + + pBlockEntry = 0; + return E_BUFFER_NOT_FULL; + } + + long status = pCluster->GetFirst(pBlockEntry); + + if (status < 0) //error + return status; + + if (pBlockEntry == 0) //empty cluster + { + pCluster = m_pSegment->GetNext(pCluster); + continue; + } + + for (;;) + { + const Block* const pBlock = pBlockEntry->GetBlock(); + assert(pBlock); + + const long long tn = pBlock->GetTrackNumber(); + + if ((tn == m_info.number) && VetEntry(pBlockEntry)) + return 0; + + const BlockEntry* pNextEntry; + + status = pCluster->GetNext(pBlockEntry, pNextEntry); + + if (status < 0) //error + return status; + + if (pNextEntry == 0) + break; + + pBlockEntry = pNextEntry; + } + + ++i; + + if (i >= 100) + break; + + pCluster = m_pSegment->GetNext(pCluster); + } + + //NOTE: if we get here, it means that we didn't find a block with + //a matching track number. We interpret that as an error (which + //might be too conservative). + + pBlockEntry = GetEOS(); //so we can return a non-NULL value + return 1; +} + + +long Track::GetNext( + const BlockEntry* pCurrEntry, + const BlockEntry*& pNextEntry) const +{ + assert(pCurrEntry); + assert(!pCurrEntry->EOS()); //? 
+ + const Block* const pCurrBlock = pCurrEntry->GetBlock(); + assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number); + if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number) + return -1; + + const Cluster* pCluster = pCurrEntry->GetCluster(); + assert(pCluster); + assert(!pCluster->EOS()); + + long status = pCluster->GetNext(pCurrEntry, pNextEntry); + + if (status < 0) //error + return status; + + for (int i = 0; ; ) + { + while (pNextEntry) + { + const Block* const pNextBlock = pNextEntry->GetBlock(); + assert(pNextBlock); + + if (pNextBlock->GetTrackNumber() == m_info.number) + return 0; + + pCurrEntry = pNextEntry; + + status = pCluster->GetNext(pCurrEntry, pNextEntry); + + if (status < 0) //error + return status; + } + + pCluster = m_pSegment->GetNext(pCluster); + + if (pCluster == NULL) + { + pNextEntry = GetEOS(); + return 1; + } + + if (pCluster->EOS()) + { +#if 0 + if (m_pSegment->Unparsed() <= 0) //all clusters have been loaded + { + pNextEntry = GetEOS(); + return 1; + } +#else + if (m_pSegment->DoneParsing()) + { + pNextEntry = GetEOS(); + return 1; + } +#endif + + //TODO: there is a potential O(n^2) problem here: we tell the + //caller to (pre)load another cluster, which he does, but then he + //calls GetNext again, which repeats the same search. This is + //a pathological case, since the only way it can happen is if + //there exists a long sequence of clusters none of which contain a + // block from this track. One way around this problem is for the + //caller to be smarter when he loads another cluster: don't call + //us back until you have a cluster that contains a block from this + //track. (Of course, that's not cheap either, since our caller + //would have to scan the each cluster as it's loaded, so that + //would just push back the problem.) 
+ + pNextEntry = NULL; + return E_BUFFER_NOT_FULL; + } + + status = pCluster->GetFirst(pNextEntry); + + if (status < 0) //error + return status; + + if (pNextEntry == NULL) //empty cluster + continue; + + ++i; + + if (i >= 100) + break; + } + + //NOTE: if we get here, it means that we didn't find a block with + //a matching track number after lots of searching, so we give + //up trying. + + pNextEntry = GetEOS(); //so we can return a non-NULL value + return 1; +} + +bool Track::VetEntry(const BlockEntry* pBlockEntry) const +{ + assert(pBlockEntry); + const Block* const pBlock = pBlockEntry->GetBlock(); + assert(pBlock); + assert(pBlock->GetTrackNumber() == m_info.number); + if (!pBlock || pBlock->GetTrackNumber() != m_info.number) + return false; + + // This function is used during a seek to determine whether the + // frame is a valid seek target. This default function simply + // returns true, which means all frames are valid seek targets. + // It gets overridden by the VideoTrack class, because only video + // keyframes can be used as seek target. + + return true; +} + +long Track::Seek( + long long time_ns, + const BlockEntry*& pResult) const +{ + const long status = GetFirst(pResult); + + if (status < 0) //buffer underflow, etc + return status; + + assert(pResult); + + if (pResult->EOS()) + return 0; + + const Cluster* pCluster = pResult->GetCluster(); + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + + if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) + return 0; + + Cluster** const clusters = m_pSegment->m_clusters; + assert(clusters); + + const long count = m_pSegment->GetCount(); //loaded only, not preloaded + assert(count > 0); + + Cluster** const i = clusters + pCluster->GetIndex(); + assert(i); + assert(*i == pCluster); + assert(pCluster->GetTime() <= time_ns); + + Cluster** const j = clusters + count; + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? 
+ //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + pCluster = *mid; + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + while (lo > i) + { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this); + + if ((pResult != 0) && !pResult->EOS()) + return 0; + + //landed on empty cluster (no entries) + } + + pResult = GetEOS(); //weird + return 0; +} + +const ContentEncoding* +Track::GetContentEncodingByIndex(unsigned long idx) const { + const ptrdiff_t count = + content_encoding_entries_end_ - content_encoding_entries_; + assert(count >= 0); + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return content_encoding_entries_[idx]; +} + +unsigned long Track::GetContentEncodingCount() const { + const ptrdiff_t count = + content_encoding_entries_end_ - content_encoding_entries_; + assert(count >= 0); + + return static_cast<unsigned long>(count); +} + +long Track::ParseContentEncodingsEntry(long long start, long long size) { + IMkvReader* const pReader = m_pSegment->m_pReader; + assert(pReader); + + long long pos = start; + const long long stop = start + size; + + // Count ContentEncoding elements. 
+ int count = 0; + while (pos < stop) { + long long id, size; + const long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + + //pos now designates start of element + if (id == 0x2240) // ContentEncoding ID + ++count; + + pos += size; //consume payload + assert(pos <= stop); + } + + if (count <= 0) + return -1; + + content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count]; + if (!content_encoding_entries_) + return -1; + + content_encoding_entries_end_ = content_encoding_entries_; + + pos = start; + while (pos < stop) { + long long id, size; + long status = ParseElementHeader(pReader, + pos, + stop, + id, + size); + if (status < 0) //error + return status; + + //pos now designates start of element + if (id == 0x2240) { // ContentEncoding ID + ContentEncoding* const content_encoding = + new (std::nothrow) ContentEncoding(); + if (!content_encoding) + return -1; + + status = content_encoding->ParseContentEncodingEntry(pos, + size, + pReader); + if (status) { + delete content_encoding; + return status; + } + + *content_encoding_entries_end_++ = content_encoding; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + return 0; +} + +Track::EOSBlock::EOSBlock() : + BlockEntry(NULL, LONG_MIN) +{ +} + +BlockEntry::Kind Track::EOSBlock::GetKind() const +{ + return kBlockEOS; +} + + +const Block* Track::EOSBlock::GetBlock() const +{ + return NULL; +} + + +VideoTrack::VideoTrack( + Segment* pSegment, + long long element_start, + long long element_size) : + Track(pSegment, element_start, element_size) +{ +} + + +long VideoTrack::Parse( + Segment* pSegment, + const Info& info, + long long element_start, + long long element_size, + VideoTrack*& pResult) +{ + if (pResult) + return -1; + + if (info.type != Track::kVideo) + return -1; + + long long width = 0; + long long height = 0; + double rate = 0.0; + + IMkvReader* const pReader = pSegment->m_pReader; + + const 
Settings& s = info.settings; + assert(s.start >= 0); + assert(s.size >= 0); + + long long pos = s.start; + assert(pos >= 0); + + const long long stop = pos + s.size; + + while (pos < stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x30) //pixel width + { + width = UnserializeUInt(pReader, pos, size); + + if (width <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x3A) //pixel height + { + height = UnserializeUInt(pReader, pos, size); + + if (height <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x0383E3) //frame rate + { + const long status = UnserializeFloat( + pReader, + pos, + size, + rate); + + if (status < 0) + return status; + + if (rate <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + VideoTrack* const pTrack = new (std::nothrow) VideoTrack(pSegment, + element_start, + element_size); + + if (pTrack == NULL) + return -1; //generic error + + const int status = info.Copy(pTrack->m_info); + + if (status) // error + { + delete pTrack; + return status; + } + + pTrack->m_width = width; + pTrack->m_height = height; + pTrack->m_rate = rate; + + pResult = pTrack; + return 0; //success +} + + +bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const +{ + return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); +} + +long VideoTrack::Seek( + long long time_ns, + const BlockEntry*& pResult) const +{ + const long status = GetFirst(pResult); + + if (status < 0) //buffer underflow, etc + return status; + + assert(pResult); + + if (pResult->EOS()) + return 0; + + const Cluster* pCluster = pResult->GetCluster(); + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + + if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) + return 0; + + Cluster** const clusters = m_pSegment->m_clusters; + assert(clusters); + + const 
long count = m_pSegment->GetCount(); //loaded only, not pre-loaded + assert(count > 0); + + Cluster** const i = clusters + pCluster->GetIndex(); + assert(i); + assert(*i == pCluster); + assert(pCluster->GetTime() <= time_ns); + + Cluster** const j = clusters + count; + + Cluster** lo = i; + Cluster** hi = j; + + while (lo < hi) + { + //INVARIANT: + //[i, lo) <= time_ns + //[lo, hi) ? + //[hi, j) > time_ns + + Cluster** const mid = lo + (hi - lo) / 2; + assert(mid < hi); + + pCluster = *mid; + assert(pCluster); + assert(pCluster->GetIndex() >= 0); + assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); + + const long long t = pCluster->GetTime(); + + if (t <= time_ns) + lo = mid + 1; + else + hi = mid; + + assert(lo <= hi); + } + + assert(lo == hi); + assert(lo > i); + assert(lo <= j); + + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + + pResult = pCluster->GetEntry(this, time_ns); + + if ((pResult != 0) && !pResult->EOS()) //found a keyframe + return 0; + + while (lo != i) + { + pCluster = *--lo; + assert(pCluster); + assert(pCluster->GetTime() <= time_ns); + +#if 0 + //TODO: + //We need to handle the case when a cluster + //contains multiple keyframes. Simply returning + //the largest keyframe on the cluster isn't + //good enough. 
+ pResult = pCluster->GetMaxKey(this); +#else + pResult = pCluster->GetEntry(this, time_ns); +#endif + + if ((pResult != 0) && !pResult->EOS()) + return 0; + } + + //weird: we're on the first cluster, but no keyframe found + //should never happen but we must return something anyway + + pResult = GetEOS(); + return 0; +} + + +long long VideoTrack::GetWidth() const +{ + return m_width; +} + + +long long VideoTrack::GetHeight() const +{ + return m_height; +} + + +double VideoTrack::GetFrameRate() const +{ + return m_rate; +} + + +AudioTrack::AudioTrack( + Segment* pSegment, + long long element_start, + long long element_size) : + Track(pSegment, element_start, element_size) +{ +} + + +long AudioTrack::Parse( + Segment* pSegment, + const Info& info, + long long element_start, + long long element_size, + AudioTrack*& pResult) +{ + if (pResult) + return -1; + + if (info.type != Track::kAudio) + return -1; + + IMkvReader* const pReader = pSegment->m_pReader; + + const Settings& s = info.settings; + assert(s.start >= 0); + assert(s.size >= 0); + + long long pos = s.start; + assert(pos >= 0); + + const long long stop = pos + s.size; + + double rate = 8000.0; // MKV default + long long channels = 1; + long long bit_depth = 0; + + while (pos < stop) + { + long long id, size; + + long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (id == 0x35) //Sample Rate + { + status = UnserializeFloat(pReader, pos, size, rate); + + if (status < 0) + return status; + + if (rate <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x1F) //Channel Count + { + channels = UnserializeUInt(pReader, pos, size); + + if (channels <= 0) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x2264) //Bit Depth + { + bit_depth = UnserializeUInt(pReader, pos, size); + + if (bit_depth <= 0) + return E_FILE_FORMAT_INVALID; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + 
AudioTrack* const pTrack = new (std::nothrow) AudioTrack(pSegment, + element_start, + element_size); + + if (pTrack == NULL) + return -1; //generic error + + const int status = info.Copy(pTrack->m_info); + + if (status) + { + delete pTrack; + return status; + } + + pTrack->m_rate = rate; + pTrack->m_channels = channels; + pTrack->m_bitDepth = bit_depth; + + pResult = pTrack; + return 0; //success +} + + +double AudioTrack::GetSamplingRate() const +{ + return m_rate; +} + + +long long AudioTrack::GetChannels() const +{ + return m_channels; +} + +long long AudioTrack::GetBitDepth() const +{ + return m_bitDepth; +} + +Tracks::Tracks( + Segment* pSegment, + long long start, + long long size_, + long long element_start, + long long element_size) : + m_pSegment(pSegment), + m_start(start), + m_size(size_), + m_element_start(element_start), + m_element_size(element_size), + m_trackEntries(NULL), + m_trackEntriesEnd(NULL) +{ +} + + +long Tracks::Parse() +{ + assert(m_trackEntries == NULL); + assert(m_trackEntriesEnd == NULL); + + const long long stop = m_start + m_size; + IMkvReader* const pReader = m_pSegment->m_pReader; + + int count = 0; + long long pos = m_start; + + while (pos < stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + size); + + if (status < 0) //error + return status; + + if (size == 0) //weird + continue; + + if (id == 0x2E) //TrackEntry ID + ++count; + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + + if (count <= 0) + return 0; //success + + m_trackEntries = new (std::nothrow) Track*[count]; + + if (m_trackEntries == NULL) + return -1; + + m_trackEntriesEnd = m_trackEntries; + + pos = m_start; + + while (pos < stop) + { + const long long element_start = pos; + + long long id, payload_size; + + const long status = ParseElementHeader( + pReader, + pos, + stop, + id, + payload_size); + + if (status < 0) //error + return status; + + if (payload_size == 0) 
//weird + continue; + + const long long payload_stop = pos + payload_size; + assert(payload_stop <= stop); //checked in ParseElement + + const long long element_size = payload_stop - element_start; + + if (id == 0x2E) //TrackEntry ID + { + Track*& pTrack = *m_trackEntriesEnd; + pTrack = NULL; + + const long status = ParseTrackEntry( + pos, + payload_size, + element_start, + element_size, + pTrack); + + if (status) + return status; + + if (pTrack) + ++m_trackEntriesEnd; + } + + pos = payload_stop; + assert(pos <= stop); + } + + assert(pos == stop); + + return 0; //success +} + + +unsigned long Tracks::GetTracksCount() const +{ + const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; + assert(result >= 0); + + return static_cast<unsigned long>(result); +} + +long Tracks::ParseTrackEntry( + long long track_start, + long long track_size, + long long element_start, + long long element_size, + Track*& pResult) const +{ + if (pResult) + return -1; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = track_start; + const long long track_stop = track_start + track_size; + + Track::Info info; + + info.type = 0; + info.number = 0; + info.uid = 0; + info.defaultDuration = 0; + + Track::Settings v; + v.start = -1; + v.size = -1; + + Track::Settings a; + a.start = -1; + a.size = -1; + + Track::Settings e; //content_encodings_settings; + e.start = -1; + e.size = -1; + + long long lacing = 1; //default is true + + while (pos < track_stop) + { + long long id, size; + + const long status = ParseElementHeader( + pReader, + pos, + track_stop, + id, + size); + + if (status < 0) //error + return status; + + if (size < 0) + return E_FILE_FORMAT_INVALID; + + const long long start = pos; + + if (id == 0x60) // VideoSettings ID + { + v.start = start; + v.size = size; + } + else if (id == 0x61) // AudioSettings ID + { + a.start = start; + a.size = size; + } + else if (id == 0x2D80) // ContentEncodings ID + { + e.start = start; + e.size = size; + } + else if (id == 
0x33C5) //Track UID + { + if (size > 8) + return E_FILE_FORMAT_INVALID; + + info.uid = 0; + + long long pos_ = start; + const long long pos_end = start + size; + + while (pos_ != pos_end) + { + unsigned char b; + + const int status = pReader->Read(pos_, 1, &b); + + if (status) + return status; + + info.uid <<= 8; + info.uid |= b; + + ++pos_; + } + } + else if (id == 0x57) //Track Number + { + const long long num = UnserializeUInt(pReader, pos, size); + + if ((num <= 0) || (num > 127)) + return E_FILE_FORMAT_INVALID; + + info.number = static_cast<long>(num); + } + else if (id == 0x03) //Track Type + { + const long long type = UnserializeUInt(pReader, pos, size); + + if ((type <= 0) || (type > 254)) + return E_FILE_FORMAT_INVALID; + + info.type = static_cast<long>(type); + } + else if (id == 0x136E) //Track Name + { + const long status = UnserializeString( + pReader, + pos, + size, + info.nameAsUTF8); + + if (status) + return status; + } + else if (id == 0x02B59C) //Track Language + { + const long status = UnserializeString( + pReader, + pos, + size, + info.language); + + if (status) + return status; + } + else if (id == 0x03E383) //Default Duration + { + const long long duration = UnserializeUInt(pReader, pos, size); + + if (duration < 0) + return E_FILE_FORMAT_INVALID; + + info.defaultDuration = static_cast<unsigned long long>(duration); + } + else if (id == 0x06) //CodecID + { + const long status = UnserializeString( + pReader, + pos, + size, + info.codecId); + + if (status) + return status; + } + else if (id == 0x1C) //lacing + { + lacing = UnserializeUInt(pReader, pos, size); + + if ((lacing < 0) || (lacing > 1)) + return E_FILE_FORMAT_INVALID; + } + else if (id == 0x23A2) //Codec Private + { + delete[] info.codecPrivate; + info.codecPrivate = NULL; + info.codecPrivateSize = 0; + + const size_t buflen = static_cast<size_t>(size); + + if (buflen) + { + typedef unsigned char* buf_t; + + const buf_t buf = new (std::nothrow) unsigned char[buflen]; + + if (buf == 
NULL) + return -1; + + const int status = pReader->Read(pos, buflen, buf); + + if (status) + { + delete[] buf; + return status; + } + + info.codecPrivate = buf; + info.codecPrivateSize = buflen; + } + } + else if (id == 0x058688) //Codec Name + { + const long status = UnserializeString( + pReader, + pos, + size, + info.codecNameAsUTF8); + + if (status) + return status; + } + else if (id == 0x16AA) //Codec Delay + { + info.codecDelay = UnserializeUInt(pReader, pos, size); + + } + else if (id == 0x16BB) //Seek Pre Roll + { + info.seekPreRoll = UnserializeUInt(pReader, pos, size); + } + + pos += size; //consume payload + assert(pos <= track_stop); + } + + assert(pos == track_stop); + + if (info.number <= 0) //not specified + return E_FILE_FORMAT_INVALID; + + if (GetTrackByNumber(info.number)) + return E_FILE_FORMAT_INVALID; + + if (info.type <= 0) //not specified + return E_FILE_FORMAT_INVALID; + + info.lacing = (lacing > 0) ? true : false; + + if (info.type == Track::kVideo) + { + if (v.start < 0) + return E_FILE_FORMAT_INVALID; + + if (a.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings = v; + + VideoTrack* pTrack = NULL; + + const long status = VideoTrack::Parse(m_pSegment, + info, + element_start, + element_size, + pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + + if (e.start >= 0) + pResult->ParseContentEncodingsEntry(e.start, e.size); + } + else if (info.type == Track::kAudio) + { + if (a.start < 0) + return E_FILE_FORMAT_INVALID; + + if (v.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings = a; + + AudioTrack* pTrack = NULL; + + const long status = AudioTrack::Parse(m_pSegment, + info, + element_start, + element_size, + pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + + if (e.start >= 0) + pResult->ParseContentEncodingsEntry(e.start, e.size); + } + else + { + // neither video nor audio - probably metadata or subtitles + + if (a.start >= 0) + return 
E_FILE_FORMAT_INVALID; + + if (v.start >= 0) + return E_FILE_FORMAT_INVALID; + + if (e.start >= 0) + return E_FILE_FORMAT_INVALID; + + info.settings.start = -1; + info.settings.size = 0; + + Track* pTrack = NULL; + + const long status = Track::Create(m_pSegment, + info, + element_start, + element_size, + pTrack); + + if (status) + return status; + + pResult = pTrack; + assert(pResult); + } + + return 0; //success +} + + +Tracks::~Tracks() +{ + Track** i = m_trackEntries; + Track** const j = m_trackEntriesEnd; + + while (i != j) + { + Track* const pTrack = *i++; + delete pTrack; + } + + delete[] m_trackEntries; +} + +const Track* Tracks::GetTrackByNumber(long tn) const +{ + if (tn < 0) + return NULL; + + Track** i = m_trackEntries; + Track** const j = m_trackEntriesEnd; + + while (i != j) + { + Track* const pTrack = *i++; + + if (pTrack == NULL) + continue; + + if (tn == pTrack->GetNumber()) + return pTrack; + } + + return NULL; //not found +} + + +const Track* Tracks::GetTrackByIndex(unsigned long idx) const +{ + const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; + + if (idx >= static_cast<unsigned long>(count)) + return NULL; + + return m_trackEntries[idx]; +} + +#if 0 +long long Cluster::Unparsed() const +{ + if (m_timecode < 0) //not even partially loaded + return LLONG_MAX; + + assert(m_pos >= m_element_start); + //assert(m_element_size > m_size); + + const long long element_stop = m_element_start + m_element_size; + assert(m_pos <= element_stop); + + const long long result = element_stop - m_pos; + assert(result >= 0); + + return result; +} +#endif + + +long Cluster::Load(long long& pos, long& len) const +{ + assert(m_pSegment); + assert(m_pos >= m_element_start); + + if (m_timecode >= 0) //at least partially loaded + return 0; + + assert(m_pos == m_element_start); + assert(m_element_size < 0); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + const int status = pReader->Length(&total, &avail); + + if (status < 0) 
//error + return status; + + assert((total < 0) || (avail <= total)); + assert((total < 0) || (m_pos <= total)); //TODO: verify this + + pos = m_pos; + + long long cluster_size = -1; + + { + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error or underflow + return static_cast<long>(result); + + if (result > 0) //underflow (weird) + return E_BUFFER_NOT_FULL; + + //if ((pos + len) > segment_stop) + // return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id_ = ReadUInt(pReader, pos, len); + + if (id_ < 0) //error + return static_cast<long>(id_); + + if (id_ != 0x0F43B675) //Cluster ID + return E_FILE_FORMAT_INVALID; + + pos += len; //consume id + + //read cluster size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + //if ((pos + len) > segment_stop) + // return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(cluster_size); + + if (size == 0) + return E_FILE_FORMAT_INVALID; //TODO: verify this + + pos += len; //consume length of size of element + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size != unknown_size) + cluster_size = size; + } + + //pos points to start of payload + +#if 0 + len = static_cast<long>(size_); + + if (cluster_stop > avail) + return E_BUFFER_NOT_FULL; +#endif + + long long timecode = -1; + long long new_pos = -1; + bool bBlock = false; + + long long cluster_stop = (cluster_size < 0) ? 
-1 : pos + cluster_size; + + for (;;) + { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + break; + + //Parse ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) + return E_FILE_FORMAT_INVALID; + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. + + if (id == 0x0F43B675) //Cluster ID + break; + + if (id == 0x0C53BB6B) //Cues ID + break; + + pos += len; //consume ID field + + //Parse Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume size field + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + //pos now points to start of payload + + if (size == 0) //weird + continue; + + if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (id == 0x67) //TimeCode ID + { + len = static_cast<long>(size); + + if ((pos + size) 
> avail) + return E_BUFFER_NOT_FULL; + + timecode = UnserializeUInt(pReader, pos, size); + + if (timecode < 0) //error (or underflow) + return static_cast<long>(timecode); + + new_pos = pos + size; + + if (bBlock) + break; + } + else if (id == 0x20) //BlockGroup ID + { + bBlock = true; + break; + } + else if (id == 0x23) //SimpleBlock ID + { + bBlock = true; + break; + } + + pos += size; //consume payload + assert((cluster_stop < 0) || (pos <= cluster_stop)); + } + + assert((cluster_stop < 0) || (pos <= cluster_stop)); + + if (timecode < 0) //no timecode found + return E_FILE_FORMAT_INVALID; + + if (!bBlock) + return E_FILE_FORMAT_INVALID; + + m_pos = new_pos; //designates position just beyond timecode payload + m_timecode = timecode; // m_timecode >= 0 means we're partially loaded + + if (cluster_size >= 0) + m_element_size = cluster_stop - m_element_start; + + return 0; +} + + +long Cluster::Parse(long long& pos, long& len) const +{ + long status = Load(pos, len); + + if (status < 0) + return status; + + assert(m_pos >= m_element_start); + assert(m_timecode >= 0); + //assert(m_size > 0); + //assert(m_element_size > m_size); + + const long long cluster_stop = + (m_element_size < 0) ? 
-1 : m_element_start + m_element_size; + + if ((cluster_stop >= 0) && (m_pos >= cluster_stop)) + return 1; //nothing else to do + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + pos = m_pos; + + for (;;) + { + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + break; + + if ((total >= 0) && (pos >= total)) + { + if (m_element_size < 0) + m_element_size = pos - m_element_start; + + break; + } + + //Parse ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) //weird + return E_FILE_FORMAT_INVALID; + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. 
+ + if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) //Cluster or Cues ID + { + if (m_element_size < 0) + m_element_size = pos - m_element_start; + + break; + } + + pos += len; //consume ID field + + //Parse Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume size field + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + //pos now points to start of payload + + if (size == 0) //weird + continue; + + //const long long block_start = pos; + const long long block_stop = pos + size; + + if (cluster_stop >= 0) + { + if (block_stop > cluster_stop) + { + if ((id == 0x20) || (id == 0x23)) + return E_FILE_FORMAT_INVALID; + + pos = cluster_stop; + break; + } + } + else if ((total >= 0) && (block_stop > total)) + { + m_element_size = total - m_element_start; + pos = total; + break; + } + else if (block_stop > avail) + { + len = static_cast<long>(size); + return E_BUFFER_NOT_FULL; + } + + Cluster* const this_ = const_cast<Cluster*>(this); + + if (id == 0x20) //BlockGroup + return this_->ParseBlockGroup(size, pos, len); + + if (id == 0x23) //SimpleBlock + return this_->ParseSimpleBlock(size, pos, len); + + pos += size; //consume payload + assert((cluster_stop < 0) || (pos <= cluster_stop)); + } + + assert(m_element_size > 0); + + m_pos = pos; + assert((cluster_stop < 0) || (m_pos <= cluster_stop)); + + if (m_entries_count > 0) + { + const long idx = 
m_entries_count - 1; + + const BlockEntry* const pLast = m_entries[idx]; + assert(pLast); + + const Block* const pBlock = pLast->GetBlock(); + assert(pBlock); + + const long long start = pBlock->m_start; + + if ((total >= 0) && (start > total)) + return -1; //defend against trucated stream + + const long long size = pBlock->m_size; + + const long long stop = start + size; + assert((cluster_stop < 0) || (stop <= cluster_stop)); + + if ((total >= 0) && (stop > total)) + return -1; //defend against trucated stream + } + + return 1; //no more entries +} + + +long Cluster::ParseSimpleBlock( + long long block_size, + long long& pos, + long& len) +{ + const long long block_start = pos; + const long long block_stop = pos + block_size; + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + //parse track number + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long track = ReadUInt(pReader, pos, len); + + if (track < 0) //error + return static_cast<long>(track); + + if (track == 0) + return E_FILE_FORMAT_INVALID; + +#if 0 + //TODO(matthewjheaney) + //This turned out to be too conservative. The problem is that + //if we see a track header in the tracks element with an unsupported + //track type, we throw that track header away, so it is not present + //in the track map. But even though we don't understand the track + //header, there are still blocks in the cluster with that track + //number. 
It was our decision to ignore that track header, so it's + //up to us to deal with blocks associated with that track -- we + //cannot simply report an error since technically there's nothing + //wrong with the file. + // + //For now we go ahead and finish the parse, creating a block entry + //for this block. This is somewhat wasteful, because without a + //track header there's nothing you can do with the block. What + //we really need here is a special return value that indicates to + //the caller that he should ignore this particular block, and + //continue parsing. + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(track); + + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return E_FILE_FORMAT_INVALID; +#endif + + pos += len; //consume track number + + if ((pos + 2) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 2) > avail) + { + len = 2; + return E_BUFFER_NOT_FULL; + } + + pos += 2; //consume timecode + + if ((pos + 1) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + unsigned char flags; + + status = pReader->Read(pos, 1, &flags); + + if (status < 0) //error or underflow + { + len = 1; + return status; + } + + ++pos; //consume flags byte + assert(pos <= avail); + + if (pos >= block_stop) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(flags & 0x06) >> 1; + + if ((lacing != 0) && (block_stop > avail)) + { + len = static_cast<long>(block_stop - pos); + return E_BUFFER_NOT_FULL; + } + + status = CreateBlock(0x23, //simple block id + block_start, block_size, + 0); //DiscardPadding + + if (status != 0) + return status; + + m_pos = block_stop; + + return 0; //success +} + + +long Cluster::ParseBlockGroup( + long long payload_size, + long long& pos, + long& len) +{ + const long long payload_start = pos; + const long long payload_stop = pos + payload_size; + + 
IMkvReader* const pReader = m_pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + if ((total >= 0) && (payload_stop > total)) + return E_FILE_FORMAT_INVALID; + + if (payload_stop > avail) + { + len = static_cast<long>(payload_size); + return E_BUFFER_NOT_FULL; + } + + long long discard_padding = 0; + + while (pos < payload_stop) + { + //parse sub-block element ID + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > payload_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id == 0) //not a value ID + return E_FILE_FORMAT_INVALID; + + pos += len; //consume ID field + + //Parse Size + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > payload_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field + + //pos now points to start of sub-block group payload + + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; + + if (id == 0x35A2) //DiscardPadding + { + result = GetUIntLength(pReader, pos, len); + + 
if (result < 0) //error + return static_cast<long>(result); + + status = UnserializeInt(pReader, pos, len, discard_padding); + + if (status < 0) //error + return status; + } + + if (id != 0x21) //sub-part of BlockGroup is not a Block + { + pos += size; //consume sub-part of block group + + if (pos > payload_stop) + return E_FILE_FORMAT_INVALID; + + continue; + } + + const long long block_stop = pos + size; + + if (block_stop > payload_stop) + return E_FILE_FORMAT_INVALID; + + //parse track number + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((pos + len) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long track = ReadUInt(pReader, pos, len); + + if (track < 0) //error + return static_cast<long>(track); + + if (track == 0) + return E_FILE_FORMAT_INVALID; + +#if 0 + //TODO(matthewjheaney) + //This turned out to be too conservative. The problem is that + //if we see a track header in the tracks element with an unsupported + //track type, we throw that track header away, so it is not present + //in the track map. But even though we don't understand the track + //header, there are still blocks in the cluster with that track + //number. It was our decision to ignore that track header, so it's + //up to us to deal with blocks associated with that track -- we + //cannot simply report an error since technically there's nothing + //wrong with the file. + // + //For now we go ahead and finish the parse, creating a block entry + //for this block. This is somewhat wasteful, because without a + //track header there's nothing you can do with the block. What + //we really need here is a special return value that indicates to + //the caller that he should ignore this particular block, and + //continue parsing. 
+ + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(track); + + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return E_FILE_FORMAT_INVALID; +#endif + + pos += len; //consume track number + + if ((pos + 2) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 2) > avail) + { + len = 2; + return E_BUFFER_NOT_FULL; + } + + pos += 2; //consume timecode + + if ((pos + 1) > block_stop) + return E_FILE_FORMAT_INVALID; + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + unsigned char flags; + + status = pReader->Read(pos, 1, &flags); + + if (status < 0) //error or underflow + { + len = 1; + return status; + } + + ++pos; //consume flags byte + assert(pos <= avail); + + if (pos >= block_stop) + return E_FILE_FORMAT_INVALID; + + const int lacing = int(flags & 0x06) >> 1; + + if ((lacing != 0) && (block_stop > avail)) + { + len = static_cast<long>(block_stop - pos); + return E_BUFFER_NOT_FULL; + } + + pos = block_stop; //consume block-part of block group + assert(pos <= payload_stop); + } + + assert(pos == payload_stop); + + status = CreateBlock(0x20, //BlockGroup ID + payload_start, payload_size, + discard_padding); + if (status != 0) + return status; + + m_pos = payload_stop; + + return 0; //success +} + + +long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const +{ + assert(m_pos >= m_element_start); + + pEntry = NULL; + + if (index < 0) + return -1; //generic error + + if (m_entries_count < 0) + return E_BUFFER_NOT_FULL; + + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count <= m_entries_size); + + if (index < m_entries_count) + { + pEntry = m_entries[index]; + assert(pEntry); + + return 1; //found entry + } + + if (m_element_size < 0) //we don't know cluster end yet + return E_BUFFER_NOT_FULL; //underflow + + const long long element_stop = m_element_start + m_element_size; + + if (m_pos >= 
element_stop) + return 0; //nothing left to parse + + return E_BUFFER_NOT_FULL; //underflow, since more remains to be parsed +} + + +Cluster* Cluster::Create( + Segment* pSegment, + long idx, + long long off) + //long long element_size) +{ + assert(pSegment); + assert(off >= 0); + + const long long element_start = pSegment->m_start + off; + + Cluster* const pCluster = new Cluster(pSegment, + idx, + element_start); + //element_size); + assert(pCluster); + + return pCluster; +} + + +Cluster::Cluster() : + m_pSegment(NULL), + m_element_start(0), + m_index(0), + m_pos(0), + m_element_size(0), + m_timecode(0), + m_entries(NULL), + m_entries_size(0), + m_entries_count(0) //means "no entries" +{ +} + + +Cluster::Cluster( + Segment* pSegment, + long idx, + long long element_start + /* long long element_size */ ) : + m_pSegment(pSegment), + m_element_start(element_start), + m_index(idx), + m_pos(element_start), + m_element_size(-1 /* element_size */ ), + m_timecode(-1), + m_entries(NULL), + m_entries_size(0), + m_entries_count(-1) //means "has not been parsed yet" +{ +} + + +Cluster::~Cluster() +{ + if (m_entries_count <= 0) + return; + + BlockEntry** i = m_entries; + BlockEntry** const j = m_entries + m_entries_count; + + while (i != j) + { + BlockEntry* p = *i++; + assert(p); + + delete p; + } + + delete[] m_entries; +} + + +bool Cluster::EOS() const +{ + return (m_pSegment == NULL); +} + + +long Cluster::GetIndex() const +{ + return m_index; +} + + +long long Cluster::GetPosition() const +{ + const long long pos = m_element_start - m_pSegment->m_start; + assert(pos >= 0); + + return pos; +} + + +long long Cluster::GetElementSize() const +{ + return m_element_size; +} + + +#if 0 +bool Cluster::HasBlockEntries( + const Segment* pSegment, + long long off) //relative to start of segment payload +{ + assert(pSegment); + assert(off >= 0); //relative to segment + + IMkvReader* const pReader = pSegment->m_pReader; + + long long pos = pSegment->m_start + off; //absolute + long 
long size; + + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + (void)id; + assert(id >= 0); + assert(id == 0x0F43B675); //Cluster ID + + pos += len; //consume id + + size = ReadUInt(pReader, pos, len); + assert(size > 0); + + pos += len; //consume size + + //pos now points to start of payload + } + + const long long stop = pos + size; + + while (pos < stop) + { + long len; + + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume id + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume size + + if (id == 0x20) //BlockGroup ID + return true; + + if (id == 0x23) //SimpleBlock ID + return true; + + pos += size; //consume payload + assert(pos <= stop); + } + + return false; +} +#endif + + +long Cluster::HasBlockEntries( + const Segment* pSegment, + long long off, //relative to start of segment payload + long long& pos, + long& len) +{ + assert(pSegment); + assert(off >= 0); //relative to segment + + IMkvReader* const pReader = pSegment->m_pReader; + + long long total, avail; + + long status = pReader->Length(&total, &avail); + + if (status < 0) //error + return status; + + assert((total < 0) || (avail <= total)); + + pos = pSegment->m_start + off; //absolute + + if ((total >= 0) && (pos >= total)) + return 0; //we don't even have a complete cluster + + const long long segment_stop = + (pSegment->m_size < 0) ? 
-1 : pSegment->m_start + pSegment->m_size; + + long long cluster_stop = -1; //interpreted later to mean "unknown size" + + { + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //need more data + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + len) > total)) + return 0; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + if (id != 0x0F43B675) //weird: not cluster ID + return -1; //generic error + + pos += len; //consume Cluster ID field + + //read size field + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //weird + return E_BUFFER_NOT_FULL; + + if ((segment_stop >= 0) && ((pos + len) > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && ((pos + len) > total)) + return 0; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + if (size == 0) + return 0; //cluster does not have entries + + pos += len; //consume size field + + //pos now points to start of payload + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size != unknown_size) + { + cluster_stop = pos + size; + assert(cluster_stop >= 0); + + if ((segment_stop >= 0) && (cluster_stop > segment_stop)) + return E_FILE_FORMAT_INVALID; + + if ((total >= 0) && (cluster_stop > total)) + //return E_FILE_FORMAT_INVALID; //too conservative + return 0; //cluster does not have any entries + } + } + + for (;;) + { + if ((cluster_stop >= 0) 
&& (pos >= cluster_stop)) + return 0; //no entries detected + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + long long result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //need more data + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long id = ReadUInt(pReader, pos, len); + + if (id < 0) //error + return static_cast<long>(id); + + //This is the distinguished set of ID's we use to determine + //that we have exhausted the sub-element's inside the cluster + //whose ID we parsed earlier. + + if (id == 0x0F43B675) //Cluster ID + return 0; //no entries found + + if (id == 0x0C53BB6B) //Cues ID + return 0; //no entries found + + pos += len; //consume id field + + if ((cluster_stop >= 0) && (pos >= cluster_stop)) + return E_FILE_FORMAT_INVALID; + + //read size field + + if ((pos + 1) > avail) + { + len = 1; + return E_BUFFER_NOT_FULL; + } + + result = GetUIntLength(pReader, pos, len); + + if (result < 0) //error + return static_cast<long>(result); + + if (result > 0) //underflow + return E_BUFFER_NOT_FULL; + + if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > avail) + return E_BUFFER_NOT_FULL; + + const long long size = ReadUInt(pReader, pos, len); + + if (size < 0) //error + return static_cast<long>(size); + + pos += len; //consume size field + + //pos now points to start of payload + + if ((cluster_stop >= 0) && (pos > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (size == 0) //weird + continue; + + const long long unknown_size = (1LL << (7 * len)) - 1; + + if (size == unknown_size) + return E_FILE_FORMAT_INVALID; //not supported inside cluster + + if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) + return E_FILE_FORMAT_INVALID; + + if (id == 0x20) 
//BlockGroup ID + return 1; //have at least one entry + + if (id == 0x23) //SimpleBlock ID + return 1; //have at least one entry + + pos += size; //consume payload + assert((cluster_stop < 0) || (pos <= cluster_stop)); + } +} + + +long long Cluster::GetTimeCode() const +{ + long long pos; + long len; + + const long status = Load(pos, len); + + if (status < 0) //error + return status; + + return m_timecode; +} + + +long long Cluster::GetTime() const +{ + const long long tc = GetTimeCode(); + + if (tc < 0) + return tc; + + const SegmentInfo* const pInfo = m_pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long t = m_timecode * scale; + + return t; +} + + +long long Cluster::GetFirstTime() const +{ + const BlockEntry* pEntry; + + const long status = GetFirst(pEntry); + + if (status < 0) //error + return status; + + if (pEntry == NULL) //empty cluster + return GetTime(); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + return pBlock->GetTime(this); +} + + +long long Cluster::GetLastTime() const +{ + const BlockEntry* pEntry; + + const long status = GetLast(pEntry); + + if (status < 0) //error + return status; + + if (pEntry == NULL) //empty cluster + return GetTime(); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + return pBlock->GetTime(this); +} + + +long Cluster::CreateBlock( + long long id, + long long pos, //absolute pos of payload + long long size, + long long discard_padding) +{ + assert((id == 0x20) || (id == 0x23)); //BlockGroup or SimpleBlock + + if (m_entries_count < 0) //haven't parsed anything yet + { + assert(m_entries == NULL); + assert(m_entries_size == 0); + + m_entries_size = 1024; + m_entries = new BlockEntry*[m_entries_size]; + + m_entries_count = 0; + } + else + { + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count <= m_entries_size); + + if (m_entries_count >= m_entries_size) + { + const long 
entries_size = 2 * m_entries_size; + + BlockEntry** const entries = new BlockEntry*[entries_size]; + assert(entries); + + BlockEntry** src = m_entries; + BlockEntry** const src_end = src + m_entries_count; + + BlockEntry** dst = entries; + + while (src != src_end) + *dst++ = *src++; + + delete[] m_entries; + + m_entries = entries; + m_entries_size = entries_size; + } + } + + if (id == 0x20) //BlockGroup ID + return CreateBlockGroup(pos, size, discard_padding); + else //SimpleBlock ID + return CreateSimpleBlock(pos, size); +} + + +long Cluster::CreateBlockGroup( + long long start_offset, + long long size, + long long discard_padding) +{ + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count >= 0); + assert(m_entries_count < m_entries_size); + + IMkvReader* const pReader = m_pSegment->m_pReader; + + long long pos = start_offset; + const long long stop = start_offset + size; + + //For WebM files, there is a bias towards previous reference times + //(in order to support alt-ref frames, which refer back to the previous + //keyframe). Normally a 0 value is not possible, but here we tenatively + //allow 0 as the value of a reference frame, with the interpretation + //that this is a "previous" reference time. 
+ + long long prev = 1; //nonce + long long next = 0; //nonce + long long duration = -1; //really, this is unsigned + + long long bpos = -1; + long long bsize = -1; + + while (pos < stop) + { + long len; + const long long id = ReadUInt(pReader, pos, len); + assert(id >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume ID + + const long long size = ReadUInt(pReader, pos, len); + assert(size >= 0); //TODO + assert((pos + len) <= stop); + + pos += len; //consume size + + if (id == 0x21) //Block ID + { + if (bpos < 0) //Block ID + { + bpos = pos; + bsize = size; + } + } + else if (id == 0x1B) //Duration ID + { + assert(size <= 8); + + duration = UnserializeUInt(pReader, pos, size); + assert(duration >= 0); //TODO + } + else if (id == 0x7B) //ReferenceBlock + { + assert(size <= 8); + const long size_ = static_cast<long>(size); + + long long time; + + long status = UnserializeInt(pReader, pos, size_, time); + assert(status == 0); + if (status != 0) + return -1; + + if (time <= 0) //see note above + prev = time; + else //weird + next = time; + } + + pos += size; //consume payload + assert(pos <= stop); + } + + assert(pos == stop); + assert(bpos >= 0); + assert(bsize >= 0); + + const long idx = m_entries_count; + + BlockEntry** const ppEntry = m_entries + idx; + BlockEntry*& pEntry = *ppEntry; + + pEntry = new (std::nothrow) BlockGroup( + this, + idx, + bpos, + bsize, + prev, + next, + duration, + discard_padding); + + if (pEntry == NULL) + return -1; //generic error + + BlockGroup* const p = static_cast<BlockGroup*>(pEntry); + + const long status = p->Parse(); + + if (status == 0) //success + { + ++m_entries_count; + return 0; + } + + delete pEntry; + pEntry = 0; + + return status; +} + + + +long Cluster::CreateSimpleBlock( + long long st, + long long sz) +{ + assert(m_entries); + assert(m_entries_size > 0); + assert(m_entries_count >= 0); + assert(m_entries_count < m_entries_size); + + const long idx = m_entries_count; + + BlockEntry** const ppEntry = 
m_entries + idx; + BlockEntry*& pEntry = *ppEntry; + + pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz); + + if (pEntry == NULL) + return -1; //generic error + + SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry); + + const long status = p->Parse(); + + if (status == 0) + { + ++m_entries_count; + return 0; + } + + delete pEntry; + pEntry = 0; + + return status; +} + + +long Cluster::GetFirst(const BlockEntry*& pFirst) const +{ + if (m_entries_count <= 0) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //error + { + pFirst = NULL; + return status; + } + + if (m_entries_count <= 0) //empty cluster + { + pFirst = NULL; + return 0; + } + } + + assert(m_entries); + + pFirst = m_entries[0]; + assert(pFirst); + + return 0; //success +} + +long Cluster::GetLast(const BlockEntry*& pLast) const +{ + for (;;) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //error + { + pLast = NULL; + return status; + } + + if (status > 0) //no new block + break; + } + + if (m_entries_count <= 0) + { + pLast = NULL; + return 0; + } + + assert(m_entries); + + const long idx = m_entries_count - 1; + + pLast = m_entries[idx]; + assert(pLast); + + return 0; +} + + +long Cluster::GetNext( + const BlockEntry* pCurr, + const BlockEntry*& pNext) const +{ + assert(pCurr); + assert(m_entries); + assert(m_entries_count > 0); + + size_t idx = pCurr->GetIndex(); + assert(idx < size_t(m_entries_count)); + assert(m_entries[idx] == pCurr); + + ++idx; + + if (idx >= size_t(m_entries_count)) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //error + { + pNext = NULL; + return status; + } + + if (status > 0) + { + pNext = NULL; + return 0; + } + + assert(m_entries); + assert(m_entries_count > 0); + assert(idx < size_t(m_entries_count)); + } + + pNext = m_entries[idx]; + assert(pNext); + + return 0; +} + + +long Cluster::GetEntryCount() const +{ + return 
m_entries_count; +} + + +const BlockEntry* Cluster::GetEntry( + const Track* pTrack, + long long time_ns) const +{ + assert(pTrack); + + if (m_pSegment == NULL) //this is the special EOS cluster + return pTrack->GetEOS(); + +#if 0 + + LoadBlockEntries(); + + if ((m_entries == NULL) || (m_entries_count <= 0)) + return NULL; //return EOS here? + + const BlockEntry* pResult = pTrack->GetEOS(); + + BlockEntry** i = m_entries; + assert(i); + + BlockEntry** const j = i + m_entries_count; + + while (i != j) + { + const BlockEntry* const pEntry = *i++; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) + continue; + + if (pTrack->VetEntry(pEntry)) + { + if (time_ns < 0) //just want first candidate block + return pEntry; + + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + break; + + pResult = pEntry; + } + else if (time_ns >= 0) + { + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + break; + } + } + + return pResult; + +#else + + const BlockEntry* pResult = pTrack->GetEOS(); + + long index = 0; + + for (;;) + { + if (index >= m_entries_count) + { + long long pos; + long len; + + const long status = Parse(pos, len); + assert(status >= 0); + + if (status > 0) //completely parsed, and no more entries + return pResult; + + if (status < 0) //should never happen + return 0; + + assert(m_entries); + assert(index < m_entries_count); + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) + { + ++index; + continue; + } + + if (pTrack->VetEntry(pEntry)) + { + if (time_ns < 0) //just want first candidate block + return pEntry; + + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + return pResult; + + pResult = pEntry; //have a candidate + } 
+ else if (time_ns >= 0) + { + const long long ns = pBlock->GetTime(this); + + if (ns > time_ns) + return pResult; + } + + ++index; + } + +#endif +} + + +const BlockEntry* +Cluster::GetEntry( + const CuePoint& cp, + const CuePoint::TrackPosition& tp) const +{ + assert(m_pSegment); + +#if 0 + + LoadBlockEntries(); + + if (m_entries == NULL) + return NULL; + + const long long count = m_entries_count; + + if (count <= 0) + return NULL; + + const long long tc = cp.GetTimeCode(); + + if ((tp.m_block > 0) && (tp.m_block <= count)) + { + const size_t block = static_cast<size_t>(tp.m_block); + const size_t index = block - 1; + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if ((pBlock->GetTrackNumber() == tp.m_track) && + (pBlock->GetTimeCode(this) == tc)) + { + return pEntry; + } + } + + const BlockEntry* const* i = m_entries; + const BlockEntry* const* const j = i + count; + + while (i != j) + { +#ifdef _DEBUG + const ptrdiff_t idx = i - m_entries; + idx; +#endif + + const BlockEntry* const pEntry = *i++; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != tp.m_track) + continue; + + const long long tc_ = pBlock->GetTimeCode(this); + assert(tc_ >= 0); + + if (tc_ < tc) + continue; + + if (tc_ > tc) + return NULL; + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(tp.m_track); + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return NULL; + + const long long type = pTrack->GetType(); + + if (type == 2) //audio + return pEntry; + + if (type != 1) //not video + return NULL; + + if (!pBlock->IsKey()) + return NULL; + + return pEntry; + } + + return NULL; + +#else + + const long long tc = cp.GetTimeCode(); + + if (tp.m_block > 0) + { + const long block = 
static_cast<long>(tp.m_block); + const long index = block - 1; + + while (index >= m_entries_count) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //TODO: can this happen? + return NULL; + + if (status > 0) //nothing remains to be parsed + return NULL; + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if ((pBlock->GetTrackNumber() == tp.m_track) && + (pBlock->GetTimeCode(this) == tc)) + { + return pEntry; + } + } + + long index = 0; + + for (;;) + { + if (index >= m_entries_count) + { + long long pos; + long len; + + const long status = Parse(pos, len); + + if (status < 0) //TODO: can this happen? + return NULL; + + if (status > 0) //nothing remains to be parsed + return NULL; + + assert(m_entries); + assert(index < m_entries_count); + } + + const BlockEntry* const pEntry = m_entries[index]; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != tp.m_track) + { + ++index; + continue; + } + + const long long tc_ = pBlock->GetTimeCode(this); + + if (tc_ < tc) + { + ++index; + continue; + } + + if (tc_ > tc) + return NULL; + + const Tracks* const pTracks = m_pSegment->GetTracks(); + assert(pTracks); + + const long tn = static_cast<long>(tp.m_track); + const Track* const pTrack = pTracks->GetTrackByNumber(tn); + + if (pTrack == NULL) + return NULL; + + const long long type = pTrack->GetType(); + + if (type == 2) //audio + return pEntry; + + if (type != 1) //not video + return NULL; + + if (!pBlock->IsKey()) + return NULL; + + return pEntry; + } + +#endif + +} + + +#if 0 +const BlockEntry* Cluster::GetMaxKey(const VideoTrack* pTrack) const +{ + assert(pTrack); + + if (m_pSegment == NULL) //EOS + return pTrack->GetEOS(); + + LoadBlockEntries(); + + if ((m_entries == NULL) || (m_entries_count <= 0)) + return 
pTrack->GetEOS(); + + BlockEntry** i = m_entries + m_entries_count; + BlockEntry** const j = m_entries; + + while (i != j) + { + const BlockEntry* const pEntry = *--i; + assert(pEntry); + assert(!pEntry->EOS()); + + const Block* const pBlock = pEntry->GetBlock(); + assert(pBlock); + + if (pBlock->GetTrackNumber() != pTrack->GetNumber()) + continue; + + if (pBlock->IsKey()) + return pEntry; + } + + return pTrack->GetEOS(); //no satisfactory block found +} +#endif + + +BlockEntry::BlockEntry(Cluster* p, long idx) : + m_pCluster(p), + m_index(idx) +{ +} + + +BlockEntry::~BlockEntry() +{ +} + + +bool BlockEntry::EOS() const +{ + return (GetKind() == kBlockEOS); +} + + +const Cluster* BlockEntry::GetCluster() const +{ + return m_pCluster; +} + + +long BlockEntry::GetIndex() const +{ + return m_index; +} + + +SimpleBlock::SimpleBlock( + Cluster* pCluster, + long idx, + long long start, + long long size) : + BlockEntry(pCluster, idx), + m_block(start, size, 0) +{ +} + + +long SimpleBlock::Parse() +{ + return m_block.Parse(m_pCluster); +} + + +BlockEntry::Kind SimpleBlock::GetKind() const +{ + return kBlockSimple; +} + + +const Block* SimpleBlock::GetBlock() const +{ + return &m_block; +} + + +BlockGroup::BlockGroup( + Cluster* pCluster, + long idx, + long long block_start, + long long block_size, + long long prev, + long long next, + long long duration, + long long discard_padding) : + BlockEntry(pCluster, idx), + m_block(block_start, block_size, discard_padding), + m_prev(prev), + m_next(next), + m_duration(duration) +{ +} + + +long BlockGroup::Parse() +{ + const long status = m_block.Parse(m_pCluster); + + if (status) + return status; + + m_block.SetKey((m_prev > 0) && (m_next <= 0)); + + return 0; +} + + +#if 0 +void BlockGroup::ParseBlock(long long start, long long size) +{ + IMkvReader* const pReader = m_pCluster->m_pSegment->m_pReader; + + Block* const pBlock = new Block(start, size, pReader); + assert(pBlock); //TODO + + //TODO: the Matroska spec says you have 
multiple blocks within the + //same block group, with blocks ranked by priority (the flag bits). + + assert(m_pBlock == NULL); + m_pBlock = pBlock; +} +#endif + + +BlockEntry::Kind BlockGroup::GetKind() const +{ + return kBlockGroup; +} + + +const Block* BlockGroup::GetBlock() const +{ + return &m_block; +} + + +long long BlockGroup::GetPrevTimeCode() const +{ + return m_prev; +} + + +long long BlockGroup::GetNextTimeCode() const +{ + return m_next; +} + +long long BlockGroup::GetDurationTimeCode() const +{ + return m_duration; +} + +Block::Block(long long start, long long size_, long long discard_padding) : + m_start(start), + m_size(size_), + m_track(0), + m_timecode(-1), + m_flags(0), + m_frames(NULL), + m_frame_count(-1), + m_discard_padding(discard_padding) +{ +} + + +Block::~Block() +{ + delete[] m_frames; +} + + +long Block::Parse(const Cluster* pCluster) +{ + if (pCluster == NULL) + return -1; + + if (pCluster->m_pSegment == NULL) + return -1; + + assert(m_start >= 0); + assert(m_size >= 0); + assert(m_track <= 0); + assert(m_frames == NULL); + assert(m_frame_count <= 0); + + long long pos = m_start; + const long long stop = m_start + m_size; + + long len; + + IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; + + m_track = ReadUInt(pReader, pos, len); + + if (m_track <= 0) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume track number + + if ((stop - pos) < 2) + return E_FILE_FORMAT_INVALID; + + long status; + long long value; + + status = UnserializeInt(pReader, pos, 2, value); + + if (status) + return E_FILE_FORMAT_INVALID; + + if (value < SHRT_MIN) + return E_FILE_FORMAT_INVALID; + + if (value > SHRT_MAX) + return E_FILE_FORMAT_INVALID; + + m_timecode = static_cast<short>(value); + + pos += 2; + + if ((stop - pos) <= 0) + return E_FILE_FORMAT_INVALID; + + status = pReader->Read(pos, 1, &m_flags); + + if (status) + return E_FILE_FORMAT_INVALID; + + const int lacing = 
int(m_flags & 0x06) >> 1; + + ++pos; //consume flags byte + + if (lacing == 0) //no lacing + { + if (pos > stop) + return E_FILE_FORMAT_INVALID; + + m_frame_count = 1; + m_frames = new Frame[m_frame_count]; + + Frame& f = m_frames[0]; + f.pos = pos; + + const long long frame_size = stop - pos; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + f.len = static_cast<long>(frame_size); + + return 0; //success + } + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + unsigned char biased_count; + + status = pReader->Read(pos, 1, &biased_count); + + if (status) + return E_FILE_FORMAT_INVALID; + + ++pos; //consume frame count + assert(pos <= stop); + + m_frame_count = int(biased_count) + 1; + + m_frames = new Frame[m_frame_count]; + assert(m_frames); + + if (lacing == 1) //Xiph + { + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + long size = 0; + int frame_count = m_frame_count; + + while (frame_count > 1) + { + long frame_size = 0; + + for (;;) + { + unsigned char val; + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + status = pReader->Read(pos, 1, &val); + + if (status) + return E_FILE_FORMAT_INVALID; + + ++pos; //consume xiph size byte + + frame_size += val; + + if (val < 255) + break; + } + + Frame& f = *pf++; + assert(pf < pf_end); + + f.pos = 0; //patch later + + f.len = frame_size; + size += frame_size; //contribution of this frame + + --frame_count; + } + + assert(pf < pf_end); + assert(pos <= stop); + + { + Frame& f = *pf++; + + if (pf != pf_end) + return E_FILE_FORMAT_INVALID; + + f.pos = 0; //patch later + + const long long total_size = stop - pos; + + if (total_size < size) + return E_FILE_FORMAT_INVALID; + + const long long frame_size = total_size - size; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + f.len = static_cast<long>(frame_size); + } + + pf = m_frames; + while (pf != pf_end) + { + Frame& f = *pf++; + assert((pos + f.len) <= stop); + + f.pos = pos; + pos += f.len; + } + + assert(pos 
== stop); + } + else if (lacing == 2) //fixed-size lacing + { + const long long total_size = stop - pos; + + if ((total_size % m_frame_count) != 0) + return E_FILE_FORMAT_INVALID; + + const long long frame_size = total_size / m_frame_count; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + while (pf != pf_end) + { + assert((pos + frame_size) <= stop); + + Frame& f = *pf++; + + f.pos = pos; + f.len = static_cast<long>(frame_size); + + pos += frame_size; + } + + assert(pos == stop); + } + else + { + assert(lacing == 3); //EBML lacing + + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + long size = 0; + int frame_count = m_frame_count; + + long long frame_size = ReadUInt(pReader, pos, len); + + if (frame_size < 0) + return E_FILE_FORMAT_INVALID; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume length of size of first frame + + if ((pos + frame_size) > stop) + return E_FILE_FORMAT_INVALID; + + Frame* pf = m_frames; + Frame* const pf_end = pf + m_frame_count; + + { + Frame& curr = *pf; + + curr.pos = 0; //patch later + + curr.len = static_cast<long>(frame_size); + size += curr.len; //contribution of this frame + } + + --frame_count; + + while (frame_count > 1) + { + if (pos >= stop) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + + const Frame& prev = *pf++; + assert(prev.len == frame_size); + if (prev.len != frame_size) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + + Frame& curr = *pf; + + curr.pos = 0; //patch later + + const long long delta_size_ = ReadUInt(pReader, pos, len); + + if (delta_size_ < 0) + return E_FILE_FORMAT_INVALID; + + if ((pos + len) > stop) + return E_FILE_FORMAT_INVALID; + + pos += len; //consume length of (delta) size + assert(pos <= stop); + + const int exp = 7*len - 1; + const long long bias = (1LL << exp) - 1LL; + const long long 
delta_size = delta_size_ - bias; + + frame_size += delta_size; + + if (frame_size < 0) + return E_FILE_FORMAT_INVALID; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + curr.len = static_cast<long>(frame_size); + size += curr.len; //contribution of this frame + + --frame_count; + } + + { + assert(pos <= stop); + assert(pf < pf_end); + + const Frame& prev = *pf++; + assert(prev.len == frame_size); + if (prev.len != frame_size) + return E_FILE_FORMAT_INVALID; + + assert(pf < pf_end); + + Frame& curr = *pf++; + assert(pf == pf_end); + + curr.pos = 0; //patch later + + const long long total_size = stop - pos; + + if (total_size < size) + return E_FILE_FORMAT_INVALID; + + frame_size = total_size - size; + + if (frame_size > LONG_MAX) + return E_FILE_FORMAT_INVALID; + + curr.len = static_cast<long>(frame_size); + } + + pf = m_frames; + while (pf != pf_end) + { + Frame& f = *pf++; + assert((pos + f.len) <= stop); + + f.pos = pos; + pos += f.len; + } + + assert(pos == stop); + } + + return 0; //success +} + + +long long Block::GetTimeCode(const Cluster* pCluster) const +{ + if (pCluster == 0) + return m_timecode; + + const long long tc0 = pCluster->GetTimeCode(); + assert(tc0 >= 0); + + const long long tc = tc0 + m_timecode; + + return tc; //unscaled timecode units +} + + +long long Block::GetTime(const Cluster* pCluster) const +{ + assert(pCluster); + + const long long tc = GetTimeCode(pCluster); + + const Segment* const pSegment = pCluster->m_pSegment; + const SegmentInfo* const pInfo = pSegment->GetInfo(); + assert(pInfo); + + const long long scale = pInfo->GetTimeCodeScale(); + assert(scale >= 1); + + const long long ns = tc * scale; + + return ns; +} + + +long long Block::GetTrackNumber() const +{ + return m_track; +} + + +bool Block::IsKey() const +{ + return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0); +} + + +void Block::SetKey(bool bKey) +{ + if (bKey) + m_flags |= static_cast<unsigned char>(1 << 7); + else + m_flags &= 0x7F; +} + + 
+bool Block::IsInvisible() const  // "invisible" flag is bit 3 (0x08) of the Block flags byte read in Block::Parse
+{
+    return bool(int(m_flags & 0x08) != 0);
+}
+
+
+Block::Lacing Block::GetLacing() const  // lacing mode lives in bits 1-2 (mask 0x06) of the flags byte
+{
+    const int value = int(m_flags & 0x06) >> 1;  // 0=none, 1=Xiph, 2=fixed, 3=EBML (see Lacing enum)
+    return static_cast<Lacing>(value);
+}
+
+
+int Block::GetFrameCount() const  // number of laced frames; set by Block::Parse (-1 until parsed)
+{
+    return m_frame_count;
+}
+
+
+const Block::Frame& Block::GetFrame(int idx) const  // idx must be in [0, GetFrameCount()); asserts only, no runtime check
+{
+    assert(idx >= 0);
+    assert(idx < m_frame_count);
+
+    const Frame& f = m_frames[idx];
+    assert(f.pos > 0);  // Parse() patches pos/len before returning success
+    assert(f.len > 0);
+
+    return f;
+}
+
+
+long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const  // copy this frame's len bytes at absolute offset pos into buf; returns reader status (0 on success)
+{
+    assert(pReader);
+    assert(buf);
+
+    const long status = pReader->Read(pos, len, buf);
+    return status;
+}
+
+long long Block::GetDiscardPadding() const  // DiscardPadding value forwarded from the enclosing BlockGroup (0 for SimpleBlock)
+{
+    return m_discard_padding;
+}
+
+} //end namespace mkvparser
diff --git a/source/libvpx/third_party/libwebm/mkvparser.hpp b/source/libvpx/third_party/libwebm/mkvparser.hpp
new file mode 100644
index 0000000..7184d26
--- /dev/null
+++ b/source/libvpx/third_party/libwebm/mkvparser.hpp
@@ -0,0 +1,1079 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+ +#ifndef MKVPARSER_HPP +#define MKVPARSER_HPP + +#include <cstdlib> +#include <cstdio> +#include <cstddef> + +namespace mkvparser +{ + +const int E_FILE_FORMAT_INVALID = -2; +const int E_BUFFER_NOT_FULL = -3; + +class IMkvReader +{ +public: + virtual int Read(long long pos, long len, unsigned char* buf) = 0; + virtual int Length(long long* total, long long* available) = 0; +protected: + virtual ~IMkvReader(); +}; + +long long GetUIntLength(IMkvReader*, long long, long&); +long long ReadUInt(IMkvReader*, long long, long&); +long long UnserializeUInt(IMkvReader*, long long pos, long long size); + +long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); +long UnserializeInt(IMkvReader*, long long pos, long len, long long& result); + +long UnserializeString( + IMkvReader*, + long long pos, + long long size, + char*& str); + +long ParseElementHeader( + IMkvReader* pReader, + long long& pos, //consume id and size fields + long long stop, //if you know size of element's parent + long long& id, + long long& size); + +bool Match(IMkvReader*, long long&, unsigned long, long long&); +bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); + +void GetVersion(int& major, int& minor, int& build, int& revision); + +struct EBMLHeader +{ + EBMLHeader(); + ~EBMLHeader(); + long long m_version; + long long m_readVersion; + long long m_maxIdLength; + long long m_maxSizeLength; + char* m_docType; + long long m_docTypeVersion; + long long m_docTypeReadVersion; + + long long Parse(IMkvReader*, long long&); + void Init(); +}; + + +class Segment; +class Track; +class Cluster; + +class Block +{ + Block(const Block&); + Block& operator=(const Block&); + +public: + const long long m_start; + const long long m_size; + + Block(long long start, long long size, long long discard_padding); + ~Block(); + + long Parse(const Cluster*); + + long long GetTrackNumber() const; + long long GetTimeCode(const Cluster*) const; //absolute, but not scaled + long long 
GetTime(const Cluster*) const; //absolute, and scaled (ns) + bool IsKey() const; + void SetKey(bool); + bool IsInvisible() const; + + enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; + Lacing GetLacing() const; + + int GetFrameCount() const; //to index frames: [0, count) + + struct Frame + { + long long pos; //absolute offset + long len; + + long Read(IMkvReader*, unsigned char*) const; + }; + + const Frame& GetFrame(int frame_index) const; + + long long GetDiscardPadding() const; + +private: + long long m_track; //Track::Number() + short m_timecode; //relative to cluster + unsigned char m_flags; + + Frame* m_frames; + int m_frame_count; + +protected: + const long long m_discard_padding; +}; + + +class BlockEntry +{ + BlockEntry(const BlockEntry&); + BlockEntry& operator=(const BlockEntry&); + +protected: + BlockEntry(Cluster*, long index); + +public: + virtual ~BlockEntry(); + + bool EOS() const; + const Cluster* GetCluster() const; + long GetIndex() const; + virtual const Block* GetBlock() const = 0; + + enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; + virtual Kind GetKind() const = 0; + +protected: + Cluster* const m_pCluster; + const long m_index; + +}; + + +class SimpleBlock : public BlockEntry +{ + SimpleBlock(const SimpleBlock&); + SimpleBlock& operator=(const SimpleBlock&); + +public: + SimpleBlock(Cluster*, long index, long long start, long long size); + long Parse(); + + Kind GetKind() const; + const Block* GetBlock() const; + +protected: + Block m_block; + +}; + + +class BlockGroup : public BlockEntry +{ + BlockGroup(const BlockGroup&); + BlockGroup& operator=(const BlockGroup&); + +public: + BlockGroup( + Cluster*, + long index, + long long block_start, //absolute pos of block's payload + long long block_size, //size of block's payload + long long prev, + long long next, + long long duration, + long long discard_padding); + + long Parse(); + + Kind GetKind() const; + const Block* GetBlock() const; + + long long GetPrevTimeCode() 
const; //relative to block's time + long long GetNextTimeCode() const; //as above + long long GetDurationTimeCode() const; + +private: + Block m_block; + const long long m_prev; + const long long m_next; + const long long m_duration; +}; + +/////////////////////////////////////////////////////////////// +// ContentEncoding element +// Elements used to describe if the track data has been encrypted or +// compressed with zlib or header stripping. +class ContentEncoding { +public: + enum { + kCTR = 1 + }; + + ContentEncoding(); + ~ContentEncoding(); + + // ContentCompression element names + struct ContentCompression { + ContentCompression(); + ~ContentCompression(); + + unsigned long long algo; + unsigned char* settings; + long long settings_len; + }; + + // ContentEncAESSettings element names + struct ContentEncAESSettings { + ContentEncAESSettings() : cipher_mode(kCTR) {} + ~ContentEncAESSettings() {} + + unsigned long long cipher_mode; + }; + + // ContentEncryption element names + struct ContentEncryption { + ContentEncryption(); + ~ContentEncryption(); + + unsigned long long algo; + unsigned char* key_id; + long long key_id_len; + unsigned char* signature; + long long signature_len; + unsigned char* sig_key_id; + long long sig_key_id_len; + unsigned long long sig_algo; + unsigned long long sig_hash_algo; + + ContentEncAESSettings aes_settings; + }; + + // Returns ContentCompression represented by |idx|. Returns NULL if |idx| + // is out of bounds. + const ContentCompression* GetCompressionByIndex(unsigned long idx) const; + + // Returns number of ContentCompression elements in this ContentEncoding + // element. + unsigned long GetCompressionCount() const; + + // Parses the ContentCompression element from |pReader|. |start| is the + // starting offset of the ContentCompression payload. |size| is the size in + // bytes of the ContentCompression payload. |compression| is where the parsed + // values will be stored. 
+ long ParseCompressionEntry(long long start, + long long size, + IMkvReader* pReader, + ContentCompression* compression); + + // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| + // is out of bounds. + const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; + + // Returns number of ContentEncryption elements in this ContentEncoding + // element. + unsigned long GetEncryptionCount() const; + + // Parses the ContentEncAESSettings element from |pReader|. |start| is the + // starting offset of the ContentEncAESSettings payload. |size| is the + // size in bytes of the ContentEncAESSettings payload. |encryption| is + // where the parsed values will be stored. + long ParseContentEncAESSettingsEntry(long long start, + long long size, + IMkvReader* pReader, + ContentEncAESSettings* aes); + + // Parses the ContentEncoding element from |pReader|. |start| is the + // starting offset of the ContentEncoding payload. |size| is the size in + // bytes of the ContentEncoding payload. Returns true on success. + long ParseContentEncodingEntry(long long start, + long long size, + IMkvReader* pReader); + + // Parses the ContentEncryption element from |pReader|. |start| is the + // starting offset of the ContentEncryption payload. |size| is the size in + // bytes of the ContentEncryption payload. |encryption| is where the parsed + // values will be stored. + long ParseEncryptionEntry(long long start, + long long size, + IMkvReader* pReader, + ContentEncryption* encryption); + + unsigned long long encoding_order() const { return encoding_order_; } + unsigned long long encoding_scope() const { return encoding_scope_; } + unsigned long long encoding_type() const { return encoding_type_; } + +private: + // Member variables for list of ContentCompression elements. + ContentCompression** compression_entries_; + ContentCompression** compression_entries_end_; + + // Member variables for list of ContentEncryption elements. 
+ ContentEncryption** encryption_entries_; + ContentEncryption** encryption_entries_end_; + + // ContentEncoding element names + unsigned long long encoding_order_; + unsigned long long encoding_scope_; + unsigned long long encoding_type_; + + // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); + ContentEncoding(const ContentEncoding&); + ContentEncoding& operator=(const ContentEncoding&); +}; + +class Track +{ + Track(const Track&); + Track& operator=(const Track&); + +public: + class Info; + static long Create( + Segment*, + const Info&, + long long element_start, + long long element_size, + Track*&); + + enum Type { + kVideo = 1, + kAudio = 2, + kSubtitle = 0x11, + kMetadata = 0x21 + }; + + Segment* const m_pSegment; + const long long m_element_start; + const long long m_element_size; + virtual ~Track(); + + long GetType() const; + long GetNumber() const; + unsigned long long GetUid() const; + const char* GetNameAsUTF8() const; + const char* GetLanguage() const; + const char* GetCodecNameAsUTF8() const; + const char* GetCodecId() const; + const unsigned char* GetCodecPrivate(size_t&) const; + bool GetLacing() const; + unsigned long long GetDefaultDuration() const; + unsigned long long GetCodecDelay() const; + unsigned long long GetSeekPreRoll() const; + + const BlockEntry* GetEOS() const; + + struct Settings + { + long long start; + long long size; + }; + + class Info + { + public: + Info(); + ~Info(); + int Copy(Info&) const; + void Clear(); + long type; + long number; + unsigned long long uid; + unsigned long long defaultDuration; + unsigned long long codecDelay; + unsigned long long seekPreRoll; + char* nameAsUTF8; + char* language; + char* codecId; + char* codecNameAsUTF8; + unsigned char* codecPrivate; + size_t codecPrivateSize; + bool lacing; + Settings settings; + + private: + Info(const Info&); + Info& operator=(const Info&); + int CopyStr(char* Info::*str, Info&) const; + }; + + long GetFirst(const BlockEntry*&) const; + long GetNext(const BlockEntry* 
pCurr, const BlockEntry*& pNext) const; + virtual bool VetEntry(const BlockEntry*) const; + virtual long Seek(long long time_ns, const BlockEntry*&) const; + + const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; + unsigned long GetContentEncodingCount() const; + + long ParseContentEncodingsEntry(long long start, long long size); + +protected: + Track( + Segment*, + long long element_start, + long long element_size); + + Info m_info; + + class EOSBlock : public BlockEntry + { + public: + EOSBlock(); + + Kind GetKind() const; + const Block* GetBlock() const; + }; + + EOSBlock m_eos; + +private: + ContentEncoding** content_encoding_entries_; + ContentEncoding** content_encoding_entries_end_; +}; + + +class VideoTrack : public Track +{ + VideoTrack(const VideoTrack&); + VideoTrack& operator=(const VideoTrack&); + + VideoTrack( + Segment*, + long long element_start, + long long element_size); + +public: + static long Parse( + Segment*, + const Info&, + long long element_start, + long long element_size, + VideoTrack*&); + + long long GetWidth() const; + long long GetHeight() const; + double GetFrameRate() const; + + bool VetEntry(const BlockEntry*) const; + long Seek(long long time_ns, const BlockEntry*&) const; + +private: + long long m_width; + long long m_height; + double m_rate; + +}; + + +class AudioTrack : public Track +{ + AudioTrack(const AudioTrack&); + AudioTrack& operator=(const AudioTrack&); + + AudioTrack( + Segment*, + long long element_start, + long long element_size); +public: + static long Parse( + Segment*, + const Info&, + long long element_start, + long long element_size, + AudioTrack*&); + + double GetSamplingRate() const; + long long GetChannels() const; + long long GetBitDepth() const; + +private: + double m_rate; + long long m_channels; + long long m_bitDepth; +}; + + +class Tracks +{ + Tracks(const Tracks&); + Tracks& operator=(const Tracks&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long 
long m_size; + const long long m_element_start; + const long long m_element_size; + + Tracks( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + + ~Tracks(); + + long Parse(); + + unsigned long GetTracksCount() const; + + const Track* GetTrackByNumber(long tn) const; + const Track* GetTrackByIndex(unsigned long idx) const; + +private: + Track** m_trackEntries; + Track** m_trackEntriesEnd; + + long ParseTrackEntry( + long long payload_start, + long long payload_size, + long long element_start, + long long element_size, + Track*&) const; + +}; + + +class Chapters +{ + Chapters(const Chapters&); + Chapters& operator=(const Chapters&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + Chapters( + Segment*, + long long payload_start, + long long payload_size, + long long element_start, + long long element_size); + + ~Chapters(); + + long Parse(); + + class Atom; + class Edition; + + class Display + { + friend class Atom; + Display(); + Display(const Display&); + ~Display(); + Display& operator=(const Display&); + public: + const char* GetString() const; + const char* GetLanguage() const; + const char* GetCountry() const; + private: + void Init(); + void ShallowCopy(Display&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + char* m_string; + char* m_language; + char* m_country; + }; + + class Atom + { + friend class Edition; + Atom(); + Atom(const Atom&); + ~Atom(); + Atom& operator=(const Atom&); + public: + unsigned long long GetUID() const; + const char* GetStringUID() const; + + long long GetStartTimecode() const; + long long GetStopTimecode() const; + + long long GetStartTime(const Chapters*) const; + long long GetStopTime(const Chapters*) const; + + int GetDisplayCount() const; + const Display* GetDisplay(int index) const; + private: + void Init(); + void 
ShallowCopy(Atom&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + static long long GetTime(const Chapters*, long long timecode); + + long ParseDisplay(IMkvReader*, long long pos, long long size); + bool ExpandDisplaysArray(); + + char* m_string_uid; + unsigned long long m_uid; + long long m_start_timecode; + long long m_stop_timecode; + + Display* m_displays; + int m_displays_size; + int m_displays_count; + }; + + class Edition + { + friend class Chapters; + Edition(); + Edition(const Edition&); + ~Edition(); + Edition& operator=(const Edition&); + public: + int GetAtomCount() const; + const Atom* GetAtom(int index) const; + private: + void Init(); + void ShallowCopy(Edition&) const; + void Clear(); + long Parse(IMkvReader*, long long pos, long long size); + + long ParseAtom(IMkvReader*, long long pos, long long size); + bool ExpandAtomsArray(); + + Atom* m_atoms; + int m_atoms_size; + int m_atoms_count; + }; + + int GetEditionCount() const; + const Edition* GetEdition(int index) const; + +private: + long ParseEdition(long long pos, long long size); + bool ExpandEditionsArray(); + + Edition* m_editions; + int m_editions_size; + int m_editions_count; + +}; + + +class SegmentInfo +{ + SegmentInfo(const SegmentInfo&); + SegmentInfo& operator=(const SegmentInfo&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + SegmentInfo( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + + ~SegmentInfo(); + + long Parse(); + + long long GetTimeCodeScale() const; + long long GetDuration() const; //scaled + const char* GetMuxingAppAsUTF8() const; + const char* GetWritingAppAsUTF8() const; + const char* GetTitleAsUTF8() const; + +private: + long long m_timecodeScale; + double m_duration; + char* m_pMuxingAppAsUTF8; + char* m_pWritingAppAsUTF8; + char* m_pTitleAsUTF8; +}; + + +class 
SeekHead +{ + SeekHead(const SeekHead&); + SeekHead& operator=(const SeekHead&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + SeekHead( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + + ~SeekHead(); + + long Parse(); + + struct Entry + { + //the SeekHead entry payload + long long id; + long long pos; + + //absolute pos of SeekEntry ID + long long element_start; + + //SeekEntry ID size + size size + payload + long long element_size; + }; + + int GetCount() const; + const Entry* GetEntry(int idx) const; + + struct VoidElement + { + //absolute pos of Void ID + long long element_start; + + //ID size + size size + payload size + long long element_size; + }; + + int GetVoidElementCount() const; + const VoidElement* GetVoidElement(int idx) const; + +private: + Entry* m_entries; + int m_entry_count; + + VoidElement* m_void_elements; + int m_void_element_count; + + static bool ParseEntry( + IMkvReader*, + long long pos, //payload + long long size, + Entry*); + +}; + +class Cues; +class CuePoint +{ + friend class Cues; + + CuePoint(long, long long); + ~CuePoint(); + + CuePoint(const CuePoint&); + CuePoint& operator=(const CuePoint&); + +public: + long long m_element_start; + long long m_element_size; + + void Load(IMkvReader*); + + long long GetTimeCode() const; //absolute but unscaled + long long GetTime(const Segment*) const; //absolute and scaled (ns units) + + struct TrackPosition + { + long long m_track; + long long m_pos; //of cluster + long long m_block; + //codec_state //defaults to 0 + //reference = clusters containing req'd referenced blocks + // reftime = timecode of the referenced block + + void Parse(IMkvReader*, long long, long long); + }; + + const TrackPosition* Find(const Track*) const; + +private: + const long m_index; + long long m_timecode; + TrackPosition* m_track_positions; + 
size_t m_track_positions_count; + +}; + + +class Cues +{ + friend class Segment; + + Cues( + Segment*, + long long start, + long long size, + long long element_start, + long long element_size); + ~Cues(); + + Cues(const Cues&); + Cues& operator=(const Cues&); + +public: + Segment* const m_pSegment; + const long long m_start; + const long long m_size; + const long long m_element_start; + const long long m_element_size; + + bool Find( //lower bound of time_ns + long long time_ns, + const Track*, + const CuePoint*&, + const CuePoint::TrackPosition*&) const; + +#if 0 + bool FindNext( //upper_bound of time_ns + long long time_ns, + const Track*, + const CuePoint*&, + const CuePoint::TrackPosition*&) const; +#endif + + const CuePoint* GetFirst() const; + const CuePoint* GetLast() const; + const CuePoint* GetNext(const CuePoint*) const; + + const BlockEntry* GetBlock( + const CuePoint*, + const CuePoint::TrackPosition*) const; + + bool LoadCuePoint() const; + long GetCount() const; //loaded only + //long GetTotal() const; //loaded + preloaded + bool DoneParsing() const; + +private: + void Init() const; + void PreloadCuePoint(long&, long long) const; + + mutable CuePoint** m_cue_points; + mutable long m_count; + mutable long m_preload_count; + mutable long long m_pos; + +}; + + +class Cluster +{ + friend class Segment; + + Cluster(const Cluster&); + Cluster& operator=(const Cluster&); + +public: + Segment* const m_pSegment; + +public: + static Cluster* Create( + Segment*, + long index, //index in segment + long long off); //offset relative to segment + //long long element_size); + + Cluster(); //EndOfStream + ~Cluster(); + + bool EOS() const; + + long long GetTimeCode() const; //absolute, but not scaled + long long GetTime() const; //absolute, and scaled (nanosecond units) + long long GetFirstTime() const; //time (ns) of first (earliest) block + long long GetLastTime() const; //time (ns) of last (latest) block + + long GetFirst(const BlockEntry*&) const; + long 
GetLast(const BlockEntry*&) const; + long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; + + const BlockEntry* GetEntry(const Track*, long long ns = -1) const; + const BlockEntry* GetEntry( + const CuePoint&, + const CuePoint::TrackPosition&) const; + //const BlockEntry* GetMaxKey(const VideoTrack*) const; + +// static bool HasBlockEntries(const Segment*, long long); + + static long HasBlockEntries( + const Segment*, + long long idoff, + long long& pos, + long& size); + + long GetEntryCount() const; + + long Load(long long& pos, long& size) const; + + long Parse(long long& pos, long& size) const; + long GetEntry(long index, const mkvparser::BlockEntry*&) const; + +protected: + Cluster( + Segment*, + long index, + long long element_start); + //long long element_size); + +public: + const long long m_element_start; + long long GetPosition() const; //offset relative to segment + + long GetIndex() const; + long long GetElementSize() const; + //long long GetPayloadSize() const; + + //long long Unparsed() const; + +private: + long m_index; + mutable long long m_pos; + //mutable long long m_size; + mutable long long m_element_size; + mutable long long m_timecode; + mutable BlockEntry** m_entries; + mutable long m_entries_size; + mutable long m_entries_count; + + long ParseSimpleBlock(long long, long long&, long&); + long ParseBlockGroup(long long, long long&, long&); + + long CreateBlock(long long id, long long pos, long long size, + long long discard_padding); + long CreateBlockGroup(long long start_offset, long long size, + long long discard_padding); + long CreateSimpleBlock(long long, long long); + +}; + + +class Segment +{ + friend class Cues; + friend class Track; + friend class VideoTrack; + + Segment(const Segment&); + Segment& operator=(const Segment&); + +private: + Segment( + IMkvReader*, + long long elem_start, + //long long elem_size, + long long pos, + long long size); + +public: + IMkvReader* const m_pReader; + const long long 
m_element_start; + //const long long m_element_size; + const long long m_start; //posn of segment payload + const long long m_size; //size of segment payload + Cluster m_eos; //TODO: make private? + + static long long CreateInstance(IMkvReader*, long long, Segment*&); + ~Segment(); + + long Load(); //loads headers and all clusters + + //for incremental loading + //long long Unparsed() const; + bool DoneParsing() const; + long long ParseHeaders(); //stops when first cluster is found + //long FindNextCluster(long long& pos, long& size) const; + long LoadCluster(long long& pos, long& size); //load one cluster + long LoadCluster(); + + long ParseNext( + const Cluster* pCurr, + const Cluster*& pNext, + long long& pos, + long& size); + +#if 0 + //This pair parses one cluster, but only changes the state of the + //segment object when the cluster is actually added to the index. + long ParseCluster(long long& cluster_pos, long long& new_pos) const; + bool AddCluster(long long cluster_pos, long long new_pos); +#endif + + const SeekHead* GetSeekHead() const; + const Tracks* GetTracks() const; + const SegmentInfo* GetInfo() const; + const Cues* GetCues() const; + const Chapters* GetChapters() const; + + long long GetDuration() const; + + unsigned long GetCount() const; + const Cluster* GetFirst() const; + const Cluster* GetLast() const; + const Cluster* GetNext(const Cluster*); + + const Cluster* FindCluster(long long time_nanoseconds) const; + //const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; + + const Cluster* FindOrPreloadCluster(long long pos); + + long ParseCues( + long long cues_off, //offset relative to start of segment + long long& parse_pos, + long& parse_len); + +private: + + long long m_pos; //absolute file posn; what has been consumed so far + Cluster* m_pUnknownSize; + + SeekHead* m_pSeekHead; + SegmentInfo* m_pInfo; + Tracks* m_pTracks; + Cues* m_pCues; + Chapters* m_pChapters; + Cluster** m_clusters; + long m_clusterCount; //number of 
entries for which m_index >= 0 + long m_clusterPreloadCount; //number of entries for which m_index < 0 + long m_clusterSize; //array size + + long DoLoadCluster(long long&, long&); + long DoLoadClusterUnknownSize(long long&, long&); + long DoParseNext(const Cluster*&, long long&, long&); + + void AppendCluster(Cluster*); + void PreloadCluster(Cluster*, ptrdiff_t); + + //void ParseSeekHead(long long pos, long long size); + //void ParseSeekEntry(long long pos, long long size); + //void ParseCues(long long); + + const BlockEntry* GetBlock( + const CuePoint&, + const CuePoint::TrackPosition&); + +}; + +} //end namespace mkvparser + +inline long mkvparser::Segment::LoadCluster() +{ + long long pos; + long size; + + return LoadCluster(pos, size); +} + +#endif //MKVPARSER_HPP diff --git a/source/libvpx/third_party/libwebm/mkvreader.cpp b/source/libvpx/third_party/libwebm/mkvreader.cpp new file mode 100644 index 0000000..b4b2459 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvreader.cpp @@ -0,0 +1,143 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+ +#include "mkvreader.hpp" + +#include <cassert> + +namespace mkvparser +{ + +MkvReader::MkvReader() : + m_file(NULL), + reader_owns_file_(true) { +} + +MkvReader::MkvReader(FILE* fp) : + m_file(fp), + reader_owns_file_(false) { + GetFileSize(); +} + +MkvReader::~MkvReader() { + if (reader_owns_file_) + Close(); + m_file = NULL; +} + +int MkvReader::Open(const char* fileName) +{ + if (fileName == NULL) + return -1; + + if (m_file) + return -1; + +#ifdef _MSC_VER + const errno_t e = fopen_s(&m_file, fileName, "rb"); + + if (e) + return -1; //error +#else + m_file = fopen(fileName, "rb"); + + if (m_file == NULL) + return -1; +#endif + return !GetFileSize(); +} + +bool MkvReader::GetFileSize() { + if (m_file == NULL) + return false; +#ifdef _MSC_VER + int status = _fseeki64(m_file, 0L, SEEK_END); + + if (status) + return false; //error + + m_length = _ftelli64(m_file); +#else + fseek(m_file, 0L, SEEK_END); + m_length = ftell(m_file); +#endif + assert(m_length >= 0); + + if (m_length < 0) + return false; + +#ifdef _MSC_VER + status = _fseeki64(m_file, 0L, SEEK_SET); + + if (status) + return false; //error +#else + fseek(m_file, 0L, SEEK_SET); +#endif + + return true; +} + +void MkvReader::Close() +{ + if (m_file != NULL) + { + fclose(m_file); + m_file = NULL; + } +} + +int MkvReader::Length(long long* total, long long* available) +{ + if (m_file == NULL) + return -1; + + if (total) + *total = m_length; + + if (available) + *available = m_length; + + return 0; +} + +int MkvReader::Read(long long offset, long len, unsigned char* buffer) +{ + if (m_file == NULL) + return -1; + + if (offset < 0) + return -1; + + if (len < 0) + return -1; + + if (len == 0) + return 0; + + if (offset >= m_length) + return -1; + +#ifdef _MSC_VER + const int status = _fseeki64(m_file, offset, SEEK_SET); + + if (status) + return -1; //error +#else + fseek(m_file, offset, SEEK_SET); +#endif + + const size_t size = fread(buffer, 1, len, m_file); + + if (size < size_t(len)) + return -1; //error + 
+ return 0; //success +} + +} //end namespace mkvparser diff --git a/source/libvpx/third_party/libwebm/mkvreader.hpp b/source/libvpx/third_party/libwebm/mkvreader.hpp new file mode 100644 index 0000000..8ebdd99 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvreader.hpp @@ -0,0 +1,46 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVREADER_HPP +#define MKVREADER_HPP + +#include "mkvparser.hpp" +#include <cstdio> + +namespace mkvparser +{ + +class MkvReader : public IMkvReader +{ + MkvReader(const MkvReader&); + MkvReader& operator=(const MkvReader&); +public: + MkvReader(); + MkvReader(FILE* fp); + virtual ~MkvReader(); + + int Open(const char*); + void Close(); + + virtual int Read(long long position, long length, unsigned char* buffer); + virtual int Length(long long* total, long long* available); +private: + + // Determines the size of the file. This is called either by the constructor + // or by the Open function depending on file ownership. Returns true on + // success. + bool GetFileSize(); + + long long m_length; + FILE* m_file; + bool reader_owns_file_; +}; + +} //end namespace mkvparser + +#endif //MKVREADER_HPP diff --git a/source/libvpx/third_party/libwebm/mkvwriter.cpp b/source/libvpx/third_party/libwebm/mkvwriter.cpp new file mode 100644 index 0000000..8de89a4 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvwriter.cpp @@ -0,0 +1,97 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. 
An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include "mkvwriter.hpp" + +#ifdef _MSC_VER +#include <share.h> // for _SH_DENYWR +#endif + +#include <new> + +namespace mkvmuxer { + +MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) { +} + +MkvWriter::MkvWriter(FILE* fp): file_(fp), writer_owns_file_(false) { +} + +MkvWriter::~MkvWriter() { + Close(); +} + +int32 MkvWriter::Write(const void* buffer, uint32 length) { + if (!file_) + return -1; + + if (length == 0) + return 0; + + if (buffer == NULL) + return -1; + + const size_t bytes_written = fwrite(buffer, 1, length, file_); + + return (bytes_written == length) ? 0 : -1; +} + +bool MkvWriter::Open(const char* filename) { + if (filename == NULL) + return false; + + if (file_) + return false; + +#ifdef _MSC_VER + file_ = _fsopen(filename, "wb", _SH_DENYWR); +#else + file_ = fopen(filename, "wb"); +#endif + if (file_ == NULL) + return false; + return true; +} + +void MkvWriter::Close() { + if (file_ && writer_owns_file_) { + fclose(file_); + } + file_ = NULL; +} + +int64 MkvWriter::Position() const { + if (!file_) + return 0; + +#ifdef _MSC_VER + return _ftelli64(file_); +#else + return ftell(file_); +#endif +} + +int32 MkvWriter::Position(int64 position) { + if (!file_) + return -1; + +#ifdef _MSC_VER + return _fseeki64(file_, position, SEEK_SET); +#else + return fseek(file_, position, SEEK_SET); +#endif +} + +bool MkvWriter::Seekable() const { + return true; +} + +void MkvWriter::ElementStartNotify(uint64, int64) { +} + +} // namespace mkvmuxer diff --git a/source/libvpx/third_party/libwebm/mkvwriter.hpp b/source/libvpx/third_party/libwebm/mkvwriter.hpp new file mode 100644 index 0000000..524e0f7 --- /dev/null +++ b/source/libvpx/third_party/libwebm/mkvwriter.hpp @@ -0,0 +1,51 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
+// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef MKVWRITER_HPP +#define MKVWRITER_HPP + +#include <stdio.h> + +#include "mkvmuxer.hpp" +#include "mkvmuxertypes.hpp" + +namespace mkvmuxer { + +// Default implementation of the IMkvWriter interface on Windows. +class MkvWriter : public IMkvWriter { + public: + MkvWriter(); + MkvWriter(FILE* fp); + virtual ~MkvWriter(); + + // IMkvWriter interface + virtual int64 Position() const; + virtual int32 Position(int64 position); + virtual bool Seekable() const; + virtual int32 Write(const void* buffer, uint32 length); + virtual void ElementStartNotify(uint64 element_id, int64 position); + + // Creates and opens a file for writing. |filename| is the name of the file + // to open. This function will overwrite the contents of |filename|. Returns + // true on success. + bool Open(const char* filename); + + // Closes an opened file. + void Close(); + + private: + // File handle to output file. + FILE* file_; + bool writer_owns_file_; + + LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter); +}; + +} //end namespace mkvmuxer + +#endif // MKVWRITER_HPP diff --git a/source/libvpx/third_party/libwebm/webmids.hpp b/source/libvpx/third_party/libwebm/webmids.hpp new file mode 100644 index 0000000..65fab96 --- /dev/null +++ b/source/libvpx/third_party/libwebm/webmids.hpp @@ -0,0 +1,141 @@ +// Copyright (c) 2012 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. 
All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#ifndef WEBMIDS_HPP +#define WEBMIDS_HPP + +namespace mkvmuxer { + +enum MkvId { + kMkvEBML = 0x1A45DFA3, + kMkvEBMLVersion = 0x4286, + kMkvEBMLReadVersion = 0x42F7, + kMkvEBMLMaxIDLength = 0x42F2, + kMkvEBMLMaxSizeLength = 0x42F3, + kMkvDocType = 0x4282, + kMkvDocTypeVersion = 0x4287, + kMkvDocTypeReadVersion = 0x4285, + kMkvVoid = 0xEC, + kMkvSignatureSlot = 0x1B538667, + kMkvSignatureAlgo = 0x7E8A, + kMkvSignatureHash = 0x7E9A, + kMkvSignaturePublicKey = 0x7EA5, + kMkvSignature = 0x7EB5, + kMkvSignatureElements = 0x7E5B, + kMkvSignatureElementList = 0x7E7B, + kMkvSignedElement = 0x6532, + //segment + kMkvSegment = 0x18538067, + //Meta Seek Information + kMkvSeekHead = 0x114D9B74, + kMkvSeek = 0x4DBB, + kMkvSeekID = 0x53AB, + kMkvSeekPosition = 0x53AC, + //Segment Information + kMkvInfo = 0x1549A966, + kMkvTimecodeScale = 0x2AD7B1, + kMkvDuration = 0x4489, + kMkvDateUTC = 0x4461, + kMkvMuxingApp = 0x4D80, + kMkvWritingApp = 0x5741, + //Cluster + kMkvCluster = 0x1F43B675, + kMkvTimecode = 0xE7, + kMkvPrevSize = 0xAB, + kMkvBlockGroup = 0xA0, + kMkvBlock = 0xA1, + kMkvBlockDuration = 0x9B, + kMkvReferenceBlock = 0xFB, + kMkvLaceNumber = 0xCC, + kMkvSimpleBlock = 0xA3, + kMkvBlockAdditions = 0x75A1, + kMkvBlockMore = 0xA6, + kMkvBlockAddID = 0xEE, + kMkvBlockAdditional = 0xA5, + kMkvDiscardPadding = 0x75A2, + //Track + kMkvTracks = 0x1654AE6B, + kMkvTrackEntry = 0xAE, + kMkvTrackNumber = 0xD7, + kMkvTrackUID = 0x73C5, + kMkvTrackType = 0x83, + kMkvFlagEnabled = 0xB9, + kMkvFlagDefault = 0x88, + kMkvFlagForced = 0x55AA, + kMkvFlagLacing = 0x9C, + kMkvDefaultDuration = 0x23E383, + kMkvMaxBlockAdditionID = 0x55EE, + kMkvName = 0x536E, + kMkvLanguage = 0x22B59C, + kMkvCodecID = 0x86, + kMkvCodecPrivate = 0x63A2, + kMkvCodecName = 0x258688, + kMkvCodecDelay = 0x56AA, + kMkvSeekPreRoll = 0x56BB, + //video + kMkvVideo = 0xE0, + kMkvFlagInterlaced = 0x9A, + 
kMkvStereoMode = 0x53B8, + kMkvAlphaMode = 0x53C0, + kMkvPixelWidth = 0xB0, + kMkvPixelHeight = 0xBA, + kMkvPixelCropBottom = 0x54AA, + kMkvPixelCropTop = 0x54BB, + kMkvPixelCropLeft = 0x54CC, + kMkvPixelCropRight = 0x54DD, + kMkvDisplayWidth = 0x54B0, + kMkvDisplayHeight = 0x54BA, + kMkvDisplayUnit = 0x54B2, + kMkvAspectRatioType = 0x54B3, + kMkvFrameRate = 0x2383E3, + //end video + //audio + kMkvAudio = 0xE1, + kMkvSamplingFrequency = 0xB5, + kMkvOutputSamplingFrequency = 0x78B5, + kMkvChannels = 0x9F, + kMkvBitDepth = 0x6264, + //end audio + //ContentEncodings + kMkvContentEncodings = 0x6D80, + kMkvContentEncoding = 0x6240, + kMkvContentEncodingOrder = 0x5031, + kMkvContentEncodingScope = 0x5032, + kMkvContentEncodingType = 0x5033, + kMkvContentEncryption = 0x5035, + kMkvContentEncAlgo = 0x47E1, + kMkvContentEncKeyID = 0x47E2, + kMkvContentEncAESSettings = 0x47E7, + kMkvAESSettingsCipherMode = 0x47E8, + kMkvAESSettingsCipherInitData = 0x47E9, + //end ContentEncodings + //Cueing Data + kMkvCues = 0x1C53BB6B, + kMkvCuePoint = 0xBB, + kMkvCueTime = 0xB3, + kMkvCueTrackPositions = 0xB7, + kMkvCueTrack = 0xF7, + kMkvCueClusterPosition = 0xF1, + kMkvCueBlockNumber = 0x5378, + //Chapters + kMkvChapters = 0x1043A770, + kMkvEditionEntry = 0x45B9, + kMkvChapterAtom = 0xB6, + kMkvChapterUID = 0x73C4, + kMkvChapterStringUID = 0x5654, + kMkvChapterTimeStart = 0x91, + kMkvChapterTimeEnd = 0x92, + kMkvChapterDisplay = 0x80, + kMkvChapString = 0x85, + kMkvChapLanguage = 0x437C, + kMkvChapCountry = 0x437E +}; + +} // end namespace mkvmuxer + +#endif // WEBMIDS_HPP diff --git a/source/libvpx/third_party/nestegg/README.webm b/source/libvpx/third_party/nestegg/README.webm index 7860a7c..8e3760b 100644 --- a/source/libvpx/third_party/nestegg/README.webm +++ b/source/libvpx/third_party/nestegg/README.webm @@ -18,3 +18,7 @@ nestegg.c|975 col 6| warning: ‘r’ may be used uninitialized in this function - fix track_number uint64->uint32 warnings - fix track_scale double->uint64 warning - 
nestegg_packet_track: fix uint64->uint32 warning +- ne_read_(string|binary|block): normalize size_t usage +- ne_parse: normalize size_t usage +- quiet read related uint64->size_t warnings +- ne_buffer_read: quiet uint64->size_t warning diff --git a/source/libvpx/third_party/nestegg/src/nestegg.c b/source/libvpx/third_party/nestegg/src/nestegg.c index 35ce9f1..c7e2b02 100644 --- a/source/libvpx/third_party/nestegg/src/nestegg.c +++ b/source/libvpx/third_party/nestegg/src/nestegg.c @@ -694,14 +694,15 @@ ne_read_string(nestegg * ctx, char ** val, uint64_t length) { char * str; int r; + const size_t alloc_size = (size_t)length + 1; if (length == 0 || length > LIMIT_STRING) return -1; - str = ne_pool_alloc(length + 1, ctx->alloc_pool); - r = ne_io_read(ctx->io, (unsigned char *) str, length); + str = ne_pool_alloc(alloc_size, ctx->alloc_pool); + r = ne_io_read(ctx->io, (unsigned char *) str, alloc_size - 1); if (r != 1) return r; - str[length] = '\0'; + str[alloc_size - 1] = '\0'; *val = str; return 1; } @@ -711,9 +712,9 @@ ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length) { if (length == 0 || length > LIMIT_BINARY) return -1; - val->data = ne_pool_alloc(length, ctx->alloc_pool); - val->length = length; - return ne_io_read(ctx->io, val->data, length); + val->length = (size_t)length; + val->data = ne_pool_alloc(val->length, ctx->alloc_pool); + return ne_io_read(ctx->io, val->data, val->length); } static int @@ -1043,7 +1044,7 @@ ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset ne_read_single_master(ctx, element); continue; } else { - r = ne_read_simple(ctx, element, size); + r = ne_read_simple(ctx, element, (size_t)size); if (r < 0) break; } @@ -1062,7 +1063,7 @@ ne_parse(nestegg * ctx, struct ebml_element_desc * top_level, int64_t max_offset if (id != ID_VOID && id != ID_CRC32) ctx->log(ctx, NESTEGG_LOG_DEBUG, "unknown element %llx", id); - r = ne_io_read_skip(ctx->io, size); + r = ne_io_read_skip(ctx->io, 
(size_t)size); if (r != 1) break; } @@ -1151,7 +1152,8 @@ ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, ui r = ne_read_vint(io, &lace, &length); if (r != 1) return r; - *read += length; + assert(length <= 8); + *read += (size_t)length; sizes[i] = lace; sum = sizes[i]; @@ -1163,7 +1165,8 @@ ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, ui r = ne_read_svint(io, &slace, &length); if (r != 1) return r; - *read += length; + assert(length <= 8); + *read += (size_t)length; sizes[i] = sizes[i - 1] + slace; sum += sizes[i]; i += 1; @@ -1263,7 +1266,8 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac if (track_number == 0 || (unsigned int)track_number != track_number) return -1; - consumed += length; + assert(length <= 8); + consumed += (size_t)length; r = ne_read_int(ctx->io, &timecode, 2); if (r != 1) @@ -1307,7 +1311,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac case LACING_XIPH: if (frames == 1) return -1; - r = ne_read_xiph_lacing(ctx->io, block_size, &consumed, frames, frame_sizes); + r = ne_read_xiph_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes); if (r != 1) return r; break; @@ -1320,7 +1324,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac case LACING_EBML: if (frames == 1) return -1; - r = ne_read_ebml_lacing(ctx->io, block_size, &consumed, frames, frame_sizes); + r = ne_read_ebml_lacing(ctx->io, (size_t)block_size, &consumed, frames, frame_sizes); if (r != 1) return r; break; @@ -1365,9 +1369,9 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac return -1; } f = ne_alloc(sizeof(*f)); - f->data = ne_alloc(frame_sizes[i]); - f->length = frame_sizes[i]; - r = ne_io_read(ctx->io, f->data, frame_sizes[i]); + f->length = (size_t)frame_sizes[i]; + f->data = ne_alloc(f->length); + r = ne_io_read(ctx->io, f->data, f->length); if (r != 1) { 
free(f->data); free(f); @@ -1406,7 +1410,8 @@ ne_read_discard_padding(nestegg * ctx, nestegg_packet * pkt) if (!element) return 1; - r = ne_read_simple(ctx, element, size); + assert((size_t)size == size); + r = ne_read_simple(ctx, element, (size_t)size); if (r != 1) return r; storage = (struct ebml_type *) (ctx->ancestor->data + element->offset); @@ -1600,7 +1605,7 @@ ne_buffer_read(void * buffer, size_t length, void * user_data) struct sniff_buffer * sb = user_data; int rv = 1; - size_t available = sb->length - sb->offset; + size_t available = sb->length - (size_t)sb->offset; if (available < length) return 0; @@ -2074,7 +2079,7 @@ nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item, p += sizes[i]; } *data = p; - *length = sizes[item]; + *length = (size_t)sizes[item]; } else { *data = codec_private.data; *length = codec_private.length; diff --git a/source/libvpx/tools_common.h b/source/libvpx/tools_common.h index 58894de..549e895 100644 --- a/source/libvpx/tools_common.h +++ b/source/libvpx/tools_common.h @@ -22,10 +22,12 @@ #endif #if defined(_MSC_VER) -/* MSVS doesn't define off_t, and uses _f{seek,tell}i64. */ -typedef __int64 off_t; +/* MSVS uses _f{seek,tell}i64. */ #define fseeko _fseeki64 #define ftello _ftelli64 +typedef long _off_t; // NOLINT - MSVS compatible type +typedef __int64 off_t; // fseeki64 compatible type +#define _OFF_T_DEFINED #elif defined(_WIN32) /* MinGW defines off_t as long and uses f{seek,tell}o64/off64_t for large * files. 
*/ diff --git a/source/libvpx/vp8/common/loopfilter.c b/source/libvpx/vp8/common/loopfilter.c index 19857a7..7a07e76 100644 --- a/source/libvpx/vp8/common/loopfilter.c +++ b/source/libvpx/vp8/common/loopfilter.c @@ -15,7 +15,6 @@ #include "onyxc_int.h" #include "vpx_mem/vpx_mem.h" -typedef unsigned char uc; static void lf_init_lut(loop_filter_info_n *lfi) { diff --git a/source/libvpx/vp8/common/postproc.c b/source/libvpx/vp8/common/postproc.c index e3bee32..7d0fbf6 100644 --- a/source/libvpx/vp8/common/postproc.c +++ b/source/libvpx/vp8/common/postproc.c @@ -303,8 +303,8 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i { d[r&15] = (rv2[r&127] + sum + s[0]) >> 4; } - - s[-8*pitch] = d[(r-8)&15]; + if (r >= 8) + s[-8*pitch] = d[(r-8)&15]; s += pitch; } } diff --git a/source/libvpx/vp8/common/rtcd_defs.pl b/source/libvpx/vp8/common/rtcd_defs.pl new file mode 100644 index 0000000..130d965 --- /dev/null +++ b/source/libvpx/vp8/common/rtcd_defs.pl @@ -0,0 +1,541 @@ +sub vp8_common_forward_decls() { +print <<EOF +/* + * VP8 + */ + +struct blockd; +struct macroblockd; +struct loop_filter_info; + +/* Encoder forward decls */ +struct block; +struct macroblock; +struct variance_vtable; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp8_common_forward_decls/; + +# +# system state +# +add_proto qw/void vp8_clear_system_state/, ""; +specialize qw/vp8_clear_system_state mmx/; +$vp8_clear_system_state_mmx=vpx_reset_mmx_state; + +# +# Dequant +# +add_proto qw/void vp8_dequantize_b/, "struct blockd*, short *dqc"; +specialize qw/vp8_dequantize_b mmx media neon/; +$vp8_dequantize_b_media=vp8_dequantize_b_v6; + +add_proto qw/void vp8_dequant_idct_add/, "short *input, short *dq, unsigned char *output, int stride"; +specialize qw/vp8_dequant_idct_add mmx media neon dspr2/; +$vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6; +$vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2; + +add_proto qw/void 
vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs"; +specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/; +$vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6; +$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2; + +add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"; +specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/; +$vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6; +$vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2; + +# +# Loopfilter +# +add_proto qw/void vp8_loop_filter_mbv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_mbv mmx sse2 media neon dspr2/; +$vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6; +$vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2; + +add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/; +$vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6; +$vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2; + +add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_mbh mmx sse2 media neon dspr2/; +$vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6; +$vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2; + +add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"; +specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/; +$vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6; 
+$vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2; + + +add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/; +$vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c; +$vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx; +$vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2; +$vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6; +$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon; + +add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/; +$vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c; +$vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx; +$vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2; +$vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6; +$vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon; + +add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/; +$vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c; +$vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx; +$vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2; +$vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6; +$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon; + +add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit"; +specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/; +$vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c; +$vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx; +$vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2; 
+$vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6; +$vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon; + +# +# IDCT +# +#idct16 +add_proto qw/void vp8_short_idct4x4llm/, "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"; +specialize qw/vp8_short_idct4x4llm mmx media neon dspr2/; +$vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual; +$vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2; + +#iwalsh1 +add_proto qw/void vp8_short_inv_walsh4x4_1/, "short *input, short *output"; +specialize qw/vp8_short_inv_walsh4x4_1 dspr2/; +$vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2; +# no asm yet + +#iwalsh16 +add_proto qw/void vp8_short_inv_walsh4x4/, "short *input, short *output"; +specialize qw/vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2/; +$vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6; +$vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2; + +#idct1_scalar_add +add_proto qw/void vp8_dc_only_idct_add/, "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"; +specialize qw/vp8_dc_only_idct_add mmx media neon dspr2/; +$vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6; +$vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2; + +# +# RECON +# +add_proto qw/void vp8_copy_mem16x16/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/; +$vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6; +$vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2; + +add_proto qw/void vp8_copy_mem8x8/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem8x8 mmx media neon dspr2/; +$vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6; +$vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2; + +add_proto qw/void vp8_copy_mem8x4/, "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_copy_mem8x4 mmx media neon dspr2/; 
+$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6; +$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2; + +add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride"; +specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3/; +#TODO: fix assembly for neon + +add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"; +specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3/; + +add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left"; +specialize qw/vp8_intra4x4_predict media/; +$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6; + +# +# Postproc +# +if (vpx_config("CONFIG_POSTPROC") eq "yes") { + add_proto qw/void vp8_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; + specialize qw/vp8_mbpost_proc_down mmx sse2/; + $vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm; + + add_proto qw/void vp8_mbpost_proc_across_ip/, "unsigned char *dst, int pitch, int rows, int cols,int flimit"; + specialize qw/vp8_mbpost_proc_across_ip sse2/; + $vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm; + + add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size"; + specialize qw/vp8_post_proc_down_and_across_mb_row sse2/; + + add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch"; + specialize qw/vp8_plane_add_noise mmx sse2/; + 
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt; + + add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_blend_mb_outer/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_blend_b/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"; + # no asm yet + + add_proto qw/void vp8_filter_by_weight16x16/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + specialize qw/vp8_filter_by_weight16x16 sse2/; + + add_proto qw/void vp8_filter_by_weight8x8/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + specialize qw/vp8_filter_by_weight8x8 sse2/; + + add_proto qw/void vp8_filter_by_weight4x4/, "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight"; + # no asm yet +} + +# +# Subpixel +# +add_proto qw/void vp8_sixtap_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6; +$vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2; + +add_proto qw/void vp8_sixtap_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2/; +$vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6; +$vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2; + +add_proto qw/void vp8_sixtap_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2/; 
+$vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6; +$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2; + +add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2/; +$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6; +$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2; + +add_proto qw/void vp8_bilinear_predict16x16/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/; +$vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6; + +add_proto qw/void vp8_bilinear_predict8x8/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/; +$vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6; + +add_proto qw/void vp8_bilinear_predict8x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict8x4 mmx media neon/; +$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6; + +add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch"; +specialize qw/vp8_bilinear_predict4x4 mmx media neon/; +$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6; + +# +# Whole-pixel Variance +# +add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance4x4 mmx sse2/; +$vp8_variance4x4_sse2=vp8_variance4x4_wmt; + +add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance8x8 mmx sse2 media 
neon/; +$vp8_variance8x8_sse2=vp8_variance8x8_wmt; +$vp8_variance8x8_media=vp8_variance8x8_armv6; + +add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance8x16 mmx sse2 neon/; +$vp8_variance8x16_sse2=vp8_variance8x16_wmt; + +add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance16x8 mmx sse2 neon/; +$vp8_variance16x8_sse2=vp8_variance16x8_wmt; + +add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance16x16 mmx sse2 media neon/; +$vp8_variance16x16_sse2=vp8_variance16x16_wmt; +$vp8_variance16x16_media=vp8_variance16x16_armv6; + +# +# Sub-pixel Variance +# +add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/; +$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/; +$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt; +$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6; + +add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/; +$vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt; + +add_proto 
qw/unsigned int vp8_sub_pixel_variance16x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/; +$vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt; + +add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/; +$vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt; +$vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt; +$vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt; +$vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6; + +add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/; +$vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt; +$vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6; + +# +# Single block SAD +# +add_proto qw/unsigned int vp8_sad4x4/, "const 
unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad4x4 mmx sse2 neon/; +$vp8_sad4x4_sse2=vp8_sad4x4_wmt; + +add_proto qw/unsigned int vp8_sad8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad8x8 mmx sse2 neon/; +$vp8_sad8x8_sse2=vp8_sad8x8_wmt; + +add_proto qw/unsigned int vp8_sad8x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad8x16 mmx sse2 neon/; +$vp8_sad8x16_sse2=vp8_sad8x16_wmt; + +add_proto qw/unsigned int vp8_sad16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad16x8 mmx sse2 neon/; +$vp8_sad16x8_sse2=vp8_sad16x8_wmt; + +add_proto qw/unsigned int vp8_sad16x16/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp8_sad16x16 mmx sse2 sse3 media neon/; +$vp8_sad16x16_sse2=vp8_sad16x16_wmt; +$vp8_sad16x16_media=vp8_sad16x16_armv6; + +# +# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally +# +add_proto qw/void vp8_sad4x4x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad4x4x3 sse3/; + +add_proto qw/void vp8_sad8x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x8x3 sse3/; + +add_proto qw/void vp8_sad8x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x16x3 sse3/; + +add_proto qw/void vp8_sad16x8x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int 
*sad_array"; +specialize qw/vp8_sad16x8x3 sse3 ssse3/; + +add_proto qw/void vp8_sad16x16x3/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x16x3 sse3 ssse3/; + +# Note the only difference in the following prototypes is that they return into +# an array of short +add_proto qw/void vp8_sad4x4x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad4x4x8 sse4_1/; +$vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4; + +add_proto qw/void vp8_sad8x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad8x8x8 sse4_1/; +$vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4; + +add_proto qw/void vp8_sad8x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad8x16x8 sse4_1/; +$vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4; + +add_proto qw/void vp8_sad16x8x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad16x8x8 sse4_1/; +$vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4; + +add_proto qw/void vp8_sad16x16x8/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array"; +specialize qw/vp8_sad16x16x8 sse4_1/; +$vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4; + +# +# Multi-block SAD, comparing a reference to N independent blocks +# +add_proto qw/void vp8_sad4x4x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad4x4x4d sse3/; + +add_proto qw/void vp8_sad8x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize 
qw/vp8_sad8x8x4d sse3/; + +add_proto qw/void vp8_sad8x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad8x16x4d sse3/; + +add_proto qw/void vp8_sad16x8x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x8x4d sse3/; + +add_proto qw/void vp8_sad16x16x4d/, "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp8_sad16x16x4d sse3/; + +# +# Encoder functions below this point. +# +if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") { + +# +# Sum of squares (vector) +# +add_proto qw/unsigned int vp8_get_mb_ss/, "const short *"; +specialize qw/vp8_get_mb_ss mmx sse2/; + +# +# SSE (Sum Squared Error) +# +add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse"; +specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/; +$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt; + +add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp8_mse16x16 mmx sse2 media neon/; +$vp8_mse16x16_sse2=vp8_mse16x16_wmt; +$vp8_mse16x16_media=vp8_mse16x16_armv6; + +add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; +specialize qw/vp8_get4x4sse_cs mmx neon/; + +# +# Block copy +# +if ($opts{arch} =~ /x86/) { + add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n"; + specialize qw/vp8_copy32xn sse2 sse3/; +} + +# +# Structured Similarity (SSIM) +# +if (vpx_config("CONFIG_INTERNAL_STATS") eq 
"yes") { + $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2"; + + add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64"; + + add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64"; +} + +# +# Forward DCT +# +add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/; +$vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6; + +add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/; +$vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6; + +add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch"; +specialize qw/vp8_short_walsh4x4 sse2 media neon/; +$vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6; + +# +# Quantizer +# +add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *"; +specialize qw/vp8_regular_quantize_b sse2/; +# TODO(johann) Update sse4 implementation and re-enable +#$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4; + +add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *"; +specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/; +$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6; + +add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; +# no asm yet + +add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2"; +specialize qw/vp8_fast_quantize_b_pair neon/; + +add_proto qw/void 
vp8_quantize_mb/, "struct macroblock *"; +specialize qw/vp8_quantize_mb neon/; + +add_proto qw/void vp8_quantize_mby/, "struct macroblock *"; +specialize qw/vp8_quantize_mby neon/; + +add_proto qw/void vp8_quantize_mbuv/, "struct macroblock *"; +specialize qw/vp8_quantize_mbuv neon/; + +# +# Block subtraction +# +add_proto qw/int vp8_block_error/, "short *coeff, short *dqcoeff"; +specialize qw/vp8_block_error mmx sse2/; +$vp8_block_error_sse2=vp8_block_error_xmm; + +add_proto qw/int vp8_mbblock_error/, "struct macroblock *mb, int dc"; +specialize qw/vp8_mbblock_error mmx sse2/; +$vp8_mbblock_error_sse2=vp8_mbblock_error_xmm; + +add_proto qw/int vp8_mbuverror/, "struct macroblock *mb"; +specialize qw/vp8_mbuverror mmx sse2/; +$vp8_mbuverror_sse2=vp8_mbuverror_xmm; + +add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch"; +specialize qw/vp8_subtract_b mmx sse2 media neon/; +$vp8_subtract_b_media=vp8_subtract_b_armv6; + +add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride"; +specialize qw/vp8_subtract_mby mmx sse2 media neon/; +$vp8_subtract_mby_media=vp8_subtract_mby_armv6; + +add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride"; +specialize qw/vp8_subtract_mbuv mmx sse2 media neon/; +$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6; + +# +# Motion search +# +add_proto qw/int vp8_full_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +specialize qw/vp8_full_search_sad sse3 sse4_1/; +$vp8_full_search_sad_sse3=vp8_full_search_sadx3; +$vp8_full_search_sad_sse4_1=vp8_full_search_sadx8; + +add_proto qw/int vp8_refining_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, 
int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +specialize qw/vp8_refining_search_sad sse3/; +$vp8_refining_search_sad_sse3=vp8_refining_search_sadx4; + +add_proto qw/int vp8_diamond_search_sad/, "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv"; +$vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4; + +# +# Alt-ref Noise Reduction (ARNR) +# +if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { + add_proto qw/void vp8_temporal_filter_apply/, "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count"; + specialize qw/vp8_temporal_filter_apply sse2/; +} + +# +# Pick Loopfilter +# +add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize qw/vp8_yv12_copy_partial_frame neon/; + +# +# Denoiser filter +# +if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") { + add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"; + specialize qw/vp8_denoiser_filter sse2 neon/; +} + +# End of encoder only functions +} +1; diff --git a/source/libvpx/vp8/common/rtcd_defs.sh b/source/libvpx/vp8/common/rtcd_defs.sh deleted file mode 100755 index 28e6754..0000000 --- a/source/libvpx/vp8/common/rtcd_defs.sh +++ /dev/null @@ -1,542 +0,0 @@ -vp8_common_forward_decls() { -cat <<EOF -/* - * VP8 - */ - -struct blockd; -struct macroblockd; -struct loop_filter_info; - -/* Encoder forward decls */ -struct block; -struct macroblock; -struct variance_vtable; -union int_mv; -struct yv12_buffer_config; -EOF -} -forward_decls 
vp8_common_forward_decls - -# -# system state -# -prototype void vp8_clear_system_state "" -specialize vp8_clear_system_state mmx -vp8_clear_system_state_mmx=vpx_reset_mmx_state - -# -# Dequant -# -prototype void vp8_dequantize_b "struct blockd*, short *dqc" -specialize vp8_dequantize_b mmx media neon -vp8_dequantize_b_media=vp8_dequantize_b_v6 - -prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride" -specialize vp8_dequant_idct_add mmx media neon dspr2 -vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6 -vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2 - -prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs" -specialize vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2 -vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6 -vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2 - -prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs" -specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2 -vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6 -vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2 - -# -# Loopfilter -# -prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbv mmx sse2 media neon dspr2 -vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6 -vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2 - -prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bv mmx sse2 media neon dspr2 -vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6 -vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2 - -prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, 
unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_mbh mmx sse2 media neon dspr2 -vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6 -vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2 - -prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi" -specialize vp8_loop_filter_bh mmx sse2 media neon dspr2 -vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6 -vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2 - - -prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_mbv mmx sse2 media neon -vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c -vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx -vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2 -vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6 -vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon - -prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_mbh mmx sse2 media neon -vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c -vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx -vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2 -vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6 -vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon - -prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_bv mmx sse2 media neon -vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c -vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx -vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2 -vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6 
-vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon - -prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit" -specialize vp8_loop_filter_simple_bh mmx sse2 media neon -vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c -vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx -vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2 -vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6 -vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon - -# -# IDCT -# -#idct16 -prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride" -specialize vp8_short_idct4x4llm mmx media neon dspr2 -vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual -vp8_short_idct4x4llm_dspr2=vp8_short_idct4x4llm_dspr2 - -#iwalsh1 -prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output" -specialize vp8_short_inv_walsh4x4_1 dspr2 -vp8_short_inv_walsh4x4_1_dspr2=vp8_short_inv_walsh4x4_1_dspr2 -# no asm yet - -#iwalsh16 -prototype void vp8_short_inv_walsh4x4 "short *input, short *output" -specialize vp8_short_inv_walsh4x4 mmx sse2 media neon dspr2 -vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6 -vp8_short_inv_walsh4x4_dspr2=vp8_short_inv_walsh4x4_dspr2 - -#idct1_scalar_add -prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride" -specialize vp8_dc_only_idct_add mmx media neon dspr2 -vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6 -vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2 - -# -# RECON -# -prototype void vp8_copy_mem16x16 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem16x16 mmx sse2 media neon dspr2 -vp8_copy_mem16x16_media=vp8_copy_mem16x16_v6 -vp8_copy_mem16x16_dspr2=vp8_copy_mem16x16_dspr2 - -prototype void vp8_copy_mem8x8 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem8x8 mmx media neon 
dspr2 -vp8_copy_mem8x8_media=vp8_copy_mem8x8_v6 -vp8_copy_mem8x8_dspr2=vp8_copy_mem8x8_dspr2 - -prototype void vp8_copy_mem8x4 "unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch" -specialize vp8_copy_mem8x4 mmx media neon dspr2 -vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6 -vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2 - -prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride" -specialize vp8_build_intra_predictors_mby_s sse2 ssse3 -#TODO: fix assembly for neon - -prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride" -specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3 - -prototype void vp8_intra4x4_predict "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left" -specialize vp8_intra4x4_predict media -vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6 - -# -# Postproc -# -if [ "$CONFIG_POSTPROC" = "yes" ]; then - prototype void vp8_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols,int flimit" - specialize vp8_mbpost_proc_down mmx sse2 - vp8_mbpost_proc_down_sse2=vp8_mbpost_proc_down_xmm - - prototype void vp8_mbpost_proc_across_ip "unsigned char *dst, int pitch, int rows, int cols,int flimit" - specialize vp8_mbpost_proc_across_ip sse2 - vp8_mbpost_proc_across_ip_sse2=vp8_mbpost_proc_across_ip_xmm - - prototype void vp8_post_proc_down_and_across_mb_row "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size" - specialize vp8_post_proc_down_and_across_mb_row sse2 - - prototype void vp8_plane_add_noise "unsigned char *s, char *noise, char blackclamp[16], char 
whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch" - specialize vp8_plane_add_noise mmx sse2 - vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt - - prototype void vp8_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" - # no asm yet - - prototype void vp8_filter_by_weight16x16 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - specialize vp8_filter_by_weight16x16 sse2 - - prototype void vp8_filter_by_weight8x8 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - specialize vp8_filter_by_weight8x8 sse2 - - prototype void vp8_filter_by_weight4x4 "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight" - # no asm yet -fi - -# -# Subpixel -# -prototype void vp8_sixtap_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict16x16 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict16x16_media=vp8_sixtap_predict16x16_armv6 -vp8_sixtap_predict16x16_dspr2=vp8_sixtap_predict16x16_dspr2 - -prototype void vp8_sixtap_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict8x8 mmx sse2 ssse3 media neon dspr2 -vp8_sixtap_predict8x8_media=vp8_sixtap_predict8x8_armv6 -vp8_sixtap_predict8x8_dspr2=vp8_sixtap_predict8x8_dspr2 - -prototype void vp8_sixtap_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict8x4 mmx sse2 ssse3 media neon dspr2 
-vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6 -vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2 - -prototype void vp8_sixtap_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_sixtap_predict4x4 mmx ssse3 media neon dspr2 -vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6 -vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2 - -prototype void vp8_bilinear_predict16x16 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon -vp8_bilinear_predict16x16_media=vp8_bilinear_predict16x16_armv6 - -prototype void vp8_bilinear_predict8x8 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon -vp8_bilinear_predict8x8_media=vp8_bilinear_predict8x8_armv6 - -prototype void vp8_bilinear_predict8x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict8x4 mmx media neon -vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6 - -prototype void vp8_bilinear_predict4x4 "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch" -specialize vp8_bilinear_predict4x4 mmx media neon -vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6 - -# -# Whole-pixel Variance -# -prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance4x4 mmx sse2 -vp8_variance4x4_sse2=vp8_variance4x4_wmt - -prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance8x8 mmx sse2 media neon -vp8_variance8x8_sse2=vp8_variance8x8_wmt -vp8_variance8x8_media=vp8_variance8x8_armv6 - 
-prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance8x16 mmx sse2 neon -vp8_variance8x16_sse2=vp8_variance8x16_wmt - -prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance16x8 mmx sse2 neon -vp8_variance16x8_sse2=vp8_variance16x8_wmt - -prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance16x16 mmx sse2 media neon -vp8_variance16x16_sse2=vp8_variance16x16_wmt -vp8_variance16x16_media=vp8_variance16x16_armv6 - -# -# Sub-pixel Variance -# -prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance4x4 mmx sse2 -vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt - -prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance8x8 mmx sse2 media neon -vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt -vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6 - -prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance8x16 mmx sse2 -vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt - -prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" 
-specialize vp8_sub_pixel_variance16x8 mmx sse2 ssse3 -vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt - -prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon -vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt -vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_h mmx sse2 media neon -vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt -vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_v mmx sse2 media neon -vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt -vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6 - -prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_variance_halfpixvar16x16_hv mmx sse2 media neon -vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt -vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6 - -# -# Single block SAD -# -prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad4x4 mmx sse2 neon -vp8_sad4x4_sse2=vp8_sad4x4_wmt - -prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int src_stride, 
const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad8x8 mmx sse2 neon -vp8_sad8x8_sse2=vp8_sad8x8_wmt - -prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad8x16 mmx sse2 neon -vp8_sad8x16_sse2=vp8_sad8x16_wmt - -prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad16x8 mmx sse2 neon -vp8_sad16x8_sse2=vp8_sad16x8_wmt - -prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp8_sad16x16 mmx sse2 sse3 media neon -vp8_sad16x16_sse2=vp8_sad16x16_wmt -vp8_sad16x16_media=vp8_sad16x16_armv6 - -# -# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally -# -prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad4x4x3 sse3 - -prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x8x3 sse3 - -prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x16x3 sse3 - -prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x8x3 sse3 ssse3 - -prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x16x3 sse3 ssse3 - -# Note the only difference in the following prototypes is that they return into -# an array of short -prototype void 
vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad4x4x8 sse4_1 -vp8_sad4x4x8_sse4_1=vp8_sad4x4x8_sse4 - -prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad8x8x8 sse4_1 -vp8_sad8x8x8_sse4_1=vp8_sad8x8x8_sse4 - -prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad8x16x8 sse4_1 -vp8_sad8x16x8_sse4_1=vp8_sad8x16x8_sse4 - -prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad16x8x8 sse4_1 -vp8_sad16x8x8_sse4_1=vp8_sad16x8x8_sse4 - -prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" -specialize vp8_sad16x16x8 sse4_1 -vp8_sad16x16x8_sse4_1=vp8_sad16x16x8_sse4 - -# -# Multi-block SAD, comparing a reference to N independent blocks -# -prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad4x4x4d sse3 - -prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x8x4d sse3 - -prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad8x16x4d sse3 - -prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x8x4d sse3 - -prototype void vp8_sad16x16x4d "const unsigned char 
*src_ptr, int src_stride, const unsigned char * const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp8_sad16x16x4d sse3 - -# -# Encoder functions below this point. -# -if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then - -# -# Sum of squares (vector) -# -prototype unsigned int vp8_get_mb_ss "const short *" -specialize vp8_get_mb_ss mmx sse2 - -# -# SSE (Sum Squared Error) -# -prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" -specialize vp8_sub_pixel_mse16x16 mmx sse2 -vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt - -prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp8_mse16x16 mmx sse2 media neon -vp8_mse16x16_sse2=vp8_mse16x16_wmt -vp8_mse16x16_media=vp8_mse16x16_armv6 - -prototype unsigned int vp8_get4x4sse_cs "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride" -specialize vp8_get4x4sse_cs mmx neon - -# -# Block copy -# -case $arch in - x86*) - prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n" - specialize vp8_copy32xn sse2 sse3 - ;; -esac - -# -# Structured Similarity (SSIM) -# -if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 - - prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp8_ssim_parms_8x8 $sse2_on_x86_64 - - prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp8_ssim_parms_16x16 
$sse2_on_x86_64 -fi - -# -# Forward DCT -# -prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch" -specialize vp8_short_fdct4x4 mmx sse2 media neon -vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6 - -prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch" -specialize vp8_short_fdct8x4 mmx sse2 media neon -vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6 - -prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch" -specialize vp8_short_walsh4x4 sse2 media neon -vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6 - -# -# Quantizer -# -prototype void vp8_regular_quantize_b "struct block *, struct blockd *" -specialize vp8_regular_quantize_b sse2 #sse4_1 -# TODO(johann) Update sse4 implementation and re-enable -#vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4 - -prototype void vp8_fast_quantize_b "struct block *, struct blockd *" -specialize vp8_fast_quantize_b sse2 ssse3 media neon -vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6 - -prototype void vp8_regular_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2" -# no asm yet - -prototype void vp8_fast_quantize_b_pair "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2" -specialize vp8_fast_quantize_b_pair neon - -prototype void vp8_quantize_mb "struct macroblock *" -specialize vp8_quantize_mb neon - -prototype void vp8_quantize_mby "struct macroblock *" -specialize vp8_quantize_mby neon - -prototype void vp8_quantize_mbuv "struct macroblock *" -specialize vp8_quantize_mbuv neon - -# -# Block subtraction -# -prototype int vp8_block_error "short *coeff, short *dqcoeff" -specialize vp8_block_error mmx sse2 -vp8_block_error_sse2=vp8_block_error_xmm - -prototype int vp8_mbblock_error "struct macroblock *mb, int dc" -specialize vp8_mbblock_error mmx sse2 -vp8_mbblock_error_sse2=vp8_mbblock_error_xmm - -prototype int vp8_mbuverror "struct macroblock *mb" -specialize vp8_mbuverror mmx sse2 
-vp8_mbuverror_sse2=vp8_mbuverror_xmm - -prototype void vp8_subtract_b "struct block *be, struct blockd *bd, int pitch" -specialize vp8_subtract_b mmx sse2 media neon -vp8_subtract_b_media=vp8_subtract_b_armv6 - -prototype void vp8_subtract_mby "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride" -specialize vp8_subtract_mby mmx sse2 media neon -vp8_subtract_mby_media=vp8_subtract_mby_armv6 - -prototype void vp8_subtract_mbuv "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride" -specialize vp8_subtract_mbuv mmx sse2 media neon -vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6 - -# -# Motion search -# -prototype int vp8_full_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -specialize vp8_full_search_sad sse3 sse4_1 -vp8_full_search_sad_sse3=vp8_full_search_sadx3 -vp8_full_search_sad_sse4_1=vp8_full_search_sadx8 - -prototype int vp8_refining_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -specialize vp8_refining_search_sad sse3 -vp8_refining_search_sad_sse3=vp8_refining_search_sadx4 - -prototype int vp8_diamond_search_sad "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv" -vp8_diamond_search_sad_sse3=vp8_diamond_search_sadx4 - -# -# Alt-ref Noise Reduction (ARNR) -# -if [ "$CONFIG_REALTIME_ONLY" != "yes" ]; then - prototype void vp8_temporal_filter_apply "unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int 
*accumulator, unsigned short *count" - specialize vp8_temporal_filter_apply sse2 -fi - -# -# Pick Loopfilter -# -prototype void vp8_yv12_copy_partial_frame "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_partial_frame neon - -# -# Denoiser filter -# -if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then - prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset" - specialize vp8_denoiser_filter sse2 neon -fi - -# End of encoder only functions -fi diff --git a/source/libvpx/vp8/common/x86/postproc_mmx.asm b/source/libvpx/vp8/common/x86/postproc_mmx.asm index 5cf110b..8be3431 100644 --- a/source/libvpx/vp8/common/x86/postproc_mmx.asm +++ b/source/libvpx/vp8/common/x86/postproc_mmx.asm @@ -204,13 +204,16 @@ sym(vp8_mbpost_proc_down_mmx): and rcx, 15 movd DWORD PTR [rsp+rcx*4], mm1 ;d[rcx*4] + cmp edx, 8 + jl .skip_assignment + mov rcx, rdx sub rcx, 8 - and rcx, 15 movd mm1, DWORD PTR [rsp+rcx*4] ;d[rcx*4] - movd [rsi], mm1 + +.skip_assignment lea rsi, [rsi+rax] lea rdi, [rdi+rax] diff --git a/source/libvpx/vp8/common/x86/postproc_sse2.asm b/source/libvpx/vp8/common/x86/postproc_sse2.asm index 00f84a3..f53daa7 100644 --- a/source/libvpx/vp8/common/x86/postproc_sse2.asm +++ b/source/libvpx/vp8/common/x86/postproc_sse2.asm @@ -425,13 +425,16 @@ sym(vp8_mbpost_proc_down_xmm): and rcx, 15 movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8] + cmp edx, 8 + jl .skip_assignment + mov rcx, rdx sub rcx, 8 - and rcx, 15 movq mm0, [rsp + rcx*8] ;d[rcx*8] - movq [rsi], mm0 + +.skip_assignment lea rsi, [rsi+rax] lea rdi, [rdi+rax] diff --git a/source/libvpx/vp8/encoder/onyx_if.c b/source/libvpx/vp8/encoder/onyx_if.c index 849a0ed..ef37c0e 100644 --- a/source/libvpx/vp8/encoder/onyx_if.c +++ b/source/libvpx/vp8/encoder/onyx_if.c @@ -1401,6 +1401,7 @@ static void update_layer_contexts (VP8_COMP *cpi) 
unsigned int i; double prev_layer_framerate=0; + assert(oxcf->number_of_layers <= VPX_TS_MAX_LAYERS); for (i=0; i<oxcf->number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; @@ -5071,6 +5072,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l unsigned int i; /* Update frame rates for each layer */ + assert(cpi->oxcf.number_of_layers <= VPX_TS_MAX_LAYERS); for (i=0; i<cpi->oxcf.number_of_layers; i++) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; diff --git a/source/libvpx/vp8/encoder/rdopt.c b/source/libvpx/vp8/encoder/rdopt.c index 5016cc4..387701c 100644 --- a/source/libvpx/vp8/encoder/rdopt.c +++ b/source/libvpx/vp8/encoder/rdopt.c @@ -528,19 +528,16 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); -# define QC( I) ( qcoeff_ptr [vp8_default_zig_zag1d[I]] ) - + assert(eob <= 16); for (; c < eob; c++) { - int v = QC(c); - int t = vp8_dct_value_tokens_ptr[v].Token; + const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]]; + const int t = vp8_dct_value_tokens_ptr[v].Token; cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t]; cost += vp8_dct_value_cost_ptr[v]; pt = vp8_prev_token_class[t]; } -# undef QC - if (c < 16) cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; diff --git a/source/libvpx/vp8/encoder/tokenize.c b/source/libvpx/vp8/encoder/tokenize.c index 11559a7..2dc8205 100644 --- a/source/libvpx/vp8/encoder/tokenize.c +++ b/source/libvpx/vp8/encoder/tokenize.c @@ -213,6 +213,7 @@ static void tokenize1st_order_b /* Luma */ for (block = 0; block < 16; block++, b++) { + const int eob = *b->eob; tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; @@ -223,7 +224,7 @@ static void tokenize1st_order_b c = type ? 
0 : 1; - if(c >= *b->eob) + if(c >= eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -250,7 +251,8 @@ static void tokenize1st_order_b t++; c++; - for (; c < *b->eob; c++) + assert(eob <= 16); + for (; c < eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; @@ -286,6 +288,7 @@ static void tokenize1st_order_b /* Chroma */ for (block = 16; block < 24; block++, b++) { + const int eob = *b->eob; tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; @@ -294,7 +297,7 @@ static void tokenize1st_order_b VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - if(!(*b->eob)) + if(!eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; @@ -321,7 +324,8 @@ static void tokenize1st_order_b t++; c = 1; - for (; c < *b->eob; c++) + assert(eob <= 16); + for (; c < eob; c++) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; diff --git a/source/libvpx/vp8/vp8_common.mk b/source/libvpx/vp8/vp8_common.mk index ac91d7a..dfb54a5 100644 --- a/source/libvpx/vp8/vp8_common.mk +++ b/source/libvpx/vp8/vp8_common.mk @@ -47,7 +47,7 @@ VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/reconinter.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c -VP8_COMMON_SRCS-yes += common/rtcd_defs.sh +VP8_COMMON_SRCS-yes += common/rtcd_defs.pl VP8_COMMON_SRCS-yes += common/setupintrarecon.h VP8_COMMON_SRCS-yes += common/swapyv12buffer.h VP8_COMMON_SRCS-yes += common/systemdependent.h @@ -189,4 +189,4 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c -$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh)) +$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl)) diff --git a/source/libvpx/vp9/common/vp9_alloccommon.c b/source/libvpx/vp9/common/vp9_alloccommon.c index a72821b..f44ada1 100644 --- a/source/libvpx/vp9/common/vp9_alloccommon.c +++ 
b/source/libvpx/vp9/common/vp9_alloccommon.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" @@ -18,70 +17,100 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_systemdependent.h" -void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) { - const int stride = cm->mode_info_stride; +static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) { int i; - // Clear down top border row - vpx_memset(mi, 0, sizeof(MODE_INFO) * stride); + // Top border row + vpx_memset(mi, 0, sizeof(*mi) * cm->mi_stride); - // Clear left border column - for (i = 1; i < cm->mi_rows + 1; i++) - vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO)); + // Left border column + for (i = 1; i < cm->mi_rows + 1; ++i) + vpx_memset(&mi[i * cm->mi_stride], 0, sizeof(*mi)); } -void vp9_free_frame_buffers(VP9_COMMON *cm) { - int i; +static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { + cm->mi_cols = aligned_width >> MI_SIZE_LOG2; + cm->mi_rows = aligned_height >> MI_SIZE_LOG2; + cm->mi_stride = cm->mi_cols + MI_BLOCK_SIZE; - for (i = 0; i < FRAME_BUFFERS; i++) { - vp9_free_frame_buffer(&cm->frame_bufs[i].buf); + cm->mb_cols = (cm->mi_cols + 1) >> 1; + cm->mb_rows = (cm->mi_rows + 1) >> 1; + cm->MBs = cm->mb_rows * cm->mb_cols; +} - if (cm->frame_bufs[i].ref_count > 0 && - cm->frame_bufs[i].raw_frame_buffer.data != NULL) { - cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer); - cm->frame_bufs[i].ref_count = 0; - } - } +static void setup_mi(VP9_COMMON *cm) { + cm->mi = cm->mip + cm->mi_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; - vp9_free_frame_buffer(&cm->post_proc_buffer); + vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + vpx_memset(cm->mi_grid_base, 0, 
cm->mi_stride * (cm->mi_rows + 1) * + sizeof(*cm->mi_grid_base)); + + clear_mi_border(cm, cm->prev_mip); +} + +static int alloc_mi(VP9_COMMON *cm, int mi_size) { + cm->mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip)); + if (cm->mip == NULL) + return 1; + + cm->prev_mip = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->prev_mip)); + if (cm->prev_mip == NULL) + return 1; + + cm->mi_grid_base = + (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); + if (cm->mi_grid_base == NULL) + return 1; + + cm->prev_mi_grid_base = + (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base)); + if (cm->prev_mi_grid_base == NULL) + return 1; + + return 0; +} + +static void free_mi(VP9_COMMON *cm) { vpx_free(cm->mip); vpx_free(cm->prev_mip); - vpx_free(cm->last_frame_seg_map); vpx_free(cm->mi_grid_base); vpx_free(cm->prev_mi_grid_base); cm->mip = NULL; cm->prev_mip = NULL; - cm->last_frame_seg_map = NULL; cm->mi_grid_base = NULL; cm->prev_mi_grid_base = NULL; } -static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { - cm->mi_cols = aligned_width >> MI_SIZE_LOG2; - cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mode_info_stride = cm->mi_cols + MI_BLOCK_SIZE; +void vp9_free_frame_buffers(VP9_COMMON *cm) { + int i; - cm->mb_cols = (cm->mi_cols + 1) >> 1; - cm->mb_rows = (cm->mi_rows + 1) >> 1; - cm->MBs = cm->mb_rows * cm->mb_cols; -} + for (i = 0; i < FRAME_BUFFERS; ++i) { + vp9_free_frame_buffer(&cm->frame_bufs[i].buf); -static void setup_mi(VP9_COMMON *cm) { - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + if (cm->frame_bufs[i].ref_count > 0 && + cm->frame_bufs[i].raw_frame_buffer.data != NULL) { + cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer); + cm->frame_bufs[i].ref_count = 0; + } + } - vpx_memset(cm->mip, 0, 
- cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vp9_free_frame_buffer(&cm->post_proc_buffer); + + free_mi(cm); + + vpx_free(cm->last_frame_seg_map); + cm->last_frame_seg_map = NULL; - vpx_memset(cm->mi_grid_base, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * - sizeof(*cm->mi_grid_base)); + vpx_free(cm->above_context); + cm->above_context = NULL; - vp9_update_mode_info_border(cm, cm->prev_mip); + vpx_free(cm->above_seg_context); + cm->above_seg_context = NULL; } int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { @@ -89,7 +118,6 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; - int mi_size; if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y, VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0) @@ -97,37 +125,33 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); - // Allocation - mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE); - - vpx_free(cm->mip); - cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->mip) - goto fail; - - vpx_free(cm->prev_mip); - cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->prev_mip) - goto fail; - - vpx_free(cm->mi_grid_base); - cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); - if (!cm->mi_grid_base) - goto fail; - - vpx_free(cm->prev_mi_grid_base); - cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base)); - if (!cm->prev_mi_grid_base) + free_mi(cm); + if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; setup_mi(cm); // Create the segmentation map structure and set to 0. 
vpx_free(cm->last_frame_seg_map); - cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1); + cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1); if (!cm->last_frame_seg_map) goto fail; + vpx_free(cm->above_context); + cm->above_context = + (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * + MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) + goto fail; + + vpx_free(cm->above_seg_context); + cm->above_seg_context = + (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), + sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) + goto fail; + return 0; fail: @@ -136,13 +160,11 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { } int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { - int i; - const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; - int mi_size; + int i; vp9_free_frame_buffers(cm); @@ -167,30 +189,27 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); - // Allocation - mi_size = cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE); - - cm->mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->mip) + if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; - cm->prev_mip = vpx_calloc(mi_size, sizeof(MODE_INFO)); - if (!cm->prev_mip) - goto fail; + setup_mi(cm); - cm->mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->mi_grid_base)); - if (!cm->mi_grid_base) + // Create the segmentation map structure and set to 0. 
+ cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1); + if (!cm->last_frame_seg_map) goto fail; - cm->prev_mi_grid_base = vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base)); - if (!cm->prev_mi_grid_base) + cm->above_context = + (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * + MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) goto fail; - setup_mi(cm); - - // Create the segmentation map structure and set to 0. - cm->last_frame_seg_map = vpx_calloc(cm->mi_rows * cm->mi_cols, 1); - if (!cm->last_frame_seg_map) + cm->above_seg_context = + (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), + sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) goto fail; return 0; @@ -205,10 +224,6 @@ void vp9_remove_common(VP9_COMMON *cm) { vp9_free_internal_frame_buffers(&cm->int_frame_buffers); } -void vp9_initialize_common() { - vp9_init_neighbors(); -} - void vp9_update_frame_size(VP9_COMMON *cm) { const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2); @@ -220,3 +235,19 @@ void vp9_update_frame_size(VP9_COMMON *cm) { if (cm->last_frame_seg_map) vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); } + +void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { + // Current mip will be the prev_mip for the next frame. + MODE_INFO *temp = cm->prev_mip; + MODE_INFO **temp2 = cm->prev_mi_grid_base; + cm->prev_mip = cm->mip; + cm->mip = temp; + cm->prev_mi_grid_base = cm->mi_grid_base; + cm->mi_grid_base = temp2; + + // Update the upper left visible macroblock ptrs. 
+ cm->mi = cm->mip + cm->mi_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; +} diff --git a/source/libvpx/vp9/common/vp9_alloccommon.h b/source/libvpx/vp9/common/vp9_alloccommon.h index 066c778..06636a9 100644 --- a/source/libvpx/vp9/common/vp9_alloccommon.h +++ b/source/libvpx/vp9/common/vp9_alloccommon.h @@ -12,24 +12,23 @@ #ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ #define VP9_COMMON_VP9_ALLOCCOMMON_H_ -#include "vp9/common/vp9_onyxc_int.h" - #ifdef __cplusplus extern "C" { #endif -void vp9_initialize_common(); +struct VP9Common; + +void vp9_remove_common(struct VP9Common *cm); -void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi); +int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height); -void vp9_remove_common(VP9_COMMON *cm); +int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height); -int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height); -int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height); -void vp9_free_frame_buffers(VP9_COMMON *cm); +void vp9_free_frame_buffers(struct VP9Common *cm); +void vp9_update_frame_size(struct VP9Common *cm); -void vp9_update_frame_size(VP9_COMMON *cm); +void vp9_swap_mi_and_prev_mi(struct VP9Common *cm); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/common/vp9_blockd.c b/source/libvpx/vp9/common/vp9_blockd.c index e1d1318..fedfb18 100644 --- a/source/libvpx/vp9/common/vp9_blockd.c +++ b/source/libvpx/vp9/common/vp9_blockd.c @@ -40,7 +40,7 @@ void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; // block and transform sizes, in number of 
4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. diff --git a/source/libvpx/vp9/common/vp9_blockd.h b/source/libvpx/vp9/common/vp9_blockd.h index 2a0ebfb..55320a6 100644 --- a/source/libvpx/vp9/common/vp9_blockd.h +++ b/source/libvpx/vp9/common/vp9_blockd.h @@ -120,23 +120,23 @@ static INLINE int mi_width_log2(BLOCK_SIZE sb_type) { // This structure now relates to 8x8 block regions. typedef struct { - MB_PREDICTION_MODE mode, uv_mode; - MV_REFERENCE_FRAME ref_frame[2]; + // Common for both INTER and INTRA blocks + BLOCK_SIZE sb_type; + MB_PREDICTION_MODE mode; TX_SIZE tx_size; - int_mv mv[2]; // for each reference frame used - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; - - uint8_t mode_context[MAX_REF_FRAMES]; - - unsigned char skip; // 0=need to decode coeffs, 1=no coefficients - unsigned char segment_id; // Segment id for this block. + uint8_t skip; + uint8_t segment_id; + uint8_t seg_id_predicted; // valid only when temporal_update is enabled - // Flags used for prediction status of various bit-stream signals - unsigned char seg_id_predicted; + // Only for INTRA blocks + MB_PREDICTION_MODE uv_mode; + // Only for INTER blocks + MV_REFERENCE_FRAME ref_frame[2]; + int_mv mv[2]; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + uint8_t mode_context[MAX_REF_FRAMES]; INTERP_FILTER interp_filter; - - BLOCK_SIZE sb_type; } MB_MODE_INFO; typedef struct { @@ -204,13 +204,10 @@ typedef struct RefBuffer { typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; - MODE_INFO *last_mi; - int mode_info_stride; + int mi_stride; // A NULL indicates that the 8x8 is not part of the image - MODE_INFO **mi_8x8; - MODE_INFO **prev_mi_8x8; - MODE_INFO *mi_stream; + MODE_INFO **mi; int up_available; int left_available; @@ -234,11 +231,10 @@ typedef struct macroblockd { /* Inverse transform function pointers. 
*/ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); - const InterpKernel *interp_kernel; - int corrupted; - /* Y,U,V,(A) */ + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; @@ -246,8 +242,6 @@ typedef struct macroblockd { PARTITION_CONTEXT left_seg_context[8]; } MACROBLOCKD; - - static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { const BLOCK_SIZE subsize = subsize_lookup[partition][bsize]; @@ -255,28 +249,25 @@ static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, return subsize; } -extern const TX_TYPE mode2txfm_map[MB_MODE_COUNT]; +extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; + +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, + const MACROBLOCKD *xd) { + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + + if (plane_type != PLANE_TYPE_Y || is_inter_block(mbmi)) + return DCT_DCT; + return intra_mode_to_tx_type_lookup[mbmi->mode]; +} static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int ib) { - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi)) return DCT_DCT; - return mode2txfm_map[get_y_mode(mi, ib)]; -} - -static INLINE TX_TYPE get_tx_type_8x8(PLANE_TYPE plane_type, - const MACROBLOCKD *xd) { - return plane_type == PLANE_TYPE_Y ? mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] - : DCT_DCT; -} - -static INLINE TX_TYPE get_tx_type_16x16(PLANE_TYPE plane_type, - const MACROBLOCKD *xd) { - return plane_type == PLANE_TYPE_Y ? 
mode2txfm_map[xd->mi_8x8[0]->mbmi.mode] - : DCT_DCT; + return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; } void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); diff --git a/source/libvpx/vp9/common/vp9_debugmodes.c b/source/libvpx/vp9/common/vp9_debugmodes.c index 24c785f..8f150a4 100644 --- a/source/libvpx/vp9/common/vp9_debugmodes.c +++ b/source/libvpx/vp9/common/vp9_debugmodes.c @@ -22,7 +22,7 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { * and uses the passed in member offset to print out the value of an integer * for each mbmi member value in the mi structure. */ -static void print_mi_data(VP9_COMMON *cm, FILE *file, char *descriptor, +static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, size_t member_offset) { int mi_row; int mi_col; @@ -47,7 +47,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, char *descriptor, } fprintf(file, "\n"); } -void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, char *file) { +void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int mi_row; int mi_col; int mi_index = 0; diff --git a/source/libvpx/vp9/common/vp9_entropy.h b/source/libvpx/vp9/common/vp9_entropy.h index bd5086a..6788eb6 100644 --- a/source/libvpx/vp9/common/vp9_entropy.h +++ b/source/libvpx/vp9/common/vp9_entropy.h @@ -16,7 +16,6 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_scan.h" -#include "vp9/common/vp9_entropymode.h" #ifdef __cplusplus extern "C" { @@ -176,13 +175,13 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, static const INLINE scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { return &vp9_default_scan_orders[tx_size]; } else { const MB_PREDICTION_MODE mode = 
get_y_mode(mi, block_idx); - return &vp9_scan_orders[tx_size][mode2txfm_map[mode]]; + return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; } } diff --git a/source/libvpx/vp9/common/vp9_entropymode.c b/source/libvpx/vp9/common/vp9_entropymode.c index 8921539..5b00b00 100644 --- a/source/libvpx/vp9/common/vp9_entropymode.c +++ b/source/libvpx/vp9/common/vp9_entropymode.c @@ -10,7 +10,6 @@ #include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_seg_common.h" @@ -315,18 +314,18 @@ static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] { 149, 144, }, }; -void vp9_init_mbmode_probs(VP9_COMMON *cm) { - vp9_copy(cm->fc.uv_mode_prob, default_if_uv_probs); - vp9_copy(cm->fc.y_mode_prob, default_if_y_probs); - vp9_copy(cm->fc.switchable_interp_prob, default_switchable_interp_prob); - vp9_copy(cm->fc.partition_prob, default_partition_probs); - vp9_copy(cm->fc.intra_inter_prob, default_intra_inter_p); - vp9_copy(cm->fc.comp_inter_prob, default_comp_inter_p); - vp9_copy(cm->fc.comp_ref_prob, default_comp_ref_p); - vp9_copy(cm->fc.single_ref_prob, default_single_ref_p); - cm->fc.tx_probs = default_tx_probs; - vp9_copy(cm->fc.skip_probs, default_skip_probs); - vp9_copy(cm->fc.inter_mode_probs, default_inter_mode_probs); +void vp9_init_mode_probs(FRAME_CONTEXT *fc) { + vp9_copy(fc->uv_mode_prob, default_if_uv_probs); + vp9_copy(fc->y_mode_prob, default_if_y_probs); + vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); + vp9_copy(fc->partition_prob, default_partition_probs); + vp9_copy(fc->intra_inter_prob, default_intra_inter_p); + vp9_copy(fc->comp_inter_prob, default_comp_inter_p); + vp9_copy(fc->comp_ref_prob, default_comp_ref_p); + vp9_copy(fc->single_ref_prob, default_single_ref_p); + fc->tx_probs = default_tx_probs; + vp9_copy(fc->skip_probs, default_skip_probs); + vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); } const 
vp9_tree_index vp9_switchable_interp_tree @@ -452,7 +451,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { lf->last_sharpness_level = -1; vp9_default_coef_probs(cm); - vp9_init_mbmode_probs(cm); + vp9_init_mode_probs(&cm->fc); vp9_init_mv_probs(cm); if (cm->frame_type == KEY_FRAME || @@ -466,11 +465,10 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { } if (frame_is_intra_only(cm)) - vpx_memset(cm->prev_mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) * + sizeof(*cm->prev_mip)); - vpx_memset(cm->mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); vp9_zero(cm->ref_frame_sign_bias); diff --git a/source/libvpx/vp9/common/vp9_entropymode.h b/source/libvpx/vp9/common/vp9_entropymode.h index deec3f6..c7b1911 100644 --- a/source/libvpx/vp9/common/vp9_entropymode.h +++ b/source/libvpx/vp9/common/vp9_entropymode.h @@ -12,6 +12,8 @@ #define VP9_COMMON_VP9_ENTROPYMODE_H_ #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymv.h" #ifdef __cplusplus extern "C" { @@ -35,6 +37,42 @@ struct tx_counts { unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; }; +typedef struct frame_contexts { + vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; + vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; + vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; + vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; + vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1]; + vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; + vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; + vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; + vp9_prob single_ref_prob[REF_CONTEXTS][2]; + vp9_prob comp_ref_prob[REF_CONTEXTS]; + struct tx_probs tx_probs; + vp9_prob skip_probs[SKIP_CONTEXTS]; + 
nmv_context nmvc; +} FRAME_CONTEXT; + +typedef struct { + unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; + unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; + unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; + vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; + unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] + [COEF_BANDS][COEFF_CONTEXTS]; + unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; + unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; + unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; + unsigned int single_ref[REF_CONTEXTS][2][2]; + unsigned int comp_ref[REF_CONTEXTS][2]; + struct tx_counts tx; + unsigned int skip[SKIP_CONTEXTS][2]; + nmv_context_counts mv; +} FRAME_COUNTS; + extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] [INTRA_MODES - 1]; @@ -48,7 +86,7 @@ extern const vp9_tree_index vp9_switchable_interp_tree void vp9_setup_past_independence(struct VP9Common *cm); -void vp9_init_mbmode_probs(struct VP9Common *cm); +void vp9_init_mode_probs(FRAME_CONTEXT *fc); void vp9_adapt_mode_probs(struct VP9Common *cm); @@ -59,6 +97,15 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); +static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi, + const MODE_INFO *above_mi, + const MODE_INFO *left_mi, + int block) { + const MB_PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); + const MB_PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); + return vp9_kf_y_mode_prob[above][left]; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/common/vp9_entropymv.c b/source/libvpx/vp9/common/vp9_entropymv.c index e1f5ef7..5bb0482 100644 --- a/source/libvpx/vp9/common/vp9_entropymv.c 
+++ b/source/libvpx/vp9/common/vp9_entropymv.c @@ -8,14 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" #define MV_COUNT_SAT 20 #define MV_MAX_UPDATE_FACTOR 128 -/* Integer pel reference mv threshold for use of high-precision 1/8 mv */ +// Integer pel reference mv threshold for use of high-precision 1/8 mv #define COMPANDED_MVREF_THRESH 8 const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { @@ -49,32 +48,30 @@ const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { static const nmv_context default_nmv_context = { {32, 64, 96}, - { // NOLINT - { /* vert component */ // NOLINT - 128, /* sign */ - {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */ - {216}, /* class0 */ - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */ - {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ - {64, 96, 64}, /* fp */ - 160, /* class0_hp bit */ - 128, /* hp */ + { + { // Vertical component + 128, // sign + {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class + {216}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp }, - { /* hor component */ // NOLINT - 128, /* sign */ - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */ - {208}, /* class0 */ - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */ - {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ - {64, 96, 64}, /* fp */ - 160, /* class0_hp bit */ - 128, /* hp */ + { // Horizontal component + 128, // sign + {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class + {208}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp } }, }; -#define mv_class_base(c) ((c) ? 
(CLASS0_SIZE << (c + 2)) : 0) - static const uint8_t log_in_base_2[] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -121,13 +118,13 @@ static const uint8_t log_in_base_2[] = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 }; -MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { - MV_CLASS_TYPE c = MV_CLASS_0; - if (z >= CLASS0_SIZE * 4096) - c = MV_CLASS_10; - else - c = log_in_base_2[z >> 3]; +static INLINE int mv_class_base(MV_CLASS_TYPE c) { + return c ? CLASS0_SIZE << (c + 2) : 0; +} +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? + MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; if (offset) *offset = z - mv_class_base(c); return c; diff --git a/source/libvpx/vp9/common/vp9_entropymv.h b/source/libvpx/vp9/common/vp9_entropymv.h index 7e1f147..e7033e4 100644 --- a/source/libvpx/vp9/common/vp9_entropymv.h +++ b/source/libvpx/vp9/common/vp9_entropymv.h @@ -26,7 +26,7 @@ void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); int vp9_use_mv_hp(const MV *ref); -#define NMV_UPDATE_PROB 252 +#define MV_UPDATE_PROB 252 /* Symbols for coding which components are zero jointly */ #define MV_JOINTS 4 diff --git a/source/libvpx/vp9/common/vp9_enums.h b/source/libvpx/vp9/common/vp9_enums.h index e96e769..068284f 100644 --- a/source/libvpx/vp9/common/vp9_enums.h +++ b/source/libvpx/vp9/common/vp9_enums.h @@ -25,6 +25,23 @@ extern "C" { #define MI_MASK (MI_BLOCK_SIZE - 1) +// Bitstream profiles indicated by 2 bits in the uncompressed header. +// 00: Profile 0. 4:2:0 only. +// 10: Profile 1. adds 4:4:4, 4:2:2, alpha. +// 01: Profile 2. Supports 10-bit and 12-bit color only. +// 11: Undefined profile. 
+typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + MAX_PROFILES +} BITSTREAM_PROFILE; + +typedef enum BIT_DEPTH { + BITS_8, + BITS_10, + BITS_12 +} BIT_DEPTH; typedef enum BLOCK_SIZE { BLOCK_4X4, @@ -94,6 +111,12 @@ typedef enum { SRGB = 7 // RGB } COLOR_SPACE; +typedef enum { + VP9_LAST_FLAG = 1 << 0, + VP9_GOLD_FLAG = 1 << 1, + VP9_ALT_FLAG = 1 << 2, +} VP9_REFFRAME; + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/common/vp9_frame_buffers.c b/source/libvpx/vp9/common/vp9_frame_buffers.c index dffeb8a..a0b1e03 100644 --- a/source/libvpx/vp9/common/vp9_frame_buffers.c +++ b/source/libvpx/vp9/common/vp9_frame_buffers.c @@ -19,8 +19,9 @@ int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { list->num_internal_frame_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; - list->int_fb = vpx_calloc(list->num_internal_frame_buffers, - sizeof(*list->int_fb)); + list->int_fb = + (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers, + sizeof(*list->int_fb)); return (list->int_fb == NULL); } diff --git a/source/libvpx/vp9/common/vp9_loopfilter.c b/source/libvpx/vp9/common/vp9_loopfilter.c index 868a66a..3ac5a05 100644 --- a/source/libvpx/vp9/common/vp9_loopfilter.c +++ b/source/libvpx/vp9/common/vp9_loopfilter.c @@ -228,6 +228,12 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { } } +static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, + const MB_MODE_INFO *mbmi) { + return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] + [mode_lf_lut[mbmi->mode]]; +} + void vp9_loop_filter_init(VP9_COMMON *cm) { loop_filter_info_n *lfi = &cm->lf_info; struct loopfilter *lf = &cm->lf; @@ -493,27 +499,25 @@ static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, const int shift_uv, LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->mbmi.sb_type; - const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const TX_SIZE 
tx_size_uv = get_uv_tx_size(&mi->mbmi); - const int skip = mi->mbmi.skip; - const int seg = mi->mbmi.segment_id; - const int ref = mi->mbmi.ref_frame[0]; - const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; - uint64_t *left_y = &lfm->left_y[tx_size_y]; - uint64_t *above_y = &lfm->above_y[tx_size_y]; - uint64_t *int_4x4_y = &lfm->int_4x4_y; - uint16_t *left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *int_4x4_uv = &lfm->int_4x4_uv; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi); + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; int i; - int w = num_8x8_blocks_wide_lookup[block_size]; - int h = num_8x8_blocks_high_lookup[block_size]; // If filter level is 0 we don't loop filter. if (!filter_level) { return; } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { vpx_memset(&lfm->lfl_y[index], filter_level, w); @@ -540,7 +544,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (skip && ref > INTRA_FRAME) + if (mbmi->skip && is_inter_block(mbmi)) return; // Here we are adding a mask for the transform size. The transform @@ -561,12 +565,11 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // boundaries. 
These differ from the 4x4 boundaries on the outside edge of // an 8x8 in that the internal ones can be skipped and don't depend on // the prediction block size. - if (tx_size_y == TX_4X4) { + if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; - } - if (tx_size_uv == TX_4X4) { + + if (tx_size_uv == TX_4X4) *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; - } } // This function does the same thing as the one above with the exception that @@ -575,22 +578,20 @@ static void build_masks(const loop_filter_info_n *const lfi_n, static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->mbmi.sb_type; - const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const int skip = mi->mbmi.skip; - const int seg = mi->mbmi.segment_id; - const int ref = mi->mbmi.ref_frame[0]; - const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; - uint64_t *left_y = &lfm->left_y[tx_size_y]; - uint64_t *above_y = &lfm->above_y[tx_size_y]; - uint64_t *int_4x4_y = &lfm->int_4x4_y; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; int i; - int w = num_8x8_blocks_wide_lookup[block_size]; - int h = num_8x8_blocks_high_lookup[block_size]; if (!filter_level) { return; } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { vpx_memset(&lfm->lfl_y[index], filter_level, w); @@ -601,7 +602,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= 
left_prediction_mask[block_size] << shift_y; - if (skip && ref > INTRA_FRAME) + if (mbmi->skip && is_inter_block(mbmi)) return; *above_y |= (size_mask[block_size] & @@ -610,9 +611,8 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) << shift_y; - if (tx_size_y == TX_4X4) { + if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; - } } // This function sets up the bit masks for the entire 64x64 region represented @@ -868,13 +868,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); } -static uint8_t build_lfi(const loop_filter_info_n *lfi_n, - const MB_MODE_INFO *mbmi) { - const int seg = mbmi->segment_id; - const int ref = mbmi->ref_frame[0]; - return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]]; -} - static void filter_selectively_vert(uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, @@ -916,7 +909,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm, const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_x; const int col_step = 1 << ss_y; - const int row_step_stride = cm->mode_info_stride * row_step; + const int row_step_stride = cm->mi_stride * row_step; struct buf_2d *const dst = &plane->dst; uint8_t* const dst0 = dst->buf; unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; @@ -953,7 +946,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm, // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = - build_lfi(&cm->lf_info, &mi[0].mbmi))) + get_filter_level(&cm->lf_info, &mi[0].mbmi))) continue; // Build masks based on the transform size of each block @@ -1208,17 +1201,17 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, xd->plane[1].subsampling_x == 1); for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * 
cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; - setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); // TODO(JBB): Make setup_mask work for non 420. if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, - cm->mode_info_stride, &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, + &lfm); for (plane = 0; plane < num_planes; ++plane) { if (use_420) diff --git a/source/libvpx/vp9/common/vp9_mvref_common.c b/source/libvpx/vp9/common/vp9_mvref_common.c index e5f3fed..61682c4 100644 --- a/source/libvpx/vp9/common/vp9_mvref_common.c +++ b/source/libvpx/vp9/common/vp9_mvref_common.c @@ -148,28 +148,30 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector it will also // skip all additional processing and jump to done! -#define ADD_MV_REF_LIST(MV) \ +#define ADD_MV_REF_LIST(mv) \ do { \ if (refmv_count) { \ - if ((MV).as_int != mv_ref_list[0].as_int) { \ - mv_ref_list[refmv_count] = (MV); \ + if ((mv).as_int != mv_ref_list[0].as_int) { \ + mv_ref_list[refmv_count] = (mv); \ goto Done; \ } \ } else { \ - mv_ref_list[refmv_count++] = (MV); \ + mv_ref_list[refmv_count++] = (mv); \ } \ } while (0) // If either reference frame is different, not INTRA, and they // are different from each other scale and add the mv to our list. 
-#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \ +#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \ do { \ - if ((CANDIDATE)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \ - if ((CANDIDATE)->ref_frame[1] != ref_frame && \ - has_second_ref(CANDIDATE) && \ - (CANDIDATE)->mv[1].as_int != (CANDIDATE)->mv[0].as_int) \ - ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \ + } \ } while (0) @@ -188,15 +190,19 @@ static INLINE int is_inside(const TileInfo *const tile, // to try and find candidate reference vectors. static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, - MODE_INFO *mi, const MODE_INFO *prev_mi, - MV_REFERENCE_FRAME ref_frame, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, - int block_idx, int mi_row, int mi_col) { + int block, int mi_row, int mi_col) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; + const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi + ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] + : NULL; + const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; + + const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; - const MB_MODE_INFO *const prev_mbmi = cm->coding_use_prev_mi && prev_mi ? 
- &prev_mi->mbmi : NULL; + int different_ref_found = 0; int context_counter = 0; @@ -209,24 +215,17 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi_8x8[mv_ref->col + mv_ref->row - * xd->mode_info_stride]; + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; + different_ref_found = 1; - // Check if the candidate comes from the same reference frame. - if (candidate->ref_frame[0] == ref_frame) { - ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, - mv_ref->col, block_idx)); - different_ref_found = candidate->ref_frame[1] != ref_frame; - } else { - if (candidate->ref_frame[1] == ref_frame) - // Add second motion vector if it has the same ref_frame. 
- ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, - mv_ref->col, block_idx)); - different_ref_found = 1; - } + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block)); + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block)); } } @@ -236,18 +235,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col + - mv_ref->row - * xd->mode_info_stride]->mbmi; + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]->mbmi; + different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) { + if (candidate->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(candidate->mv[0]); - different_ref_found = candidate->ref_frame[1] != ref_frame; - } else { - if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[1]); - different_ref_found = 1; - } + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate->mv[1]); } } @@ -266,19 +261,17 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col + - mv_ref->row - * xd->mode_info_stride]->mbmi; + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row + * xd->mi_stride]->mbmi; // If the candidate is INTRA we don't want to consider its mv. - if (is_inter_block(candidate)) - IF_DIFF_REF_FRAME_ADD_MV(candidate); + IF_DIFF_REF_FRAME_ADD_MV(candidate); } } } // Since we still don't have a candidate we'll try the last frame. 
- if (prev_mbmi && is_inter_block(prev_mbmi)) + if (prev_mbmi) IF_DIFF_REF_FRAME_ADD_MV(prev_mbmi); Done: @@ -292,11 +285,10 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, - MODE_INFO *mi, const MODE_INFO *prev_mi, - MV_REFERENCE_FRAME ref_frame, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col) { - find_mv_refs_idx(cm, xd, tile, mi, prev_mi, ref_frame, mv_ref_list, -1, + find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col); } @@ -328,14 +320,14 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest, int_mv *near) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; assert(MAX_MV_REF_CANDIDATES == 2); - find_mv_refs_idx(cm, xd, tile, mi, xd->last_mi, mi->mbmi.ref_frame[ref], - mv_list, block, mi_row, mi_col); + find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block, + mi_row, mi_col); near->as_int = 0; switch (block) { diff --git a/source/libvpx/vp9/common/vp9_mvref_common.h b/source/libvpx/vp9/common/vp9_mvref_common.h index 04cb000..903ac02 100644 --- a/source/libvpx/vp9/common/vp9_mvref_common.h +++ b/source/libvpx/vp9/common/vp9_mvref_common.h @@ -31,10 +31,8 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, const TileInfo *const tile, - MODE_INFO *mi, const MODE_INFO *prev_mi, - MV_REFERENCE_FRAME ref_frame, - int_mv *mv_ref_list, - int mi_row, int mi_col); + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, int mi_row, int mi_col); // check a list of motion vectors by sad score using a number rows of pixels // above and a number cols of pixels in the left to select the one with best 
diff --git a/source/libvpx/vp9/common/vp9_onyx.h b/source/libvpx/vp9/common/vp9_onyx.h deleted file mode 100644 index 2220868..0000000 --- a/source/libvpx/vp9/common/vp9_onyx.h +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_COMMON_VP9_ONYX_H_ -#define VP9_COMMON_VP9_ONYX_H_ - -#include "./vpx_config.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vpx/vp8cx.h" -#include "vpx_scale/yv12config.h" -#include "vp9/common/vp9_ppflags.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_SEGMENTS 8 - - typedef int *VP9_PTR; - - /* Create/destroy static data structures. */ - - typedef enum { - NORMAL = 0, - FOURFIVE = 1, - THREEFIVE = 2, - ONETWO = 3 - } VPX_SCALING; - - typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 - } VP9_REFFRAME; - - - typedef enum { - USAGE_LOCAL_FILE_PLAYBACK = 0x0, - USAGE_STREAM_FROM_SERVER = 0x1, - USAGE_CONSTRAINED_QUALITY = 0x2, - USAGE_CONSTANT_QUALITY = 0x3, - } END_USAGE; - - - typedef enum { - MODE_GOODQUALITY = 0x1, - MODE_BESTQUALITY = 0x2, - MODE_FIRSTPASS = 0x3, - MODE_SECONDPASS = 0x4, - MODE_SECONDPASS_BEST = 0x5, - MODE_REALTIME = 0x6, - } MODE; - - typedef enum { - FRAMEFLAGS_KEY = 1, - FRAMEFLAGS_GOLDEN = 2, - FRAMEFLAGS_ALTREF = 4, - } FRAMETYPE_FLAGS; - - typedef enum { - NO_AQ = 0, - VARIANCE_AQ = 1, - COMPLEXITY_AQ = 2, - AQ_MODES_COUNT // This should always be the last member of the enum - } AQ_MODES; - - typedef struct { - int version; // 4 versions of bitstream defined: - // 0 - best quality/slowest decode, - // 3 - lowest quality/fastest decode - int width; // width of 
data passed to the compressor - int height; // height of data passed to the compressor - double framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in kilobits per second - - int noise_sensitivity; // pre processing blur: recommendation 0 - int sharpness; // sharpening output: recommendation 0: - int cpu_used; - unsigned int rc_max_intra_bitrate_pct; - - // mode -> - // (0)=Realtime/Live Encoding. This mode is optimized for realtime - // encoding (for example, capturing a television signal or feed from - // a live camera). ( speed setting controls how fast ) - // (1)=Good Quality Fast Encoding. The encoder balances quality with the - // amount of time it takes to encode the output. ( speed setting - // controls how fast ) - // (2)=One Pass - Best Quality. The encoder places priority on the - // quality of the output over encoding speed. The output is compressed - // at the highest possible quality. This option takes the longest - // amount of time to encode. ( speed setting ignored ) - // (3)=Two Pass - First Pass. The encoder generates a file of statistics - // for use in the second encoding pass. ( speed setting controls how - // fast ) - // (4)=Two Pass - Second Pass. The encoder uses the statistics that were - // generated in the first encoding pass to create the compressed - // output. ( speed setting controls how fast ) - // (5)=Two Pass - Second Pass Best. The encoder uses the statistics that - // were generated in the first encoding pass to create the compressed - // output using the highest possible quality, and taking a - // longer amount of time to encode.. ( speed setting ignored ) - int mode; - - // Key Framing Operations - int auto_key; // autodetect cut scenes and set the keyframes - int key_freq; // maximum distance to key frame. 
- - int lag_in_frames; // how many frames lag before we start encoding - - // ---------------------------------------------------------------- - // DATARATE CONTROL OPTIONS - - int end_usage; // vbr or cbr - - // buffer targeting aggressiveness - int under_shoot_pct; - int over_shoot_pct; - - // buffering parameters - int64_t starting_buffer_level; // in seconds - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; - - // Frame drop threshold. - int drop_frames_water_mark; - - // controlling quality - int fixed_q; - int worst_allowed_q; - int best_allowed_q; - int cq_level; - int lossless; - int aq_mode; // Adaptive Quantization mode - - // two pass datarate control - int two_pass_vbrbias; // two pass datarate control tweaks - int two_pass_vbrmin_section; - int two_pass_vbrmax_section; - // END DATARATE CONTROL OPTIONS - // ---------------------------------------------------------------- - - // Spatial and temporal scalability. - int ss_number_layers; // Number of spatial layers. - int ts_number_layers; // Number of temporal layers. - // Bitrate allocation for spatial layers. - int ss_target_bitrate[VPX_SS_MAX_LAYERS]; - // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. - int ts_target_bitrate[VPX_TS_MAX_LAYERS]; - int ts_rate_decimator[VPX_TS_MAX_LAYERS]; - - // these parameters aren't to be used in final build don't use!!! - int play_alternate; - int alt_freq; - - int encode_breakout; // early breakout : for video conf recommend 800 - - /* Bitfield defining the error resiliency features to enable. - * Can provide decodable frames after losses in previous - * frames and decodable partitions after losses in the same frame. - */ - unsigned int error_resilient_mode; - - /* Bitfield defining the parallel decoding mode where the - * decoding in successive frames may be conducted in parallel - * just by decoding the frame headers. 
- */ - unsigned int frame_parallel_decoding_mode; - - int arnr_max_frames; - int arnr_strength; - int arnr_type; - - int tile_columns; - int tile_rows; - - struct vpx_fixed_buf two_pass_stats_in; - struct vpx_codec_pkt_list *output_pkt_list; - - vp8e_tuning tuning; - } VP9_CONFIG; - - - void vp9_initialize_enc(); - - VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf); - void vp9_remove_compressor(VP9_PTR *comp); - - void vp9_change_config(VP9_PTR onyx, VP9_CONFIG *oxcf); - - // receive a frames worth of data. caller can assume that a copy of this - // frame is made and not just a copy of the pointer.. - int vp9_receive_raw_frame(VP9_PTR comp, unsigned int frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); - - int vp9_get_compressed_data(VP9_PTR comp, unsigned int *frame_flags, - size_t *size, uint8_t *dest, - int64_t *time_stamp, int64_t *time_end, - int flush); - - int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags); - - int vp9_use_as_reference(VP9_PTR comp, int ref_frame_flags); - - int vp9_update_reference(VP9_PTR comp, int ref_frame_flags); - - int vp9_copy_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - - int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb); - - int vp9_set_reference_enc(VP9_PTR comp, VP9_REFFRAME ref_frame_flag, - YV12_BUFFER_CONFIG *sd); - - int vp9_update_entropy(VP9_PTR comp, int update); - - int vp9_set_roimap(VP9_PTR comp, unsigned char *map, - unsigned int rows, unsigned int cols, - int delta_q[MAX_SEGMENTS], - int delta_lf[MAX_SEGMENTS], - unsigned int threshold[MAX_SEGMENTS]); - - int vp9_set_active_map(VP9_PTR comp, unsigned char *map, - unsigned int rows, unsigned int cols); - - int vp9_set_internal_size(VP9_PTR comp, - VPX_SCALING horiz_mode, VPX_SCALING vert_mode); - - int vp9_set_size_literal(VP9_PTR comp, unsigned int width, - unsigned int height); - - void vp9_set_svc(VP9_PTR comp, int use_svc); 
- - int vp9_get_quantizer(VP9_PTR c); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_COMMON_VP9_ONYX_H_ diff --git a/source/libvpx/vp9/common/vp9_onyxc_int.h b/source/libvpx/vp9/common/vp9_onyxc_int.h index e6d6ea7..77f563f 100644 --- a/source/libvpx/vp9/common/vp9_onyxc_int.h +++ b/source/libvpx/vp9/common/vp9_onyxc_int.h @@ -48,42 +48,6 @@ extern const struct { PARTITION_CONTEXT left; } partition_context_lookup[BLOCK_SIZES]; -typedef struct frame_contexts { - vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; - vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; - vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; - vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; - vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS - 1]; - vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; - vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; - vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vp9_prob single_ref_prob[REF_CONTEXTS][2]; - vp9_prob comp_ref_prob[REF_CONTEXTS]; - struct tx_probs tx_probs; - vp9_prob skip_probs[SKIP_CONTEXTS]; - nmv_context nmvc; -} FRAME_CONTEXT; - -typedef struct { - unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; - unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; - unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; - vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; - unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] - [COEF_BANDS][COEFF_CONTEXTS]; - unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS]; - unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; - unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; - unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][2][2]; - unsigned int comp_ref[REF_CONTEXTS][2]; - struct tx_counts tx; - unsigned int skip[SKIP_CONTEXTS][2]; - nmv_context_counts mv; -} FRAME_COUNTS; - typedef enum { SINGLE_REFERENCE = 0, @@ -162,7 
+126,7 @@ typedef struct VP9Common { int MBs; int mb_rows, mi_rows; int mb_cols, mi_cols; - int mode_info_stride; + int mi_stride; /* profile settings */ TX_MODE tx_mode; @@ -215,7 +179,10 @@ typedef struct VP9Common { FRAME_COUNTS counts; unsigned int current_video_frame; - int version; + BITSTREAM_PROFILE profile; + + // BITS_8 in versions 0 and 1, BITS_10 or BITS_12 in version 2 + BIT_DEPTH bit_depth; #if CONFIG_VP9_POSTPROC struct postproc_state postproc_state; @@ -238,6 +205,9 @@ typedef struct VP9Common { // Handles memory for the codec. InternalFrameBufferList int_frame_buffers; + + PARTITION_CONTEXT *above_seg_context; + ENTROPY_CONTEXT *above_context; } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { @@ -270,23 +240,33 @@ static INLINE int mi_cols_aligned_to_sb(int n_mis) { return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); } -static INLINE const vp9_prob* get_partition_probs(VP9_COMMON *cm, int ctx) { +static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dqcoeff = xd->dqcoeff[i]; + xd->above_context[i] = cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + } + + xd->above_seg_context = cm->above_seg_context; + xd->mi_stride = cm->mi_stride; +} + +static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm, + int ctx) { return cm->frame_type == KEY_FRAME ? 
vp9_kf_partition_probs[ctx] : cm->fc.partition_prob[ctx]; } -static INLINE void set_skip_context( - MACROBLOCKD *xd, - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE], - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16], - int mi_row, int mi_col) { +static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { const int above_idx = mi_col * 2; const int left_idx = (mi_row * 2) & 15; int i; - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; - pd->above_context = above_context[i] + (above_idx >> pd->subsampling_x); - pd->left_context = left_context[i] + (left_idx >> pd->subsampling_y); + pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; + pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; } } @@ -304,7 +284,7 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->left_available = (mi_col > tile->mi_col_start); } -static void set_prev_mi(VP9_COMMON *cm) { +static INLINE void set_prev_mi(VP9_COMMON *cm) { const int use_prev_in_find_mv_refs = cm->width == cm->last_width && cm->height == cm->last_height && !cm->intra_only && @@ -312,19 +292,19 @@ static void set_prev_mi(VP9_COMMON *cm) { // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. cm->prev_mi = use_prev_in_find_mv_refs ? 
- cm->prev_mip + cm->mode_info_stride + 1 : NULL; + cm->prev_mip + cm->mi_stride + 1 : NULL; } static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { return cm->frame_type == KEY_FRAME || cm->intra_only; } -static INLINE void update_partition_context( - PARTITION_CONTEXT *above_seg_context, - PARTITION_CONTEXT left_seg_context[8], - int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) { - PARTITION_CONTEXT *const above_ctx = above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = left_seg_context + (mi_row & MI_MASK); +static INLINE void update_partition_context(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE subsize, + BLOCK_SIZE bsize) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); // num_4x4_blocks_wide_lookup[bsize] / 2 const int bs = num_8x8_blocks_wide_lookup[bsize]; @@ -336,12 +316,11 @@ static INLINE void update_partition_context( vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs); } -static INLINE int partition_plane_context( - const PARTITION_CONTEXT *above_seg_context, - const PARTITION_CONTEXT left_seg_context[8], - int mi_row, int mi_col, BLOCK_SIZE bsize) { - const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK); +static INLINE int partition_plane_context(const MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); const int bsl = mi_width_log2(bsize); const int bs = 1 << bsl; diff --git a/source/libvpx/vp9/common/vp9_postproc.h b/source/libvpx/vp9/common/vp9_postproc.h index b07d5d0..ebebc1a 100644 --- a/source/libvpx/vp9/common/vp9_postproc.h +++ b/source/libvpx/vp9/common/vp9_postproc.h @@ -13,6 +13,7 @@ #define VP9_COMMON_VP9_POSTPROC_H_ 
#include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" #ifdef __cplusplus diff --git a/source/libvpx/vp9/common/vp9_ppflags.h b/source/libvpx/vp9/common/vp9_ppflags.h index 8168935..e8b04d2 100644 --- a/source/libvpx/vp9/common/vp9_ppflags.h +++ b/source/libvpx/vp9/common/vp9_ppflags.h @@ -33,10 +33,12 @@ typedef struct { int post_proc_flag; int deblocking_level; int noise_level; +#if CONFIG_POSTPROC_VISUALIZER int display_ref_frame_flag; int display_mb_modes_flag; int display_b_modes_flag; int display_mv_flag; +#endif // CONFIG_POSTPROC_VISUALIZER } vp9_ppflags_t; #ifdef __cplusplus diff --git a/source/libvpx/vp9/common/vp9_pred_common.c b/source/libvpx/vp9/common/vp9_pred_common.c index 197bcb6..bc9d6ef 100644 --- a/source/libvpx/vp9/common/vp9_pred_common.c +++ b/source/libvpx/vp9/common/vp9_pred_common.c @@ -348,7 +348,7 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. int vp9_get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type]; + const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type]; const MB_MODE_INFO *const above_mbmi = get_mbmi(get_above_mi(xd)); const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd)); const int has_above = above_mbmi != NULL; diff --git a/source/libvpx/vp9/common/vp9_pred_common.h b/source/libvpx/vp9/common/vp9_pred_common.h index 6c7a0d3..1a7ba86 100644 --- a/source/libvpx/vp9/common/vp9_pred_common.h +++ b/source/libvpx/vp9/common/vp9_pred_common.h @@ -19,11 +19,11 @@ extern "C" { #endif static INLINE const MODE_INFO *get_above_mi(const MACROBLOCKD *const xd) { - return xd->up_available ? xd->mi_8x8[-xd->mode_info_stride] : NULL; + return xd->up_available ? 
xd->mi[-xd->mi_stride] : NULL; } static INLINE const MODE_INFO *get_left_mi(const MACROBLOCKD *const xd) { - return xd->left_available ? xd->mi_8x8[-1] : NULL; + return xd->left_available ? xd->mi[-1] : NULL; } int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids, diff --git a/source/libvpx/vp9/common/vp9_reconinter.c b/source/libvpx/vp9/common/vp9_reconinter.c index bdcfafa..e722d6a 100644 --- a/source/libvpx/vp9/common/vp9_reconinter.c +++ b/source/libvpx/vp9/common/vp9_reconinter.c @@ -144,8 +144,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -193,8 +194,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + (scaled_mv.col >> SUBPEL_BITS); inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel, - xs, ys); + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } } @@ -212,7 +212,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) @@ -244,11 +244,13 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, // TODO(jingning): This function serves as a placeholder for decoder prediction // using on demand border extension. It should be moved to /decoder/ directory. 
static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -265,15 +267,21 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv : mi_mv_pred_q4(mi, ref)) : mi->mbmi.mv[ref].as_mv; + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + // mv_precision precision is MV_PRECISION_Q4. + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + MV32 scaled_mv; int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; const YV12_BUFFER_CONFIG *ref_buf = xd->block_refs[ref]->buf; - const MV mv_q4 = { - mv.row * (1 << (1 - pd->subsampling_y)), - mv.col * (1 << (1 - pd->subsampling_x)) - }; // Get reference frame pointer, width and height. if (plane == 0) { @@ -286,24 +294,38 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, ref_frame = plane == 1 ? ref_buf->u_buffer : ref_buf->v_buffer; } - // Get block position in current frame. - x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; - y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + if (vp9_is_scaled(sf)) { + // Co-ordinate of containing block to pixel precision. 
+ int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); - // Precision of x0_16 and y0_16 is 1/16th pixel. - x0_16 = x0 << SUBPEL_BITS; - y0_16 = y0 << SUBPEL_BITS; + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; - if (vp9_is_scaled(sf)) { + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; - // Map the top left corner of the block into the reference frame. - x0 = sf->scale_value_x(x0, sf); - y0 = sf->scale_value_y(y0, sf); - x0_16 = sf->scale_value_x(x0_16, sf); - y0_16 = sf->scale_value_y(y0_16, sf); } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + scaled_mv.row = mv_q4.row; scaled_mv.col = mv_q4.col; xs = ys = 16; @@ -347,15 +369,16 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0; // Extend the border. 
- build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0, - x0, y0, x1 - x0, y1 - y0, frame_width, frame_height); - buf_stride = x1 - x0; + build_mc_border(buf_ptr1, pre_buf->stride, xd->mc_buf, x1 - x0 + 1, + x0, y0, x1 - x0 + 1, y1 - y0 + 1, frame_width, + frame_height); + buf_stride = x1 - x0 + 1; buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3; } } inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys); + subpel_y, sf, w, h, ref, kernel, xs, ys); } } @@ -372,16 +395,51 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - dec_build_inter_predictors(xd, plane, i++, + dec_build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y); } else { - dec_build_inter_predictors(xd, plane, 0, + dec_build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y); } } } + +void vp9_setup_dst_planes(MACROBLOCKD *xd, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col) { + uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, + pd->subsampling_x, pd->subsampling_y); + } +} + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *sf) { + if (src != NULL) { + int i; + uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int 
strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, + sf, pd->subsampling_x, pd->subsampling_y); + } + } +} diff --git a/source/libvpx/vp9/common/vp9_reconinter.h b/source/libvpx/vp9/common/vp9_reconinter.h index dccd609..86f3158 100644 --- a/source/libvpx/vp9/common/vp9_reconinter.h +++ b/source/libvpx/vp9/common/vp9_reconinter.h @@ -57,41 +57,12 @@ static INLINE void setup_pred_plane(struct buf_2d *dst, dst->stride = stride; } -// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col -static void setup_dst_planes(MACROBLOCKD *xd, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col) { - uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; - int i; +void vp9_setup_dst_planes(MACROBLOCKD *xd, const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col); - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, - pd->subsampling_x, pd->subsampling_y); - } -} - -static void setup_pre_planes(MACROBLOCKD *xd, int idx, - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *sf) { - if (src != NULL) { - int i; - uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer, - src->alpha_buffer}; - const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, - src->alpha_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - struct macroblockd_plane *const pd = &xd->plane[i]; - setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, - sf, pd->subsampling_x, pd->subsampling_y); - } - } -} +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const 
YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, + const struct scale_factors *sf); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/common/vp9_reconintra.c b/source/libvpx/vp9/common/vp9_reconintra.c index 71a41a9..44951b5 100644 --- a/source/libvpx/vp9/common/vp9_reconintra.c +++ b/source/libvpx/vp9/common/vp9_reconintra.c @@ -18,21 +18,17 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_onyxc_int.h" -const TX_TYPE mode2txfm_map[MB_MODE_COUNT] = { - DCT_DCT, // DC - ADST_DCT, // V - DCT_ADST, // H - DCT_DCT, // D45 - ADST_ADST, // D135 - ADST_DCT, // D117 - DCT_ADST, // D153 - DCT_ADST, // D207 - ADST_DCT, // D63 - ADST_ADST, // TM - DCT_DCT, // NEARESTMV - DCT_DCT, // NEARMV - DCT_DCT, // ZEROMV - DCT_DCT // NEWMV +const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { + DCT_DCT, // DC + ADST_DCT, // V + DCT_ADST, // H + DCT_DCT, // D45 + ADST_ADST, // D135 + ADST_DCT, // D117 + DCT_ADST, // D153 + DCT_ADST, // D207 + ADST_DCT, // D63 + ADST_ADST, // TM }; #define intra_pred_sized(type, size) \ @@ -351,6 +347,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + vpx_memset(left_col, 129, 64); + // left if (left_available) { if (xd->mb_to_bottom_edge < 0) { @@ -370,8 +368,6 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } - } else { - vpx_memset(left_col, 129, bs); } // TODO(hkuang) do not extend 2*bs pixels for all modes. 
@@ -438,7 +434,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, } void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, - TX_SIZE tx_size, int mode, + TX_SIZE tx_size, MB_PREDICTION_MODE mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane) { diff --git a/source/libvpx/vp9/common/vp9_reconintra.h b/source/libvpx/vp9/common/vp9_reconintra.h index 800736d..abc1767 100644 --- a/source/libvpx/vp9/common/vp9_reconintra.h +++ b/source/libvpx/vp9/common/vp9_reconintra.h @@ -19,7 +19,7 @@ extern "C" { #endif void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, - TX_SIZE tx_size, int mode, + TX_SIZE tx_size, MB_PREDICTION_MODE mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane); diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl new file mode 100644 index 0000000..b455592 --- /dev/null +++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -0,0 +1,782 @@ +sub vp9_common_forward_decls() { +print <<EOF +/* + * VP9 + */ + +#include "vpx/vpx_integer.h" +#include "vp9/common/vp9_enums.h" + +struct macroblockd; + +/* Encoder forward decls */ +struct macroblock; +struct vp9_variance_vtable; + +#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] +struct mv; +union int_mv; +struct yv12_buffer_config; +EOF +} +forward_decls qw/vp9_common_forward_decls/; + +# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. +if (vpx_config("CONFIG_USE_X86INC") eq "yes") { + $mmx_x86inc = 'mmx'; + $sse_x86inc = 'sse'; + $sse2_x86inc = 'sse2'; + $ssse3_x86inc = 'ssse3'; + $avx_x86inc = 'avx'; + $avx2_x86inc = 'avx2'; +} else { + $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = + $avx_x86inc = $avx2_x86inc = ''; +} + +# this variable is for functions that are 64 bit only. 
+if ($opts{arch} eq "x86_64") { + $mmx_x86_64 = 'mmx'; + $sse2_x86_64 = 'sse2'; + $ssse3_x86_64 = 'ssse3'; + $avx_x86_64 = 'avx'; + $avx2_x86_64 = 'avx2'; +} else { + $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = + $avx_x86_64 = $avx2_x86_64 = ''; +} + +# +# RECON +# +add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_4x4/; + +add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_4x4/; + +add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc"; + +add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 
+specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_4x4/; + +add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_4x4/; + +add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_4x4/; + +add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_8x8/; + +add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_8x8/; + +add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc"; + 
# 8x8 tm/dc predictors, the full 16x16 and 32x32 intra predictor sets, the
# loopfilter entry points (vertical/horizontal at 4/8/16 taps plus _dual
# forms), CONFIG_VP9_POSTPROC-guarded post-processing hooks (with explicit
# $..._sse2= aliases mapping to the historical _xmm/_wmt symbol names), blend
# helpers, the sub-pixel convolution set (vp9_convolve*), inverse transforms
# (idct/iht/iwht, incl. the 32x32_34 neon alias to the 1024-coeff version),
# and — inside the CONFIG_VP9_ENCODER guard — variance, get_sse_sum,
# sub-pixel (avg) variance, and the start of the SAD prototypes.
# NOTE(review): statements are split mid-token across the mangled physical
# lines in this span; do not edit tokens here without first restoring the
# original one-statement-per-line layout.
+add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_8x8/; + +add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_8x8/; + +add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_8x8/; + +add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_16x16/; + +add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_16x16/; + +add_proto qw/void 
vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc"; + +add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_16x16/; + +add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_16x16/; + +add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_16x16/; + +add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc"; + +add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc"; + +add_proto 
qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d117_predictor_32x32/; + +add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d135_predictor_32x32/; + +add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_d153_predictor_32x32/; + +add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc"; + +add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64"; + +add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc"; + +add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_top_predictor_32x32/; + +add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_left_predictor_32x32/; + +add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vp9_dc_128_predictor_32x32/; + +# +# Loopfilter +# +add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; +specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/; + +add_proto qw/void 
vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/; + +add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; +specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/; + +add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const 
uint8_t *thresh1"; +specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/; + +# +# post proc +# +if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") { +add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit"; +specialize qw/vp9_mbpost_proc_down mmx sse2/; +$vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm; + +add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit"; +specialize qw/vp9_mbpost_proc_across_ip sse2/; +$vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm; + +add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; +specialize qw/vp9_post_proc_down_and_across mmx sse2/; +$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm; + +add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; +specialize qw/vp9_plane_add_noise mmx sse2/; +$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; +} + +add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_mb_inner/; + +add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_mb_outer/; + +add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"; +specialize qw/vp9_blend_b/; + +# +# Sub Pixel Filters +# +add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_convolve_avg/, "const 
uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc"; + +add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/; + +add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; +specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/; + +# +# dct +# +add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int 
dest_stride"; +specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/; + +add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/; +$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon; + +add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; +specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/; + +add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"; +specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/; + +add_proto qw/void 
vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type"; +specialize qw/vp9_iht16x16_256_add sse2 dspr2/; + +# dct and add + +add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_iwht4x4_1_add/; + +add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride"; +specialize qw/vp9_iwht4x4_16_add/; + +# +# Encoder functions below this point. +# +if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { + + +# variance +add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x16/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x32/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance32x32/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize 
qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_16x16 sse2/; +$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2; + +add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc"; + +add_proto qw/void vp9_get_sse_sum_8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_8x8 sse2/; +$vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2; + +add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance8x4/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance4x8/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize 
qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t 
*second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int 
*sse"; +specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc"; + +# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form +add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t 
*ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; +#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt + +add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc"; + +add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad64x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x64/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad64x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x16/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad32x32/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad8x4/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad4x8/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; +specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc"; + 
# Remaining averaging-SAD prototypes (8x8_avg down to 4x4_avg), the half-pel
# variance helpers (16x16 with "$sse2_x86inc" variants; the 32x32/64x64 forms
# specialize to nothing, i.e. C only), and the 3-candidate SAD set
# (vp9_sad*x3, some with sse3/ssse3 variants).
+add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad4x8_avg/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; +specialize qw/vp9_sad4x4_avg/, "$sse_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc"; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_h/; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize 
qw/vp9_variance_halfpixvar64x64_v/; + +add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar64x64_hv/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_h/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_v/; + +add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_variance_halfpixvar32x32_hv/; + +add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x64x3/; + +add_proto qw/void vp9_sad32x32x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x32x3/; + +add_proto qw/void vp9_sad16x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x16x3 sse3 ssse3/; + +add_proto qw/void vp9_sad16x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x8x3 sse3 ssse3/; + +add_proto qw/void vp9_sad8x16x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x16x3 sse3/; + +add_proto qw/void vp9_sad8x8x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x8x3 sse3/; 
# Final sadx3 entry (4x4), the 8-candidate SAD set (vp9_sad*x8, taking a
# uint32_t sad_array and several sse4 variants), and the 4-candidate SAD set
# (vp9_sad*x4d, taking an array of reference pointers, mostly sse2).
# NOTE(review): the "TODO(jingning) ... convert these 4x8/8x4 functions into
# sse2 form" comment below looks stale — vp9_sad8x4x4d already specializes
# sse2 — but it is preserved verbatim; confirm against upstream before
# removing.  The last add_proto (vp9_sad4x8x4d) is truncated by the end of
# this fragment.
+ +add_proto qw/void vp9_sad4x4x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x4x3 sse3/; + +add_proto qw/void vp9_sad64x64x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad64x64x8/; + +add_proto qw/void vp9_sad32x32x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad32x32x8/; + +add_proto qw/void vp9_sad16x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad16x16x8 sse4/; + +add_proto qw/void vp9_sad16x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad16x8x8 sse4/; + +add_proto qw/void vp9_sad8x16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x16x8 sse4/; + +add_proto qw/void vp9_sad8x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x8x8 sse4/; + +add_proto qw/void vp9_sad8x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad8x4x8/; + +add_proto qw/void vp9_sad4x8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad4x8x8/; + +add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; +specialize qw/vp9_sad4x4x8 sse4/; + +add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x64x4d sse2 avx2/; + +add_proto qw/void vp9_sad32x64x4d/, "const uint8_t 
*src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x64x4d sse2/; + +add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad64x32x4d sse2/; + +add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x16x4d sse2/; + +add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x32x4d sse2/; + +add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad32x32x4d sse2 avx2/; + +add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x16x4d sse2/; + +add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad16x8x4d sse2/; + +add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x16x4d sse2/; + +add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x8x4d sse2/; + +# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form +add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad8x4x4d sse2/; + +add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int 
src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x8x4d sse/; + +add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; +specialize qw/vp9_sad4x4x4d sse/; + +#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse"; +#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/; + +add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; + +add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse8x16/; + +add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse16x8/; + +add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_mse8x8/; + +add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_mse64x64/; + +add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_sub_pixel_mse32x32/; + +add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *"; +specialize qw/vp9_get_mb_ss mmx sse2/; +# ENCODEMB INVOKE + +add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t 
*ssz"; +specialize qw/vp9_block_error/, "$sse2_x86inc"; + +add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; +specialize qw/vp9_subtract_block/, "$sse2_x86inc"; + +add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_b/, "$ssse3_x86_64"; + +add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; +specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; + +# +# Structured Similarity (SSIM) +# +if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") { + add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64"; + + add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr"; + specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64"; +} + +# fdct functions +add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht4x4 sse2 avx2/; + +add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int 
tx_type"; +specialize qw/vp9_fht8x8 sse2 avx2/; + +add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type"; +specialize qw/vp9_fht16x16 sse2 avx2/; + +add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fwht4x4/; + +add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct4x4 sse2 avx2/; + +add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct8x8 sse2 avx2/; + +add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct16x16 sse2 avx2/; + +add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct32x32 sse2 avx2/; + +add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride"; +specialize qw/vp9_fdct32x32_rd sse2 avx2/; + +# +# Motion search +# +add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"; +specialize qw/vp9_full_search_sad sse3 sse4_1/; +$vp9_full_search_sad_sse3=vp9_full_search_sadx3; +$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; + +add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_refining_search_sad sse3/; +$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4; + +add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_diamond_search_sad sse3/; 
+$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4; + +add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +specialize qw/vp9_full_range_search/; + +add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; +specialize qw/vp9_temporal_filter_apply sse2/; + +} +# end encoder functions +1; diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.sh b/source/libvpx/vp9/common/vp9_rtcd_defs.sh deleted file mode 100755 index 5b44970..0000000 --- a/source/libvpx/vp9/common/vp9_rtcd_defs.sh +++ /dev/null @@ -1,760 +0,0 @@ -vp9_common_forward_decls() { -cat <<EOF -/* - * VP9 - */ - -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_enums.h" - -struct macroblockd; - -/* Encoder forward decls */ -struct macroblock; -struct vp9_variance_vtable; - -#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] -struct mv; -union int_mv; -struct yv12_buffer_config; -EOF -} -forward_decls vp9_common_forward_decls - -# x86inc.asm doesn't work if pic is enabled on 32 bit platforms so no assembly. -[ "$CONFIG_USE_X86INC" = "yes" ] && mmx_x86inc=mmx && sse_x86inc=sse && - sse2_x86inc=sse2 && ssse3_x86inc=ssse3 && avx_x86inc=avx && avx2_x86inc=avx2 - -# this variable is for functions that are 64 bit only. 
-[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 && - ssse3_x86_64=ssse3 && avx_x86_64=avx && avx2_x86_64=avx2 - -# -# RECON -# -prototype void vp9_d207_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_4x4 $ssse3_x86inc - -prototype void vp9_d45_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_4x4 $ssse3_x86inc - -prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_4x4 $ssse3_x86inc - -prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_4x4 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_4x4 - -prototype void vp9_d135_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_4x4 - -prototype void vp9_d153_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_4x4 $ssse3_x86inc - -prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_4x4 $sse_x86inc neon - -prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2 - -prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2 - -prototype void vp9_dc_top_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_4x4 - -prototype void vp9_dc_left_predictor_4x4 
"uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_4x4 - -prototype void vp9_dc_128_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_4x4 - -prototype void vp9_d207_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_8x8 $ssse3_x86inc - -prototype void vp9_d45_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_8x8 $ssse3_x86inc - -prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_8x8 $ssse3_x86inc - -prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_8x8 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_8x8 - -prototype void vp9_d135_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_8x8 - -prototype void vp9_d153_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_8x8 $ssse3_x86inc - -prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_8x8 $sse_x86inc neon - -prototype void vp9_tm_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_8x8 $sse2_x86inc neon dspr2 - -prototype void vp9_dc_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_8x8 $sse_x86inc dspr2 - -prototype void vp9_dc_top_predictor_8x8 "uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_8x8 - -prototype void vp9_dc_left_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_8x8 - -prototype void vp9_dc_128_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_8x8 - -prototype void vp9_d207_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_16x16 $ssse3_x86inc - -prototype void vp9_d45_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_16x16 $ssse3_x86inc - -prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_16x16 $ssse3_x86inc - -prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_16x16 $ssse3_x86inc neon dspr2 - -prototype void vp9_d117_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_16x16 - -prototype void vp9_d135_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_16x16 - -prototype void vp9_d153_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_16x16 $ssse3_x86inc - -prototype void vp9_v_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_16x16 $sse2_x86inc neon - -prototype void vp9_tm_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_16x16 $sse2_x86inc neon - -prototype void vp9_dc_predictor_16x16 "uint8_t *dst, ptrdiff_t 
y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_16x16 $sse2_x86inc dspr2 - -prototype void vp9_dc_top_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_16x16 - -prototype void vp9_dc_left_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_16x16 - -prototype void vp9_dc_128_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_16x16 - -prototype void vp9_d207_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d207_predictor_32x32 $ssse3_x86inc - -prototype void vp9_d45_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d45_predictor_32x32 $ssse3_x86inc - -prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d63_predictor_32x32 $ssse3_x86inc - -prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_h_predictor_32x32 $ssse3_x86inc neon - -prototype void vp9_d117_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d117_predictor_32x32 - -prototype void vp9_d135_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d135_predictor_32x32 - -prototype void vp9_d153_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_d153_predictor_32x32 - -prototype void vp9_v_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_v_predictor_32x32 $sse2_x86inc neon - -prototype void vp9_tm_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, 
const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_32x32 $sse2_x86_64 neon - -prototype void vp9_dc_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_predictor_32x32 $sse2_x86inc - -prototype void vp9_dc_top_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_top_predictor_32x32 - -prototype void vp9_dc_left_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_left_predictor_32x32 - -prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_dc_128_predictor_32x32 - -# -# Loopfilter -# -prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_lpf_vertical_16 sse2 neon dspr2 - -prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_lpf_vertical_16_dual sse2 neon dspr2 - -prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_vertical_8 sse2 neon dspr2 - -prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_vertical_8_dual sse2 neon dspr2 - -prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_vertical_4 mmx neon dspr2 - -prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize 
vp9_lpf_vertical_4_dual sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2 - -prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_8 sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2 - -prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_lpf_horizontal_4 mmx neon dspr2 - -prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2 - -# -# post proc -# -if [ "$CONFIG_VP9_POSTPROC" = "yes" ]; then -prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit" -specialize vp9_mbpost_proc_down mmx sse2 -vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm - -prototype void vp9_mbpost_proc_across_ip "uint8_t *src, int pitch, int rows, int cols, int flimit" -specialize vp9_mbpost_proc_across_ip sse2 -vp9_mbpost_proc_across_ip_sse2=vp9_mbpost_proc_across_ip_xmm - -prototype void vp9_post_proc_down_and_across "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit" -specialize vp9_post_proc_down_and_across mmx sse2 -vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm - -prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char 
whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch" -specialize vp9_plane_add_noise mmx sse2 -vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt -fi - -prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_mb_inner - -prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_mb_outer - -prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" -specialize vp9_blend_b - -# -# Sub Pixel Filters -# -prototype void vp9_convolve_copy "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_copy $sse2_x86inc neon dspr2 - -prototype void vp9_convolve_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve_avg $sse2_x86inc neon dspr2 - -prototype void vp9_convolve8 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8 sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2 - -prototype void vp9_convolve8_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2 - -prototype void 
vp9_convolve8_avg "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg sse2 ssse3 neon dspr2 - -prototype void vp9_convolve8_avg_horiz "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 - -prototype void vp9_convolve8_avg_vert "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h" -specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 - -# -# dct -# -prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct4x4_1_add sse2 neon dspr2 - -prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct4x4_16_add sse2 neon dspr2 - -prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_1_add sse2 neon dspr2 - -prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_64_add sse2 neon dspr2 - -prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct8x8_10_add sse2 neon dspr2 - -prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_1_add sse2 neon dspr2 - -prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_256_add sse2 neon dspr2 - -prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct16x16_10_add sse2 neon dspr2 - -prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, 
int dest_stride" -specialize vp9_idct32x32_1024_add sse2 neon dspr2 - -prototype void vp9_idct32x32_34_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_34_add sse2 neon dspr2 -vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon - -prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_idct32x32_1_add sse2 neon dspr2 - -prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_iht4x4_16_add sse2 neon dspr2 - -prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type" -specialize vp9_iht8x8_64_add sse2 neon dspr2 - -prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type" -specialize vp9_iht16x16_256_add sse2 dspr2 - -# dct and add - -prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_iwht4x4_1_add - -prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_iwht4x4_16_add - -# -# Encoder functions below this point. 
-# -if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then - - -# variance -prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x16 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x32 $sse2_x86inc - -prototype unsigned int vp9_variance64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance64x32 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x64 $sse2_x86inc - -prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance32x32 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance64x64 $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x16 mmx $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_variance16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance16x8 mmx $sse2_x86inc - -prototype unsigned int vp9_variance8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance8x16 mmx $sse2_x86inc - -prototype unsigned int vp9_variance8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" 
-specialize vp9_variance8x8 mmx $sse2_x86inc - -prototype void vp9_get_sse_sum_8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum" -specialize vp9_get_sse_sum_8x8 sse2 -vp9_get_sse_sum_8x8_sse2=vp9_get8x8var_sse2 - -prototype unsigned int vp9_variance8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance8x4 $sse2_x86inc - -prototype unsigned int vp9_variance4x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance4x8 $sse2_x86inc - -prototype unsigned int vp9_variance4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance4x4 mmx $sse2_x86inc - -prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_avg_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x64 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x64 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x64 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance64x32 "const uint8_t 
*src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance64x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance64x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance64x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance32x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance32x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x32 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_avg_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t 
*second_pred" -specialize vp9_sub_pixel_avg_variance32x32 $sse2_x86inc $ssse3_x86inc avx2 - -prototype unsigned int vp9_sub_pixel_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x16 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x16 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance16x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance16x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance16x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance8x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x8 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x8 "const 
uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x8 $sse2_x86inc $ssse3_x86inc - -# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form -prototype unsigned int vp9_sub_pixel_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance8x4 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance8x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance8x4 $sse2_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance4x8 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_avg_variance4x8 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance4x8 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_variance4x4 $sse_x86inc $ssse3_x86inc -#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt - -prototype unsigned int vp9_sub_pixel_avg_variance4x4 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred" -specialize vp9_sub_pixel_avg_variance4x4 $sse_x86inc $ssse3_x86inc - -prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, 
int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad64x64 $sse2_x86inc - -prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x64 $sse2_x86inc - -prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad64x32 $sse2_x86inc - -prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x16 $sse2_x86inc - -prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x32 $sse2_x86inc - -prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad32x32 $sse2_x86inc - -prototype unsigned int vp9_sad16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x16 mmx $sse2_x86inc - -prototype unsigned int vp9_sad16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad16x8 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x16 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x8 mmx $sse2_x86inc - -prototype unsigned int vp9_sad8x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad8x4 $sse2_x86inc - -prototype unsigned int vp9_sad4x8 "const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad4x8 $sse_x86inc - -prototype unsigned int vp9_sad4x4 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" -specialize vp9_sad4x4 mmx $sse_x86inc - -prototype unsigned int vp9_sad64x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad64x64_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x64_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x64_avg $sse2_x86inc - -prototype unsigned int vp9_sad64x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad64x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad16x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad32x32_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad32x32_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad16x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad16x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t 
*second_pred, unsigned int max_sad" -specialize vp9_sad16x8_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x16_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x16_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x8_avg $sse2_x86inc - -prototype unsigned int vp9_sad8x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad8x4_avg $sse2_x86inc - -prototype unsigned int vp9_sad4x8_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad4x8_avg $sse_x86inc - -prototype unsigned int vp9_sad4x4_avg "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad" -specialize vp9_sad4x4_avg $sse_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_h $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_v $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar16x16_hv $sse2_x86inc - -prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize 
vp9_variance_halfpixvar64x64_h - -prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar64x64_v - -prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar64x64_hv - -prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_h - -prototype unsigned int vp9_variance_halfpixvar32x32_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_v - -prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_variance_halfpixvar32x32_hv - -prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x64x3 - -prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x32x3 - -prototype void vp9_sad16x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x16x3 sse3 ssse3 - -prototype void vp9_sad16x8x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x8x3 sse3 ssse3 - -prototype void vp9_sad8x16x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x16x3 sse3 - -prototype void vp9_sad8x8x3 "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x8x3 sse3 - -prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x4x3 sse3 - -prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad64x64x8 - -prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad32x32x8 - -prototype void vp9_sad16x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad16x16x8 sse4 - -prototype void vp9_sad16x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad16x8x8 sse4 - -prototype void vp9_sad8x16x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x16x8 sse4 - -prototype void vp9_sad8x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x8x8 sse4 - -prototype void vp9_sad8x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad8x4x8 - -prototype void vp9_sad4x8x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad4x8x8 - -prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array" -specialize vp9_sad4x4x8 sse4 - -prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x64x4d sse2 - -prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const 
ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x64x4d sse2 - -prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad64x32x4d sse2 - -prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x16x4d sse2 - -prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x32x4d sse2 - -prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad32x32x4d sse2 - -prototype void vp9_sad16x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x16x4d sse2 - -prototype void vp9_sad16x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad16x8x4d sse2 - -prototype void vp9_sad8x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x16x4d sse2 - -prototype void vp9_sad8x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x8x4d sse2 - -# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form -prototype void vp9_sad8x4x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad8x4x4d sse2 - -prototype void vp9_sad4x8x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x8x4d sse - -prototype void vp9_sad4x4x4d "const uint8_t 
*src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array" -specialize vp9_sad4x4x4d sse - -#prototype unsigned int vp9_sub_pixel_mse16x16 "const uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse" -#specialize vp9_sub_pixel_mse16x16 sse2 mmx - -prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse16x16 mmx $sse2_x86inc $avx2_x86inc - -prototype unsigned int vp9_mse8x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse8x16 - -prototype unsigned int vp9_mse16x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse16x8 - -prototype unsigned int vp9_mse8x8 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse" -specialize vp9_mse8x8 - -prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_mse64x64 - -prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" -specialize vp9_sub_pixel_mse32x32 - -prototype unsigned int vp9_get_mb_ss "const int16_t *" -specialize vp9_get_mb_ss mmx sse2 -# ENCODEMB INVOKE - -prototype int64_t vp9_block_error "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz" -specialize vp9_block_error $sse2_x86inc - -prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride" -specialize vp9_subtract_block 
$sse2_x86inc - -prototype void vp9_quantize_b "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b $ssse3_x86_64 - -prototype void vp9_quantize_b_32x32 "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" -specialize vp9_quantize_b_32x32 $ssse3_x86_64 - -# -# Structured Similarity (SSIM) -# -if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then - prototype void vp9_ssim_parms_8x8 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_8x8 $sse2_x86_64 - - prototype void vp9_ssim_parms_16x16 "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" - specialize vp9_ssim_parms_16x16 $sse2_x86_64 -fi - -# fdct functions -prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht4x4 sse2 avx2 - -prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht8x8 sse2 avx2 - -prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" -specialize vp9_fht16x16 sse2 avx2 - -prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fwht4x4 - -prototype void vp9_fdct4x4 "const int16_t *input, int16_t *output, int stride" -specialize 
vp9_fdct4x4 sse2 avx2 - -prototype void vp9_fdct8x8 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct8x8 sse2 avx2 - -prototype void vp9_fdct16x16 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct16x16 sse2 avx2 - -prototype void vp9_fdct32x32 "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct32x32 sse2 avx2 - -prototype void vp9_fdct32x32_rd "const int16_t *input, int16_t *output, int stride" -specialize vp9_fdct32x32_rd sse2 avx2 - -# -# Motion search -# -prototype int vp9_full_search_sad "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv" -specialize vp9_full_search_sad sse3 sse4_1 -vp9_full_search_sad_sse3=vp9_full_search_sadx3 -vp9_full_search_sad_sse4_1=vp9_full_search_sadx8 - -prototype int vp9_refining_search_sad "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_refining_search_sad sse3 -vp9_refining_search_sad_sse3=vp9_refining_search_sadx4 - -prototype int vp9_diamond_search_sad "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_diamond_search_sad sse3 -vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4 - -prototype int vp9_full_range_search "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv" -specialize vp9_full_range_search - -prototype void vp9_temporal_filter_apply "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count" 
-specialize vp9_temporal_filter_apply sse2 - -fi -# end encoder functions diff --git a/source/libvpx/vp9/common/vp9_scale.c b/source/libvpx/vp9/common/vp9_scale.c index e0f1e34..d3405fc 100644 --- a/source/libvpx/vp9/common/vp9_scale.c +++ b/source/libvpx/vp9/common/vp9_scale.c @@ -13,11 +13,11 @@ #include "vp9/common/vp9_scale.h" static INLINE int scaled_x(int val, const struct scale_factors *sf) { - return val * sf->x_scale_fp >> REF_SCALE_SHIFT; + return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); } static INLINE int scaled_y(int val, const struct scale_factors *sf) { - return val * sf->y_scale_fp >> REF_SCALE_SHIFT; + return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); } static int unscaled_value(int val, const struct scale_factors *sf) { diff --git a/source/libvpx/vp9/common/vp9_systemdependent.h b/source/libvpx/vp9/common/vp9_systemdependent.h index 72edbca..e971158 100644 --- a/source/libvpx/vp9/common/vp9_systemdependent.h +++ b/source/libvpx/vp9/common/vp9_systemdependent.h @@ -12,11 +12,11 @@ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #ifdef _MSC_VER +# include <math.h> // the ceil() definition must precede intrin.h # if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86)) # include <intrin.h> # define USE_MSC_INTRIN # endif -# include <math.h> # define snprintf _snprintf #endif diff --git a/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm b/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm index dd522c6..b263837 100644 --- a/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm +++ b/source/libvpx/vp9/common/x86/vp9_copy_sse2.asm @@ -133,10 +133,14 @@ INIT_MMX sse movh m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq+dst_strideq] - pavgb m2, [dstq+dst_strideq*2] - pavgb m3, [dstq+r6q] + movh m4, [dstq] + movh m5, [dstq+dst_strideq] + movh m6, [dstq+dst_strideq*2] + movh m7, [dstq+r6q] + pavgb m0, m4 + pavgb m1, m5 + pavgb m2, m6 + pavgb m3, m7 %endif movh [dstq ], m0 movh [dstq+dst_strideq 
], m1 diff --git a/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index efa960c..b84db97 100644 --- a/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -32,6 +32,27 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 }; +#if defined(__clang__) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ + (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0) +# define MM256_BROADCASTSI128_SI256(x) \ + _mm_broadcastsi128_si256((__m128i const *)&(x)) +# else // clang > 3.3, and not 5.0 on macosx. +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +# endif // clang <= 3.3 +#elif defined(__GNUC__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) +# define MM256_BROADCASTSI128_SI256(x) \ + _mm_broadcastsi128_si256((__m128i const *)&(x)) +# elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 +# define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) +# else // gcc > 4.7 +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +# endif // gcc <= 4.6 +#else // !(gcc || clang) +# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) +#endif // __clang__ + void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, @@ -53,18 +74,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr, // in both lanes of 128 bit register. 
filtersReg =_mm_packs_epi16(filtersReg, filtersReg); // have the same data in both lanes of a 256 bit register -#if defined (__GNUC__) -#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \ -(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0)))) - filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg); -#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0)) - filtersReg32 = _mm_broadcastsi128_si256(filtersReg); -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif + filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); // duplicate only the first 16 bits (first and second byte) // across 256 bit register @@ -309,18 +319,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr, // same data in both lanes of 128 bit register. filtersReg =_mm_packs_epi16(filtersReg, filtersReg); // have the same data in both lanes of a 256 bit register -#if defined (__GNUC__) -#if ( __GNUC__ < 4 || (__GNUC__ == 4 && \ -(__GNUC_MINOR__ < 6 || (__GNUC_MINOR__ == 6 && __GNUC_PATCHLEVEL__ > 0)))) - filtersReg32 = _mm_broadcastsi128_si256((__m128i const *)&filtersReg); -#elif(__GNUC__ == 4 && (__GNUC_MINOR__ == 7 && __GNUC_PATCHLEVEL__ > 0)) - filtersReg32 = _mm_broadcastsi128_si256(filtersReg); -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif -#else - filtersReg32 = _mm256_broadcastsi128_si256(filtersReg); -#endif + filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg); // duplicate only the first 16 bits (first and second byte) // across 256 bit register diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.c b/source/libvpx/vp9/decoder/vp9_decodeframe.c index 8bebca5..022a429 100644 --- a/source/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/source/libvpx/vp9/decoder/vp9_decodeframe.c @@ -33,9 +33,9 @@ #include "vp9/decoder/vp9_decodeframe.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" +#include 
"vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_dsubexp.h" #include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_reader.h" #include "vp9/decoder/vp9_thread.h" @@ -146,7 +146,7 @@ static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) { static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) { int i; for (i = 0; i < n; ++i) - if (vp9_read(r, NMV_UPDATE_PROB)) + if (vp9_read(r, MV_UPDATE_PROB)) p[i] = (vp9_read_literal(r, 7) << 1) | 1; } @@ -187,54 +187,13 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { xd->plane[i].dequant = cm->uv_dequant[q_index]; } -// Allocate storage for each tile column. -// TODO(jzern): when max_threads <= 1 the same storage could be used for each -// tile. -static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) { - VP9_COMMON *const cm = &pbi->common; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - int i, tile_row, tile_col; - - CHECK_MEM_ERROR(cm, pbi->mi_streams, - vpx_realloc(pbi->mi_streams, tile_rows * tile_cols * - sizeof(*pbi->mi_streams))); - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - TileInfo tile; - vp9_tile_init(&tile, cm, tile_row, tile_col); - pbi->mi_streams[tile_row * tile_cols + tile_col] = - &cm->mi[tile.mi_row_start * cm->mode_info_stride - + tile.mi_col_start]; - } - } - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - CHECK_MEM_ERROR(cm, pbi->above_context[0], - vpx_realloc(pbi->above_context[0], - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols)); - for (i = 1; i < MAX_MB_PLANE; ++i) { - pbi->above_context[i] = pbi->above_context[0] + - i * sizeof(*pbi->above_context[0]) * - 2 * aligned_mi_cols; - } - - // This is sized based on the entire frame. 
Each tile operates within its - // column bounds. - CHECK_MEM_ERROR(cm, pbi->above_seg_context, - vpx_realloc(pbi->above_seg_context, - sizeof(*pbi->above_seg_context) * - aligned_mi_cols)); -} - static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, TX_SIZE tx_size, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; if (eob > 0) { TX_TYPE tx_type; - const int plane_type = pd->plane_type; + const PLANE_TYPE plane_type = pd->plane_type; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); switch (tx_size) { case TX_4X4: @@ -245,11 +204,11 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, vp9_iht4x4_16_add(dqcoeff, dst, stride, tx_type); break; case TX_8X8: - tx_type = get_tx_type_8x8(plane_type, xd); + tx_type = get_tx_type(plane_type, xd); vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: - tx_type = get_tx_type_16x16(plane_type, xd); + tx_type = get_tx_type(plane_type, xd); vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: @@ -282,11 +241,11 @@ struct intra_args { static void predict_and_reconstruct_intra_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct intra_args *const args = arg; + struct intra_args *const args = (struct intra_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const MB_PREDICTION_MODE mode = (plane == 0) ? 
get_y_mode(mi, block) : mi->mbmi.uv_mode; int x, y; @@ -318,7 +277,7 @@ struct inter_args { static void reconstruct_inter_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct inter_args *args = arg; + struct inter_args *args = (struct inter_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -332,67 +291,57 @@ static void reconstruct_inter_block(int plane, int block, *args->eobtotal += eob; } -static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - BLOCK_SIZE bsize, int mi_row, int mi_col) { +static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int x_mis = MIN(bw, cm->mi_cols - mi_col); const int y_mis = MIN(bh, cm->mi_rows - mi_row); - const int offset = mi_row * cm->mode_info_stride + mi_col; - const int tile_offset = tile->mi_row_start * cm->mode_info_stride + - tile->mi_col_start; + const int offset = mi_row * cm->mi_stride + mi_col; int x, y; - xd->mi_8x8 = cm->mi_grid_visible + offset; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; - - xd->last_mi = cm->coding_use_prev_mi && cm->prev_mi ? - xd->prev_mi_8x8[0] : NULL; - - xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset; - xd->mi_8x8[0]->mbmi.sb_type = bsize; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + xd->mi[0]->mbmi.sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) - xd->mi_8x8[y * cm->mode_info_stride + x] = xd->mi_8x8[0]; + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; - set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); + set_skip_context(xd, mi_row, mi_col); // Distance of Mb to the various image edges. 
These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + return &xd->mi[0]->mbmi; } static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, int idx, int mi_row, int mi_col) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; xd->block_refs[idx] = ref_buffer; if (!vp9_is_valid_scale(&ref_buffer->sf)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid scale factors"); - setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, &ref_buffer->sf); + vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, + &ref_buffer->sf); xd->corrupted |= ref_buffer->buf->corrupted; } -static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - int mi_row, int mi_col, - vp9_reader *r, BLOCK_SIZE bsize) { +static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; - MB_MODE_INFO *mbmi; - - set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; - // Has to be called after set_offsets - mbmi = &xd->mi_8x8[0]->mbmi; - if (mbmi->skip) { reset_skip_context(xd, bsize); } else { @@ -411,8 +360,6 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (has_second_ref(mbmi)) set_ref(cm, xd, 1, mi_row, mi_col); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - // Prediction vp9_dec_build_inter_predictors_sb(xd, 
mi_row, mi_col, bsize); @@ -432,16 +379,14 @@ static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, int mi_row, int mi_col, BLOCK_SIZE bsize, vp9_reader *r) { - const int ctx = partition_plane_context(xd->above_seg_context, - xd->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; PARTITION_TYPE p; if (has_rows && has_cols) - p = vp9_read_tree(r, vp9_partition_tree, probs); + p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs); else if (!has_rows && has_cols) p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; else if (has_rows && !has_cols) @@ -455,10 +400,10 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, return p; } -static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - int mi_row, int mi_col, - vp9_reader* r, BLOCK_SIZE bsize) { +static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + vp9_reader* r, BLOCK_SIZE bsize) { const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; PARTITION_TYPE partition; BLOCK_SIZE subsize; @@ -469,27 +414,27 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); } else { switch (partition) { case PARTITION_NONE: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); break; case PARTITION_HORZ: - 
decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); if (mi_row + hbs < cm->mi_rows) - decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); break; case PARTITION_VERT: - decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col, r, subsize); if (mi_col + hbs < cm->mi_cols) - decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); break; case PARTITION_SPLIT: - decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize); - decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); - decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); - decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); + decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); break; default: assert(0 && "Invalid partition type"); @@ -499,8 +444,7 @@ static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(xd->above_seg_context, xd->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void setup_token_decoder(const uint8_t *data, @@ -668,9 +612,7 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { read_frame_size(rb, &cm->display_width, &cm->display_height); } -static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { - VP9_COMMON *cm = &pbi->common; - +static void apply_frame_size(VP9_COMMON *cm, int width, int height) { if (cm->width != 
width || cm->height != height) { // Change in frame size. // TODO(agrange) Don't test width/height, check overall size. @@ -697,18 +639,15 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { } } -static void setup_frame_size(VP9D_COMP *pbi, - struct vp9_read_bit_buffer *rb) { +static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { int width, height; read_frame_size(rb, &width, &height); - apply_frame_size(pbi, width, height); - setup_display_size(&pbi->common, rb); + apply_frame_size(cm, width, height); + setup_display_size(cm, rb); } -static void setup_frame_size_with_refs(VP9D_COMP *pbi, +static void setup_frame_size_with_refs(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - int width, height; int found = 0, i; for (i = 0; i < REFS_PER_FRAME; ++i) { @@ -728,24 +667,11 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - apply_frame_size(pbi, width, height); + apply_frame_size(cm, width, height); setup_display_size(cm, rb); } -static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, - int tile_row, int tile_col) { - int i; - const int tile_cols = 1 << pbi->common.log2_tile_cols; - xd->mi_stream = pbi->mi_streams[tile_row * tile_cols + tile_col]; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - xd->above_context[i] = pbi->above_context[i]; - } - // see note in alloc_tile_storage(). 
- xd->above_seg_context = pbi->above_seg_context; -} - -static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, +static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; @@ -769,7 +695,7 @@ static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); + decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); } if (pbi->do_loopfilter_inline) { @@ -850,15 +776,15 @@ typedef struct TileBuffer { int col; // only used with multi-threaded decoding } TileBuffer; -static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { +static const uint8_t *decode_tiles(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const uint8_t *end = NULL; vp9_reader r; @@ -867,11 +793,11 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); // Load tile data into tile_buffers for (tile_row = 0; tile_row < tile_rows; ++tile_row) { @@ -898,7 +824,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { vp9_tile_init(&tile, cm, tile_row, col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r); - setup_tile_context(pbi, xd, tile_row, col); decode_tile(pbi, &tile, &r); if (last_tile) @@ -909,17 +834,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return end; } -static void setup_tile_macroblockd(TileWorkerData *const tile_data) { - MACROBLOCKD *xd = &tile_data->xd; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - pd[i].dqcoeff = tile_data->dqcoeff[i]; - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); - } -} - static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *const tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; @@ -931,8 +845,8 @@ static int tile_worker_hook(void *arg1, void *arg2) { vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_modes_sb(tile_data->cm, &tile_data->xd, tile, - mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); + decode_partition(tile_data->cm, &tile_data->xd, tile, + mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); } } return !tile_data->xd.corrupted; @@ -951,10 +865,11 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { 
+static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; const uint8_t *bit_reader_end = NULL; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -967,12 +882,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { assert(tile_rows == 1); (void)tile_rows; - if (num_workers > pbi->num_tile_workers) { + // TODO(jzern): See if we can remove the restriction of passing in max + // threads to the decoder. + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->oxcf.max_threads & ~1; int i; + // TODO(jzern): Allocate one less worker, as in the current code we only + // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_realloc(pbi->tile_workers, - num_workers * sizeof(*pbi->tile_workers))); - for (i = pbi->num_tile_workers; i < num_workers; ++i) { + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (i = 0; i < num_threads; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -980,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { CHECK_MEM_ERROR(cm, worker->data1, vpx_memalign(32, sizeof(TileWorkerData))); CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + if (i < num_threads - 1 && !vp9_worker_reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -988,17 +907,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { } // Reset tile decoding hook - for (n = 0; n < pbi->num_tile_workers; ++n) { + for (n = 0; n < num_workers; ++n) { pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; } // Note: this memset assumes 
above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_mi_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); // Load tile data into tile_buffers for (n = 0; n < tile_cols; ++n) { @@ -1043,11 +961,10 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; vp9_tile_init(tile, tile_data->cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader); - setup_tile_context(pbi, &tile_data->xd, 0, buf->col); - setup_tile_macroblockd(tile_data); + init_macroblockd(cm, &tile_data->xd); + vp9_zero(tile_data->xd.dqcoeff); worker->had_error = 0; if (i == num_workers - 1 || n == tile_cols - 1) { @@ -1092,12 +1009,13 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -#define RESERVED \ - if (vp9_rb_read_bit(rb)) \ - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ - "Reserved bit must be unset") +static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + return (BITSTREAM_PROFILE) profile; +} -static size_t read_uncompressed_header(VP9D_COMP *pbi, +static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; size_t sz; @@ -1109,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->version = vp9_rb_read_bit(rb); - RESERVED; + cm->profile = read_profile(rb); + 
if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); cm->show_existing_frame = vp9_rb_read_bit(rb); if (cm->show_existing_frame) { @@ -1135,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, if (cm->frame_type == KEY_FRAME) { check_sync_code(cm, rb); - - cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace + if (cm->profile > PROFILE_1) + cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10; + cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1147,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { @@ -1163,7 +1084,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->frame_refs[i].buf = get_frame_new_buffer(cm); } - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { cm->intra_only = cm->show_frame ? 
0 : vp9_rb_read_bit(rb); @@ -1174,7 +1095,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, check_sync_code(cm, rb); pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); @@ -1186,7 +1107,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); } - setup_frame_size_with_refs(pbi, rb); + setup_frame_size_with_refs(cm, rb); cm->allow_high_precision_mv = vp9_rb_read_bit(rb); cm->interp_filter = read_interp_filter(rb); @@ -1234,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, return sz; } -static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; @@ -1334,14 +1255,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { - int i; +int vp9_decode_frame(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const uint8_t *data = pbi->source; - const uint8_t *const data_end = pbi->source + pbi->source_sz; - struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler }; const size_t first_partition_size = read_uncompressed_header(pbi, &rb); const int keyframe = cm->frame_type == KEY_FRAME; @@ -1367,7 +1286,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { pbi->do_loopfilter_inline = (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData))); + CHECK_MEM_ERROR(cm, 
pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, @@ -1375,9 +1295,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { } } - alloc_tile_storage(pbi, tile_rows, tile_cols); + init_macroblockd(cm, &pbi->mb); - xd->mode_info_stride = cm->mode_info_stride; if (cm->coding_use_prev_mi) set_prev_mi(cm); else @@ -1388,8 +1307,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); - for (i = 0; i < MAX_MB_PLANE; ++i) - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); + vp9_zero(xd->dqcoeff); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); @@ -1398,9 +1316,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { // single-frame tile decoding. 
if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && cm->frame_parallel_decoding_mode) { - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size); + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size); + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } new_fb->corrupted |= xd->corrupted; diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.h b/source/libvpx/vp9/decoder/vp9_decodeframe.h index 4537bc8..8a19daf 100644 --- a/source/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/source/libvpx/vp9/decoder/vp9_decodeframe.h @@ -17,10 +17,13 @@ extern "C" { #endif struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; void vp9_init_dequantizer(struct VP9Common *cm); -int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end); + +int vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/decoder/vp9_decodemv.c b/source/libvpx/vp9/decoder/vp9_decodemv.c index 0fb7a15..3618f12 100644 --- a/source/libvpx/vp9/decoder/vp9_decodemv.c +++ b/source/libvpx/vp9/decoder/vp9_decodemv.c @@ -21,7 +21,6 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodeframe.h" -#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_reader.h" static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { @@ -64,7 +63,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vp9_reader *r) { const int ctx = vp9_get_tx_size_context(xd); const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs); - TX_SIZE tx_size = vp9_read(r, tx_probs[0]); + int tx_size = vp9_read(r, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { tx_size += vp9_read(r, tx_probs[1]); if (tx_size != TX_8X8 && max_tx_size >= 
TX_32X32) @@ -73,7 +72,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, if (!cm->frame_parallel_decoding_mode) ++get_tx_counts(max_tx_size, ctx, &cm->counts.tx)[tx_size]; - return tx_size; + return (TX_SIZE)tx_size; } static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_MODE tx_mode, @@ -105,7 +104,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int segment_id; if (!seg->enabled) @@ -122,7 +121,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; int predicted_segment_id, segment_id; @@ -162,11 +161,12 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - const MODE_INFO *above_mi = xd->mi_8x8[-cm->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + const MODE_INFO *above_mi = xd->mi[-cm->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; const BLOCK_SIZE bsize = mbmi->sb_type; + int i; mbmi->segment_id = read_intra_segment_id(cm, xd, mi_row, mi_col, r); mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); @@ -174,32 +174,28 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm, mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; - if (bsize >= BLOCK_8X8) { - const MB_PREDICTION_MODE A = vp9_above_block_mode(mi, above_mi, 0); - const MB_PREDICTION_MODE L = vp9_left_block_mode(mi, left_mi, 0); - mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); - } else { - // Only 4x4, 4x8, 8x4 blocks - const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 - const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 - int idx, idy; - - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int ib = idy * 2 + idx; - const MB_PREDICTION_MODE A = vp9_above_block_mode(mi, above_mi, ib); - const MB_PREDICTION_MODE L = vp9_left_block_mode(mi, left_mi, ib); - const MB_PREDICTION_MODE b_mode = read_intra_mode(r, - vp9_kf_y_mode_prob[A][L]); - mi->bmi[ib].as_mode = b_mode; - if (num_4x4_h == 2) - mi->bmi[ib + 2].as_mode = b_mode; - if (num_4x4_w == 2) - mi->bmi[ib + 1].as_mode = b_mode; - } - } - - mbmi->mode = mi->bmi[3].as_mode; + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); + mbmi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 
2)); + break; + default: + mbmi->mode = read_intra_mode(r, + get_y_mode_probs(mi, above_mi, left_mi, 0)); } mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]); @@ -241,14 +237,15 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE j = vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); + const MV_JOINT_TYPE joint_type = + (MV_JOINT_TYPE)vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - if (mv_joint_vertical(j)) + if (mv_joint_vertical(joint_type)) diff.row = read_mv_component(r, &ctx->comps[0], use_hp); - if (mv_joint_horizontal(j)) + if (mv_joint_horizontal(joint_type)) diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -262,7 +259,8 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, vp9_reader *r) { if (cm->reference_mode == REFERENCE_MODE_SELECT) { const int ctx = vp9_get_reference_mode_context(cm, xd); - const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]); + const REFERENCE_MODE mode = + (REFERENCE_MODE)vp9_read(r, cm->fc.comp_inter_prob[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.comp_inter[ctx][mode]; return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE @@ -279,7 +277,8 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, FRAME_COUNTS *const counts = &cm->counts; if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); + ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); @@ -318,8 +317,9 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, static INLINE INTERP_FILTER 
read_switchable_interp_filter( VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - const int type = vp9_read_tree(r, vp9_switchable_interp_tree, - cm->fc.switchable_interp_prob[ctx]); + const INTERP_FILTER type = + (INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree, + cm->fc.switchable_interp_prob[ctx]); if (!cm->frame_parallel_decoding_mode) ++cm->counts.switchable_interp[ctx][type]; return type; @@ -329,30 +329,29 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi, vp9_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; + int i; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; - if (bsize >= BLOCK_8X8) { - mbmi->mode = read_intra_mode_y(cm, r, size_group_lookup[bsize]); - } else { - // Only 4x4, 4x8, 8x4 blocks - const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 - const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 - int idx, idy; - - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int ib = idy * 2 + idx; - const int b_mode = read_intra_mode_y(cm, r, 0); - mi->bmi[ib].as_mode = b_mode; - if (num_4x4_h == 2) - mi->bmi[ib + 2].as_mode = b_mode; - if (num_4x4_w == 2) - mi->bmi[ib + 1].as_mode = b_mode; - } - } - mbmi->mode = mi->bmi[3].as_mode; + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = read_intra_mode_y(cm, r, 0); + mbmi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, r, 0); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode_y(cm, r, 0); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, r, 0); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode = + read_intra_mode_y(cm, r, 0); + break; + default: + mbmi->mode = read_intra_mode_y(cm, r, 
size_group_lookup[bsize]); } mbmi->uv_mode = read_intra_mode_uv(cm, r, mbmi->mode); @@ -437,7 +436,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, for (ref = 0; ref < 1 + is_compound; ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; - vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, frame, mbmi->ref_mvs[frame], + vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame], mi_row, mi_col); } @@ -470,7 +469,7 @@ static void read_inter_block_mode_info(VP9_COMMON *const cm, const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2 int idx, idy; - int b_mode; + MB_PREDICTION_MODE b_mode; int_mv nearest_sub8x8[2], near_sub8x8[2]; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { @@ -516,7 +515,7 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; diff --git a/source/libvpx/vp9/decoder/vp9_decodemv.h b/source/libvpx/vp9/decoder/vp9_decodemv.h index 539c984..7394b62 100644 --- a/source/libvpx/vp9/decoder/vp9_decodemv.h +++ b/source/libvpx/vp9/decoder/vp9_decodemv.h @@ -11,7 +11,6 @@ #ifndef VP9_DECODER_VP9_DECODEMV_H_ #define VP9_DECODER_VP9_DECODEMV_H_ -#include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_reader.h" #ifdef __cplusplus diff --git a/source/libvpx/vp9/decoder/vp9_onyxd_if.c b/source/libvpx/vp9/decoder/vp9_decoder.c index 24248a4..fd74478 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd_if.c +++ b/source/libvpx/vp9/decoder/vp9_decoder.c @@ -12,23 +12,25 @@ #include <limits.h> #include <stdio.h> +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_timer.h" +#include "vpx_scale/vpx_scale.h" + +#include "vp9/common/vp9_alloccommon.h" +#include 
"vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif -#include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_quant_common.h" -#include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_systemdependent.h" -#include "vpx_ports/vpx_timer.h" + #include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_dthread.h" -#include "./vpx_scale_rtcd.h" #define WRITE_RECON_BUFFER 0 #if WRITE_RECON_BUFFER == 1 @@ -102,23 +104,14 @@ void vp9_initialize_dec() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); vp9_init_quant_tables(); init_done = 1; } } -static void init_macroblockd(VP9D_COMP *const pbi) { - MACROBLOCKD *xd = &pbi->mb; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) - pd[i].dqcoeff = pbi->dqcoeff[i]; -} - -VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { - VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); +VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) { + VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; if (!cm) @@ -126,12 +119,9 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_zero(*pbi); - // Initialize the references to not point to any frame buffers. - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - vp9_remove_decompressor(pbi); + vp9_decoder_remove(pbi); return NULL; } @@ -140,9 +130,13 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_rtcd(); + // Initialize the references to not point to any frame buffers. 
+ vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + + cm->current_video_frame = 0; pbi->oxcf = *oxcf; pbi->ready_for_new_data = 1; - cm->current_video_frame = 0; + pbi->decoded_key_frame = 0; // vp9_init_dequantizer() is first called here. Add check in // frame_init_dequantizer() to avoid unnecessary calling of @@ -152,22 +146,17 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_loop_filter_init(cm); cm->error.setjmp = 0; - pbi->decoded_key_frame = 0; - - init_macroblockd(pbi); vp9_worker_init(&pbi->lf_worker); return pbi; } -void vp9_remove_decompressor(VP9D_COMP *pbi) { +void vp9_decoder_remove(VP9Decoder *pbi) { + VP9_COMMON *const cm = &pbi->common; int i; - if (!pbi) - return; - - vp9_remove_common(&pbi->common); + vp9_remove_common(cm); vp9_worker_end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); for (i = 0; i < pbi->num_tile_workers; ++i) { @@ -179,17 +168,11 @@ void vp9_remove_decompressor(VP9D_COMP *pbi) { vpx_free(pbi->tile_workers); if (pbi->num_tile_workers) { - VP9_COMMON *const cm = &pbi->common; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - VP9LfSync *const lf_sync = &pbi->lf_row_sync; - - vp9_loop_filter_dealloc(lf_sync, sb_rows); + vp9_loop_filter_dealloc(&pbi->lf_row_sync, sb_rows); } - vpx_free(pbi->mi_streams); - vpx_free(pbi->above_context[0]); - vpx_free(pbi->above_seg_context); vpx_free(pbi); } @@ -199,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a, a->uv_height == b->uv_height && a->uv_width == b->uv_width; } -vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; @@ -226,17 +209,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, } -vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMMON 
*cm = &pbi->common; RefBuffer *ref_buf = NULL; - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - * encoder is using the frame buffers for. This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. - */ + // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + // encoder is using the frame buffers for. This is just a stub to keep the + // vpxenc --test-decode functionality working, and will be replaced in a + // later commit that adds VP9-specific controls for this functionality. if (ref_frame_flag == VP9_LAST_FLAG) { ref_buf = &cm->frame_refs[0]; } else if (ref_frame_flag == VP9_GOLD_FLAG) { @@ -244,13 +225,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_buf = &cm->frame_refs[2]; } else { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); - return pbi->common.error.error_code; + return cm->error.error_code; } if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else { int *ref_fb_ptr = &ref_buf->idx; @@ -267,11 +248,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, vp8_yv12_copy_frame(sd, ref_buf->buf); } - return pbi->common.error.error_code; + return cm->error.error_code; } -int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { +int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; if (index < 0 || index >= REF_FRAMES) @@ -282,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { } /* If any buffer updating is signaled it should be done here. 
*/ -static void swap_frame_buffers(VP9D_COMP *pbi) { +static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; @@ -306,35 +287,24 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { cm->frame_refs[ref_index].idx = INT_MAX; } -int vp9_receive_compressed_data(VP9D_COMP *pbi, +int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource, int64_t time_stamp) { - VP9_COMMON *cm = NULL; + VP9_COMMON *const cm = &pbi->common; const uint8_t *source = *psource; int retcode = 0; - /*if(pbi->ready_for_new_data == 0) - return -1;*/ - - if (!pbi) - return -1; - - cm = &pbi->common; cm->error.error_code = VPX_CODEC_OK; - pbi->source = source; - pbi->source_sz = size; - - if (pbi->source_sz == 0) { - /* This is used to signal that we are missing frames. - * We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + if (size == 0) { + // This is used to signal that we are missing frames. + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; } @@ -348,14 +318,13 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. 
- * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; @@ -367,7 +336,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, cm->error.setjmp = 1; - retcode = vp9_decode_frame(pbi, psource); + retcode = vp9_decode_frame(pbi, source, source + size, psource); if (retcode < 0) { cm->error.error_code = VPX_CODEC_ERROR; @@ -421,37 +390,20 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame; if (cm->show_frame) { - if (!cm->show_existing_frame) { - // current mip will be the prev_mip for the next frame - MODE_INFO *temp = cm->prev_mip; - MODE_INFO **temp2 = cm->prev_mi_grid_base; - cm->prev_mip = cm->mip; - cm->mip = temp; - cm->prev_mi_grid_base = cm->mi_grid_base; - cm->mi_grid_base = temp2; - - // update the upper left visible macroblock ptrs - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + - cm->mode_info_stride + 1; - - pbi->mb.mi_8x8 = cm->mi_grid_visible; - pbi->mb.mi_8x8[0] = cm->mi; - } + if (!cm->show_existing_frame) + vp9_swap_mi_and_prev_mi(cm); + cm->current_video_frame++; } pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; - pbi->source_sz = 0; cm->error.setjmp = 0; return retcode; } -int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, +int 
vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; @@ -470,19 +422,12 @@ int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(&pbi->common, sd, flags); #else - - if (pbi->common.frame_to_show) { *sd = *pbi->common.frame_to_show; sd->y_width = pbi->common.width; sd->y_height = pbi->common.height; sd->uv_width = sd->y_width >> pbi->common.subsampling_x; sd->uv_height = sd->y_height >> pbi->common.subsampling_y; - ret = 0; - } else { - ret = -1; - } - #endif /*!CONFIG_POSTPROC*/ vp9_clear_system_state(); return ret; diff --git a/source/libvpx/vp9/decoder/vp9_onyxd.h b/source/libvpx/vp9/decoder/vp9_decoder.h index 203e9fa..c9dc251 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd.h +++ b/source/libvpx/vp9/decoder/vp9_decoder.h @@ -8,64 +8,88 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_DECODER_VP9_ONYXD_H_ -#define VP9_DECODER_VP9_ONYXD_H_ +#ifndef VP9_DECODER_VP9_DECODER_H_ +#define VP9_DECODER_VP9_DECODER_H_ +#include "./vpx_config.h" + +#include "vpx/vpx_codec.h" #include "vpx_scale/yv12config.h" + +#include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_ppflags.h" -#include "vpx/vpx_codec.h" + +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/decoder/vp9_dthread.h" +#include "vp9/decoder/vp9_thread.h" #ifdef __cplusplus extern "C" { #endif -struct VP9Decompressor; - typedef struct { int width; int height; int version; - int postprocess; int max_threads; int inv_tile_order; - int input_partition; } VP9D_CONFIG; -typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 -} VP9_REFFRAME; +typedef struct VP9Decoder { + DECLARE_ALIGNED(16, MACROBLOCKD, mb); + + DECLARE_ALIGNED(16, VP9_COMMON, common); + + VP9D_CONFIG oxcf; + + int64_t last_time_stamp; + int ready_for_new_data; + + int refresh_frame_flags; + + int decoded_key_frame; + + int initial_width; + 
int initial_height; + + int do_loopfilter_inline; // apply loopfilter to available rows immediately + VP9Worker lf_worker; + + VP9Worker *tile_workers; + int num_tile_workers; + + VP9LfSync lf_row_sync; +} VP9Decoder; void vp9_initialize_dec(); -int vp9_receive_compressed_data(struct VP9Decompressor *pbi, +int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **dest, int64_t time_stamp); -int vp9_get_raw_frame(struct VP9Decompressor *pbi, +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -vpx_codec_err_t vp9_set_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp9_get_reference_dec(struct VP9Decompressor *pbi, +int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decompressor *vp9_create_decompressor(VP9D_CONFIG *oxcf); +struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf); -void vp9_remove_decompressor(struct VP9Decompressor *pbi); +void vp9_decoder_remove(struct VP9Decoder *pbi); #ifdef __cplusplus } // extern "C" #endif -#endif // VP9_DECODER_VP9_ONYXD_H_ +#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/source/libvpx/vp9/decoder/vp9_detokenize.c b/source/libvpx/vp9/decoder/vp9_detokenize.c index 52e78cd..860da53 100644 --- a/source/libvpx/vp9/decoder/vp9_detokenize.c +++ b/source/libvpx/vp9/decoder/vp9_detokenize.c @@ -86,7 +86,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int 
ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; diff --git a/source/libvpx/vp9/decoder/vp9_detokenize.h b/source/libvpx/vp9/decoder/vp9_detokenize.h index ce3d765..5278e97 100644 --- a/source/libvpx/vp9/decoder/vp9_detokenize.h +++ b/source/libvpx/vp9/decoder/vp9_detokenize.h @@ -12,7 +12,7 @@ #ifndef VP9_DECODER_VP9_DETOKENIZE_H_ #define VP9_DECODER_VP9_DETOKENIZE_H_ -#include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_reader.h" #ifdef __cplusplus diff --git a/source/libvpx/vp9/decoder/vp9_dthread.c b/source/libvpx/vp9/decoder/vp9_dthread.c index 542732a..9b124c9 100644 --- a/source/libvpx/vp9/decoder/vp9_dthread.c +++ b/source/libvpx/vp9/decoder/vp9_dthread.c @@ -9,10 +9,13 @@ */ #include "./vpx_config.h" + +#include "vpx_mem/vpx_mem.h" + #include "vp9/common/vp9_reconinter.h" + #include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_onyxd_int.h" -#include "vpx_mem/vpx_mem.h" +#include "vp9/decoder/vp9_decoder.h" #if CONFIG_MULTITHREAD static INLINE void mutex_lock(pthread_mutex_t *const mutex) { @@ -96,7 +99,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, for (r = start; r < stop; r += num_lf_workers) { const int mi_row = r << MI_BLOCK_SIZE_LOG2; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (c = 0; c < sb_cols; ++c) { const int mi_col = c << MI_BLOCK_SIZE_LOG2; @@ -104,9 +107,8 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, sync_read(lf_sync, r, c); - setup_dst_planes(xd, frame_buffer, mi_row, mi_col); - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride, - &lfm); + vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm); for 
(plane = 0; plane < num_planes; ++plane) { vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); @@ -130,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. -void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, +void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -166,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); // Set up loopfilter thread data. - for (i = 0; i < pbi->num_tile_workers; ++i) { + // The decoder is using num_workers instead of pbi->num_tile_workers + // because it has been observed that using more threads on the + // loopfilter, than there are tile columns in the frame will hurt + // performance on Android. This is because the system will only + // schedule the tile decode workers on cores equal to the number + // of tile columns. Then if the decoder tries to use more threads for the + // loopfilter, it will hurt performance because of contention. If the + // multithreading code changes in the future then the number of workers + // used by the loopfilter should be revisited. 
+ for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; @@ -182,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = &pbi->lf_row_sync; - lf_data->num_lf_workers = pbi->num_tile_workers; + lf_data->num_lf_workers = num_workers; // Start loopfiltering - if (i == pbi->num_tile_workers - 1) { + if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); @@ -193,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, } // Wait till all rows are finished - for (i = 0; i < pbi->num_tile_workers; ++i) { + for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } } diff --git a/source/libvpx/vp9/decoder/vp9_dthread.h b/source/libvpx/vp9/decoder/vp9_dthread.h index 6d4450f..005bd7b 100644 --- a/source/libvpx/vp9/decoder/vp9_dthread.h +++ b/source/libvpx/vp9/decoder/vp9_dthread.h @@ -18,13 +18,12 @@ struct macroblockd; struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; typedef struct TileWorkerData { struct VP9Common *cm; vp9_reader bit_reader; DECLARE_ALIGNED(16, struct macroblockd, xd); - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); // Row-based parallel loopfilter data LFWorkerData lfdata; @@ -51,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. 
-void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi, +void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, diff --git a/source/libvpx/vp9/decoder/vp9_onyxd_int.h b/source/libvpx/vp9/decoder/vp9_onyxd_int.h deleted file mode 100644 index 6c6c239..0000000 --- a/source/libvpx/vp9/decoder/vp9_onyxd_int.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef VP9_DECODER_VP9_ONYXD_INT_H_ -#define VP9_DECODER_VP9_ONYXD_INT_H_ - -#include "./vpx_config.h" - -#include "vp9/common/vp9_onyxc_int.h" -#include "vp9/decoder/vp9_dthread.h" -#include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_thread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct VP9Decompressor { - DECLARE_ALIGNED(16, MACROBLOCKD, mb); - - DECLARE_ALIGNED(16, VP9_COMMON, common); - - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); - - VP9D_CONFIG oxcf; - - const uint8_t *source; - size_t source_sz; - - int64_t last_time_stamp; - int ready_for_new_data; - - int refresh_frame_flags; - - int decoded_key_frame; - - int initial_width; - int initial_height; - - int do_loopfilter_inline; // apply loopfilter to available rows immediately - VP9Worker lf_worker; - - VP9Worker *tile_workers; - int num_tile_workers; - - VP9LfSync lf_row_sync; - - /* Each tile column has its own MODE_INFO stream. This array indexes them by - tile column index. 
*/ - MODE_INFO **mi_streams; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - PARTITION_CONTEXT *above_seg_context; -} VP9D_COMP; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.c b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.c new file mode 100644 index 0000000..778a635 --- /dev/null +++ b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "vp9/decoder/vp9_read_bit_buffer.h" + +size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { + return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0); +} + +int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { + const size_t off = rb->bit_offset; + const size_t p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - (int)off % CHAR_BIT; + if (rb->bit_buffer + p >= rb->bit_buffer_end) { + rb->error_handler(rb->error_handler_data); + return 0; + } else { + const int bit = (rb->bit_buffer[p] & (1 << q)) >> q; + rb->bit_offset = off + 1; + return bit; + } +} + +int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) { + int value = 0, bit; + for (bit = bits - 1; bit >= 0; bit--) + value |= vp9_rb_read_bit(rb) << bit; + return value; +} + +int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, + int bits) { + const int value = vp9_rb_read_literal(rb, bits); + return vp9_rb_read_bit(rb) ? 
-value : value; +} diff --git a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h index 8cb4247..fc88bd7 100644 --- a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h +++ b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h @@ -30,36 +30,13 @@ struct vp9_read_bit_buffer { vp9_rb_error_handler error_handler; }; -static size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) { - return rb->bit_offset / CHAR_BIT + (rb->bit_offset % CHAR_BIT > 0); -} +size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb); -static int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) { - const size_t off = rb->bit_offset; - const size_t p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - (int)off % CHAR_BIT; - if (rb->bit_buffer + p >= rb->bit_buffer_end) { - rb->error_handler(rb->error_handler_data); - return 0; - } else { - const int bit = (rb->bit_buffer[p] & (1 << q)) >> q; - rb->bit_offset = off + 1; - return bit; - } -} +int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb); -static int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) { - int value = 0, bit; - for (bit = bits - 1; bit >= 0; bit--) - value |= vp9_rb_read_bit(rb) << bit; - return value; -} +int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits); -static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, - int bits) { - const int value = vp9_rb_read_literal(rb, bits); - return vp9_rb_read_bit(rb) ? -value : value; -} +int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, int bits); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/encoder/vp9_aq_complexity.c b/source/libvpx/vp9/encoder/vp9_aq_complexity.c new file mode 100644 index 0000000..47ad8d8 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_segmentation.h" + +static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = + {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + +void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + struct segmentation *const seg = &cm->seg; + + // Make SURE use of floating point in this function is safe. + vp9_clear_system_state(); + + if (cm->frame_type == KEY_FRAME || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { + int segment; + + // Clear down the segment map. + vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + + // Clear down the complexity map used for rd. + vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + + // Use some of the segments for in frame Q adjustment. 
+ for (segment = 1; segment < 2; segment++) { + const int qindex_delta = + vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, + in_frame_q_adj_ratio[segment]); + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } + } +} + +// Select a segment for the current SB64 +void vp9_select_in_frame_q_segment(VP9_COMP *cpi, + int mi_row, int mi_col, + int output_enabled, int projected_rate) { + VP9_COMMON *const cm = &cpi->common; + + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + int complexity_metric = 64; + int x, y; + + unsigned char segment; + + if (!output_enabled) { + segment = 0; + } else { + // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). + // It is converted to bits * 256 units. + const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / + (bw * bh); + + if (projected_rate < (target_rate / 4)) { + segment = 1; + } else { + segment = 0; + } + + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } + } + + // Fill in the entires in the segment map corresponding to this SB64. + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; + cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = + (unsigned char)complexity_metric; + } + } +} diff --git a/source/libvpx/vp9/encoder/vp9_aq_complexity.h b/source/libvpx/vp9/encoder/vp9_aq_complexity.h new file mode 100644 index 0000000..af031a4 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +// Select a segment for the current SB64. +void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col, + int output_enabled, int projected_rate); + + +// This function sets up a set of segments with delta Q values around +// the baseline frame quantizer. +void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c new file mode 100644 index 0000000..7879091 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_segmentation.h" + +struct CYCLIC_REFRESH { + // Percentage of super-blocks per frame that are targeted as candidates + // for cyclic refresh. 
+ int max_sbs_perframe; + // Maximum q-delta as percentage of base q. + int max_qdelta_perc; + // Block size below which we don't apply cyclic refresh. + BLOCK_SIZE min_block_size; + // Superblock starting index for cycling through the frame. + int sb_index; + // Controls how long a block will need to wait to be refreshed again. + int time_for_refresh; + // Actual number of (8x8) blocks that were applied delta-q (segment 1). + int num_seg_blocks; + // Actual encoding bits for segment 1. + int actual_seg_bits; + // RD mult. parameters for segment 1. + int rdmult; + // Cyclic refresh map. + signed char *map; + // Projected rate and distortion for the current superblock. + int64_t projected_rate_sb; + int64_t projected_dist_sb; + // Thresholds applied to projected rate/distortion of the superblock. + int64_t thresh_rate_sb; + int64_t thresh_dist_sb; +}; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { + CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); + if (cr == NULL) + return NULL; + + cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); + if (cr->map == NULL) { + vpx_free(cr); + return NULL; + } + + return cr; +} + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { + vpx_free(cr->map); + vpx_free(cr); +} + +// Check if we should turn off cyclic refresh based on bitrate condition. +static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, + const RATE_CONTROL *rc) { + // Turn off cyclic refresh if bits available per frame is not sufficiently + // larger than bit cost of segmentation. Segment map bit cost should scale + // with number of seg blocks, so compare available bits to number of blocks. + // Average bits available per frame = av_per_frame_bandwidth + // Number of (8x8) blocks in frame = mi_rows * mi_cols; + const float factor = 0.5; + const int number_blocks = cm->mi_rows * cm->mi_cols; + // The condition below corresponds to turning off at target bitrates: + // ~24kbps for CIF, 72kbps for VGA (at 30fps). 
+ // Also turn off at very small frame sizes, to avoid too large fraction of + // superblocks to be refreshed per frame. Threshold below is less than QCIF. + if (rc->av_per_frame_bandwidth < factor * number_blocks || + number_blocks / 64 < 5) + return 0; + else + return 1; +} + +// Check if this coding block, of size bsize, should be considered for refresh +// (lower-qp coding). Decision can be based on various factors, such as +// size of the coding block (i.e., below min_block size rejected), coding +// mode, and rate/distortion. +static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, + const MB_MODE_INFO *mbmi, + BLOCK_SIZE bsize, int use_rd) { + if (use_rd) { + // If projected rate is below the thresh_rate (well below target, + // so undershoot expected), accept it for lower-qp coding. + if (cr->projected_rate_sb < cr->thresh_rate_sb) + return 1; + // Otherwise, reject the block for lower-qp coding if any of the following: + // 1) prediction block size is below min_block_size + // 2) mode is non-zero mv and projected distortion is above thresh_dist + // 3) mode is an intra-mode (we may want to allow some of this under + // another thresh_dist) + else if (bsize < cr->min_block_size || + (mbmi->mv[0].as_int != 0 && + cr->projected_dist_sb > cr->thresh_dist_sb) || + !is_inter_block(mbmi)) + return 0; + else + return 1; + } else { + // Rate/distortion not used for update. + if (bsize < cr->min_block_size || + mbmi->mv[0].as_int != 0 || + !is_inter_block(mbmi)) + return 0; + else + return 1; + } +} + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. 
+void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); + // Default is to not update the refresh map. + int new_map_value = cr->map[block_index]; + int x = 0; int y = 0; + + // Check if we should reset the segment_id for this block. + if (mbmi->segment_id > 0 && !refresh_this_block) + mbmi->segment_id = 0; + + // Update the cyclic refresh map, to be used for setting segmentation map + // for the next frame. If the block will be refreshed this frame, mark it + // as clean. The magnitude of the -ve influences how long before we consider + // it for refresh again. + if (mbmi->segment_id == 1) { + new_map_value = -cr->time_for_refresh; + } else if (refresh_this_block) { + // Else if it is accepted as candidate for refresh, and has not already + // been refreshed (marked as 1) then mark it as a candidate for cleanup + // for future time (marked as 0), otherwise don't update it. + if (cr->map[block_index] == 1) + new_map_value = 0; + } else { + // Leave it marked as block that is not candidate for refresh. + new_map_value = 1; + } + // Update entries in the cyclic refresh map with new_map_value, and + // copy mbmi->segment_id into global segmentation map. 
+ for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + cr->map[block_index + y * cm->mi_cols + x] = new_map_value; + cpi->segmentation_map[block_index + y * cm->mi_cols + x] = + mbmi->segment_id; + } + // Keep track of actual number (in units of 8x8) of blocks in segment 1 used + // for encoding this frame. + if (mbmi->segment_id) + cr->num_seg_blocks += xmis * ymis; +} + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + const RATE_CONTROL *const rc = &cpi->rc; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + struct segmentation *const seg = &cm->seg; + unsigned char *const seg_map = cpi->segmentation_map; + const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); + // Don't apply refresh on key frame or enhancement layer frames. + if (!apply_cyclic_refresh || + (cm->frame_type == KEY_FRAME) || + (cpi->svc.temporal_layer_id > 0)) { + // Set segmentation map to 0 and disable. + vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + vp9_disable_segmentation(&cm->seg); + if (cm->frame_type == KEY_FRAME) + cr->sb_index = 0; + return; + } else { + int qindex_delta = 0; + int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; + int xmis, ymis, x, y, qindex2; + + // Rate target ratio to set q delta. + const float rate_ratio_qdelta = 2.0; + vp9_clear_system_state(); + // Some of these parameters may be set via codec-control function later. + cr->max_sbs_perframe = 10; + cr->max_qdelta_perc = 50; + cr->min_block_size = BLOCK_8X8; + cr->time_for_refresh = 1; + // Set rate threshold to some fraction of target (and scaled by 256). + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; + // Distortion threshold, quadratic in Q, scale factor to be adjusted. 
+ cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * + vp9_convert_qindex_to_q(cm->base_qindex)); + if (cpi->sf.use_nonrd_pick_mode) { + // May want to be more conservative with thresholds in non-rd mode for now + // as rate/distortion are derived from model based on prediction residual. + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; + cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * + vp9_convert_qindex_to_q(cm->base_qindex)); + } + + cr->num_seg_blocks = 0; + // Set up segmentation. + // Clear down the segment map. + vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); + vp9_enable_segmentation(&cm->seg); + vp9_clearall_segfeatures(seg); + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Note: setting temporal_update has no effect, as the seg-map coding method + // (temporal or spatial) is determined in vp9_choose_segmap_coding_method(), + // based on the coding cost of each method. For error_resilient mode on the + // last_frame_seg_map is set to 0, so if temporal coding is used, it is + // relative to 0 previous map. + // seg->temporal_update = 0; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + // Use segment 1 for in-frame Q adjustment. + vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); + + // Set the q delta for segment 1. + qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, + cm->base_qindex, + rate_ratio_qdelta); + // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from + // previous encoded frame. + if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100) + qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100; + + // Compute rd-mult for segment 1. 
+ qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); + cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); + + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta); + + sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sbs_in_frame = sb_cols * sb_rows; + // Number of target superblocks to get the q delta (segment 1). + block_count = cr->max_sbs_perframe * sbs_in_frame / 100; + // Set the segmentation map: cycle through the superblocks, starting at + // cr->mb_index, and stopping when either block_count blocks have been found + // to be refreshed, or we have passed through whole frame. + assert(cr->sb_index < sbs_in_frame); + i = cr->sb_index; + do { + int sum_map = 0; + // Get the mi_row/mi_col corresponding to superblock index i. + int sb_row_index = (i / sb_cols); + int sb_col_index = i - sb_row_index * sb_cols; + int mi_row = sb_row_index * MI_BLOCK_SIZE; + int mi_col = sb_col_index * MI_BLOCK_SIZE; + assert(mi_row >= 0 && mi_row < cm->mi_rows); + assert(mi_col >= 0 && mi_col < cm->mi_cols); + bl_index = mi_row * cm->mi_cols + mi_col; + // Loop through all 8x8 blocks in superblock and update map. + xmis = MIN(cm->mi_cols - mi_col, + num_8x8_blocks_wide_lookup[BLOCK_64X64]); + ymis = MIN(cm->mi_rows - mi_row, + num_8x8_blocks_high_lookup[BLOCK_64X64]); + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + const int bl_index2 = bl_index + y * cm->mi_cols + x; + // If the block is as a candidate for clean up then mark it + // for possible boost/refresh (segment 1). The segment id may get + // reset to 0 later if block gets coded anything other than ZEROMV. + if (cr->map[bl_index2] == 0) { + seg_map[bl_index2] = 1; + sum_map++; + } else if (cr->map[bl_index2] < 0) { + cr->map[bl_index2]++; + } + } + } + // Enforce constant segment over superblock. + // If segment is partial over superblock, reset to either all 1 or 0. 
+ if (sum_map > 0 && sum_map < xmis * ymis) { + const int new_value = (sum_map >= xmis * ymis / 2); + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) + seg_map[bl_index + y * cm->mi_cols + x] = new_value; + } + i++; + if (i == sbs_in_frame) { + i = 0; + } + if (sum_map >= xmis * ymis /2) + block_count--; + } while (block_count && i != cr->sb_index); + cr->sb_index = i; + } +} + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb) { + cr->projected_rate_sb = rate_sb; + cr->projected_dist_sb = dist_sb; +} + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { + return cr->rdmult; +} diff --git a/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h new file mode 100644 index 0000000..f556d65 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ + +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +struct CYCLIC_REFRESH; +typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. 
+void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd); + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb); + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/source/libvpx/vp9/encoder/vp9_vaq.c b/source/libvpx/vp9/encoder/vp9_aq_variance.c index c71c171..ae2a163 100644 --- a/source/libvpx/vp9/encoder/vp9_vaq.c +++ b/source/libvpx/vp9/encoder/vp9_aq_variance.c @@ -10,7 +10,7 @@ #include <math.h> -#include "vp9/encoder/vp9_vaq.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" @@ -99,7 +99,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { continue; } - qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i)); + qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i)); vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); diff --git a/source/libvpx/vp9/encoder/vp9_vaq.h b/source/libvpx/vp9/encoder/vp9_aq_variance.h index c73114a..381fe50 100644 --- a/source/libvpx/vp9/encoder/vp9_vaq.h +++ b/source/libvpx/vp9/encoder/vp9_aq_variance.h @@ -9,8 +9,8 @@ */ -#ifndef VP9_ENCODER_VP9_VAQ_H_ -#define VP9_ENCODER_VP9_VAQ_H_ +#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_onyx_int.h" @@ -31,4 +31,4 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_VAQ_H_ +#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/source/libvpx/vp9/encoder/vp9_bitstream.c b/source/libvpx/vp9/encoder/vp9_bitstream.c index 0f1692d..8d2afb9 100644 --- 
a/source/libvpx/vp9/encoder/vp9_bitstream.c +++ b/source/libvpx/vp9/encoder/vp9_bitstream.c @@ -26,6 +26,7 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_bitstream.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_mcomp.h" @@ -34,10 +35,6 @@ #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_write_bit_buffer.h" -#ifdef ENTROPY_STATS -extern unsigned int active_section; -#endif - static struct vp9_token intra_mode_encodings[INTRA_MODES]; static struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS]; static struct vp9_token partition_encodings[PARTITION_TYPES]; @@ -97,13 +94,13 @@ static void write_selected_tx_size(const VP9_COMP *cpi, } } -static int write_skip(const VP9_COMP *cpi, int segment_id, MODE_INFO *m, +static int write_skip(const VP9_COMP *cpi, int segment_id, const MODE_INFO *mi, vp9_writer *w) { const MACROBLOCKD *const xd = &cpi->mb.e_mbd; if (vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { - const int skip = m->mbmi.skip; + const int skip = mi->mbmi.skip; vp9_write(w, skip, vp9_get_skip_prob(&cpi->common, xd)); return skip; } @@ -195,7 +192,7 @@ static void write_segment_id(vp9_writer *w, const struct segmentation *seg, static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { const VP9_COMMON *const cm = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); const int segment_id = mbmi->segment_id; @@ -228,169 +225,147 @@ static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { } } -static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { +static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, + vp9_writer *w) { VP9_COMMON *const cm = &cpi->common; const 
nmv_context *nmvc = &cm->fc.nmvc; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; + const MACROBLOCK *const x = &cpi->mb; + const MACROBLOCKD *const xd = &x->e_mbd; const struct segmentation *const seg = &cm->seg; - const MB_MODE_INFO *const mi = &m->mbmi; - const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; - const MV_REFERENCE_FRAME ref1 = mi->ref_frame[1]; - const MB_PREDICTION_MODE mode = mi->mode; - const int segment_id = mi->segment_id; - const BLOCK_SIZE bsize = mi->sb_type; + const MB_MODE_INFO *const mbmi = &mi->mbmi; + const MB_PREDICTION_MODE mode = mbmi->mode; + const int segment_id = mbmi->segment_id; + const BLOCK_SIZE bsize = mbmi->sb_type; const int allow_hp = cm->allow_high_precision_mv; - int skip; - -#ifdef ENTROPY_STATS - active_section = 9; -#endif + const int is_inter = is_inter_block(mbmi); + const int is_compound = has_second_ref(mbmi); + int skip, ref; if (seg->update_map) { if (seg->temporal_update) { - const int pred_flag = mi->seg_id_predicted; + const int pred_flag = mbmi->seg_id_predicted; vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); - vp9_write(bc, pred_flag, pred_prob); + vp9_write(w, pred_flag, pred_prob); if (!pred_flag) - write_segment_id(bc, seg, segment_id); + write_segment_id(w, seg, segment_id); } else { - write_segment_id(bc, seg, segment_id); + write_segment_id(w, seg, segment_id); } } - skip = write_skip(cpi, segment_id, m, bc); + skip = write_skip(cpi, segment_id, mi, w); if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - vp9_write(bc, ref0 != INTRA_FRAME, vp9_get_intra_inter_prob(cm, xd)); + vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && - !(ref0 != INTRA_FRAME && + !(is_inter && (skip || vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)))) { - write_selected_tx_size(cpi, mi->tx_size, bsize, bc); + write_selected_tx_size(cpi, mbmi->tx_size, bsize, w); } - if (ref0 == INTRA_FRAME) { -#ifdef ENTROPY_STATS 
- active_section = 6; -#endif - + if (!is_inter) { if (bsize >= BLOCK_8X8) { - write_intra_mode(bc, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]); + write_intra_mode(w, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]); } else { int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - const MB_PREDICTION_MODE bm = m->bmi[idy * 2 + idx].as_mode; - write_intra_mode(bc, bm, cm->fc.y_mode_prob[0]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const MB_PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode; + write_intra_mode(w, b_mode, cm->fc.y_mode_prob[0]); } } } - write_intra_mode(bc, mi->uv_mode, cm->fc.uv_mode_prob[mode]); + write_intra_mode(w, mbmi->uv_mode, cm->fc.uv_mode_prob[mode]); } else { - vp9_prob *mv_ref_p; - write_ref_frames(cpi, bc); - mv_ref_p = cm->fc.inter_mode_probs[mi->mode_context[ref0]]; - -#ifdef ENTROPY_STATS - active_section = 3; -#endif + const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]]; + const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx]; + write_ref_frames(cpi, w); // If segment skip is not enabled code the mode. 
if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_8X8) { - write_inter_mode(bc, mode, mv_ref_p); - ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(mode)]; + write_inter_mode(w, mode, inter_probs); + ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(mode)]; } } if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - vp9_write_token(bc, vp9_switchable_interp_tree, + vp9_write_token(w, vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[ctx], - &switchable_interp_encodings[mi->interp_filter]); + &switchable_interp_encodings[mbmi->interp_filter]); } else { - assert(mi->interp_filter == cm->interp_filter); + assert(mbmi->interp_filter == cm->interp_filter); } if (bsize < BLOCK_8X8) { - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; - const MB_PREDICTION_MODE b_mode = m->bmi[j].as_mode; - write_inter_mode(bc, b_mode, mv_ref_p); - ++cm->counts.inter_mode[mi->mode_context[ref0]][INTER_OFFSET(b_mode)]; + const MB_PREDICTION_MODE b_mode = mi->bmi[j].as_mode; + write_inter_mode(w, b_mode, inter_probs); + ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; if (b_mode == NEWMV) { -#ifdef ENTROPY_STATS - active_section = 11; -#endif - vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv, - &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp); - - if (has_second_ref(mi)) - vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv, - &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp); + for (ref = 0; ref < 1 + is_compound; ++ref) + vp9_encode_mv(cpi, w, 
&mi->bmi[j].as_mv[ref].as_mv, + &mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, + nmvc, allow_hp); } } } - } else if (mode == NEWMV) { -#ifdef ENTROPY_STATS - active_section = 5; -#endif - vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, - &mi->ref_mvs[ref0][0].as_mv, nmvc, allow_hp); - - if (has_second_ref(mi)) - vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, - &mi->ref_mvs[ref1][0].as_mv, nmvc, allow_hp); + } else { + if (mode == NEWMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) + vp9_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, + &mbmi->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, nmvc, + allow_hp); + } } } } static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8, - vp9_writer *bc) { + vp9_writer *w) { const VP9_COMMON *const cm = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const struct segmentation *const seg = &cm->seg; - MODE_INFO *m = mi_8x8[0]; - const int ym = m->mbmi.mode; - const int segment_id = m->mbmi.segment_id; - MODE_INFO *above_mi = mi_8x8[-xd->mode_info_stride]; - MODE_INFO *left_mi = xd->left_available ? mi_8x8[-1] : NULL; + const MODE_INFO *const mi = mi_8x8[0]; + const MODE_INFO *const above_mi = mi_8x8[-xd->mi_stride]; + const MODE_INFO *const left_mi = xd->left_available ? 
mi_8x8[-1] : NULL; + const MB_MODE_INFO *const mbmi = &mi->mbmi; + const BLOCK_SIZE bsize = mbmi->sb_type; if (seg->update_map) - write_segment_id(bc, seg, m->mbmi.segment_id); + write_segment_id(w, seg, mbmi->segment_id); - write_skip(cpi, segment_id, m, bc); + write_skip(cpi, mbmi->segment_id, mi, w); - if (m->mbmi.sb_type >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT) - write_selected_tx_size(cpi, m->mbmi.tx_size, m->mbmi.sb_type, bc); + if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT) + write_selected_tx_size(cpi, mbmi->tx_size, bsize, w); - if (m->mbmi.sb_type >= BLOCK_8X8) { - const MB_PREDICTION_MODE A = vp9_above_block_mode(m, above_mi, 0); - const MB_PREDICTION_MODE L = vp9_left_block_mode(m, left_mi, 0); - write_intra_mode(bc, ym, vp9_kf_y_mode_prob[A][L]); + if (bsize >= BLOCK_8X8) { + write_intra_mode(w, mbmi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0)); } else { + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[m->mbmi.sb_type]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[m->mbmi.sb_type]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - int i = idy * 2 + idx; - const MB_PREDICTION_MODE A = vp9_above_block_mode(m, above_mi, i); - const MB_PREDICTION_MODE L = vp9_left_block_mode(m, left_mi, i); - const int bm = m->bmi[i].as_mode; - write_intra_mode(bc, bm, vp9_kf_y_mode_prob[A][L]); + + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const int block = idy * 2 + idx; + write_intra_mode(w, mi->bmi[block].as_mode, + get_y_mode_probs(mi, above_mi, left_mi, block)); } } } - write_intra_mode(bc, m->mbmi.uv_mode, vp9_kf_uv_mode_prob[ym]); + write_intra_mode(w, mbmi->uv_mode, vp9_kf_uv_mode_prob[mbmi->mode]); } static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, @@ 
-400,35 +375,27 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; MODE_INFO *m; - xd->mi_8x8 = cm->mi_grid_visible + (mi_row * cm->mode_info_stride + mi_col); - m = xd->mi_8x8[0]; + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + m = xd->mi[0]; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { - write_mb_modes_kf(cpi, xd->mi_8x8, w); -#ifdef ENTROPY_STATS - active_section = 8; -#endif + write_mb_modes_kf(cpi, xd->mi, w); } else { pack_inter_mode_mvs(cpi, m, w); -#ifdef ENTROPY_STATS - active_section = 1; -#endif } assert(*tok < tok_end); pack_mb_tokens(w, tok, tok_end); } -static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, +static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, + int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) { - VP9_COMMON *const cm = &cpi->common; - const int ctx = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; @@ -446,21 +413,24 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, } } -static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes_sb(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int bsl = b_width_log2(bsize); const int bs = (1 << bsl) / 4; PARTITION_TYPE partition; BLOCK_SIZE subsize; - MODE_INFO *m = cm->mi_grid_visible[mi_row * 
cm->mode_info_stride + mi_col]; + MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; partition = partition_lookup[bsl][m->mbmi.sb_type]; - write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w); + write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); @@ -496,29 +466,30 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { int mi_row, mi_col; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { - vp9_zero(cpi->left_seg_context); + vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + BLOCK_64X64); } } -static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { +static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, + vp9_coeff_stats *coef_branch_ct) { vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; - vp9_coeff_stats *coef_branch_ct = cpi->frame_branch_ct[tx_size]; int i, j, k, l, m; for (i = 0; i < 
PLANE_TYPES; ++i) { @@ -541,16 +512,16 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { } static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, - TX_SIZE tx_size) { + TX_SIZE tx_size, + vp9_coeff_stats *frame_branch_ct) { vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_probs_model *old_frame_coef_probs = cpi->common.fc.coef_probs[tx_size]; - vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size]; const vp9_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; switch (cpi->sf.use_fast_coef_updates) { - case 0: { + case TWO_LOOP: { /* dry run to see if there is any udpate at all needed */ int savings = 0; int update[2] = {0, 0}; @@ -625,14 +596,14 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, return; } - case 1: - case 2: { + case ONE_LOOP: + case ONE_LOOP_REDUCED: { const int prev_coef_contexts_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEFF_CONTEXTS >> 1 - : COEFF_CONTEXTS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? + COEFF_CONTEXTS >> 1 : COEFF_CONTEXTS; const int coef_band_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEF_BANDS >> 1 - : COEF_BANDS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? 
+ COEF_BANDS >> 1 : COEF_BANDS; int updates = 0; int noupdates_before_first = 0; for (i = 0; i < PLANE_TYPES; ++i) { @@ -698,13 +669,15 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; + vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; + vp9_clear_system_state(); for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size) - build_tree_distribution(cpi, tx_size); + build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size]); for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - update_coef_probs_common(w, cpi, tx_size); + update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size]); } static void encode_loopfilter(struct loopfilter *lf, @@ -961,7 +934,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) * + vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); tok[0][0] = cpi->tok; @@ -1058,19 +1031,22 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) { vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8); } +static void write_profile(BITSTREAM_PROFILE profile, + struct vp9_write_bit_buffer *wb) { + assert(profile < MAX_PROFILES); + vp9_wb_write_bit(wb, profile & 1); + vp9_wb_write_bit(wb, profile >> 1); +} + static void write_uncompressed_header(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2); - // bitstream version. - // 00 - profile 0. 4:2:0 only - // 10 - profile 1. 
adds 4:4:4, 4:2:2, alpha - vp9_wb_write_bit(wb, cm->version); - vp9_wb_write_bit(wb, 0); + write_profile(cm->profile, wb); - vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 0); // show_existing_frame vp9_wb_write_bit(wb, cm->frame_type); vp9_wb_write_bit(wb, cm->show_frame); vp9_wb_write_bit(wb, cm->error_resilient_mode); @@ -1078,16 +1054,20 @@ static void write_uncompressed_header(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { const COLOR_SPACE cs = UNKNOWN; write_sync_code(wb); + if (cm->profile > PROFILE_1) { + assert(cm->bit_depth > BITS_8); + vp9_wb_write_bit(wb, cm->bit_depth - BITS_10); + } vp9_wb_write_literal(wb, cs, 3); if (cs != SRGB) { vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { vp9_wb_write_bit(wb, cm->subsampling_x); vp9_wb_write_bit(wb, cm->subsampling_y); vp9_wb_write_bit(wb, 0); // has extra plane } } else { - assert(cm->version == 1); + assert(cm->profile == PROFILE_1); vp9_wb_write_bit(wb, 0); // has extra plane } @@ -1150,18 +1130,10 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { encode_txfm_probs(cm, &header_bc); update_coef_probs(cpi, &header_bc); - -#ifdef ENTROPY_STATS - active_section = 2; -#endif - update_skip_probs(cm, &header_bc); if (!frame_is_intra_only(cm)) { int i; -#ifdef ENTROPY_STATS - active_section = 1; -#endif for (i = 0; i < INTER_MODE_CONTEXTS; ++i) prob_diff_update(vp9_inter_mode_tree, cm->fc.inter_mode_probs[i], @@ -1223,7 +1195,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { uint8_t *data = dest; - size_t first_part_size; + size_t first_part_size, uncompressed_hdr_size; struct vp9_write_bit_buffer wb = {data, 0}; struct vp9_write_bit_buffer saved_wb; @@ -1231,17 +1203,11 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { saved_wb = wb; vp9_wb_write_literal(&wb, 0, 16); // don't know in advance 
first part. size - data += vp9_rb_bytes_written(&wb); + uncompressed_hdr_size = vp9_rb_bytes_written(&wb); + data += uncompressed_hdr_size; vp9_compute_update_table(); -#ifdef ENTROPY_STATS - if (cm->frame_type == INTER_FRAME) - active_section = 0; - else - active_section = 7; -#endif - vp9_clear_system_state(); first_part_size = write_compressed_header(cpi, data); diff --git a/source/libvpx/vp9/encoder/vp9_block.h b/source/libvpx/vp9/encoder/vp9_block.h index 85f6c97..7729d84 100644 --- a/source/libvpx/vp9/encoder/vp9_block.h +++ b/source/libvpx/vp9/encoder/vp9_block.h @@ -11,7 +11,6 @@ #ifndef VP9_ENCODER_VP9_BLOCK_H_ #define VP9_ENCODER_VP9_BLOCK_H_ -#include "vp9/common/vp9_onyx.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" #include "vpx_ports/mem.h" @@ -116,7 +115,6 @@ struct macroblock { unsigned int source_variance; unsigned int pred_sse[MAX_REF_FRAMES]; int pred_mv_sad[MAX_REF_FRAMES]; - int mode_sad[MAX_REF_FRAMES][INTER_MODES + 1]; int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -132,9 +130,9 @@ struct macroblock { int *nmvsadcost_hp[2]; int **mvsadcost; - int mbmode_cost[MB_MODE_COUNT]; + int mbmode_cost[INTRA_MODES]; unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; - int intra_uv_mode_cost[2][MB_MODE_COUNT]; + int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; @@ -155,11 +153,10 @@ struct macroblock { int encode_breakout; - unsigned char *active_ptr; + int in_active_map; // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; - DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); int optimize; @@ -199,7 +196,8 @@ struct macroblock { // TODO(jingning): the variables used here are little complicated. need further // refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. 
-static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) { +static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, + BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_64X64: return &x->sb64_context; diff --git a/source/libvpx/vp9/encoder/vp9_cost.c b/source/libvpx/vp9/encoder/vp9_cost.c new file mode 100644 index 0000000..1c3c3d2 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_cost.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/encoder/vp9_cost.h" + +const unsigned int vp9_prob_cost[256] = { + 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, + 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, + 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733, + 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, + 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, + 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472, + 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415, + 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, + 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, + 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281, + 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246, + 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, + 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, + 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, + 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, + 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, + 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, + 82, 
81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63, + 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43, + 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, + 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, + 4, 3, 1, 1}; + +static void cost(int *costs, vp9_tree tree, const vp9_prob *probs, + int i, int c) { + const vp9_prob prob = probs[i / 2]; + int b; + + for (b = 0; b <= 1; ++b) { + const int cc = c + vp9_cost_bit(prob, b); + const vp9_tree_index ii = tree[i + b]; + + if (ii <= 0) + costs[-ii] = cc; + else + cost(costs, tree, probs, ii, cc); + } +} + +void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) { + cost(costs, tree, probs, 0, 0); +} + +void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { + assert(tree[0] <= 0 && tree[1] > 0); + + costs[-tree[0]] = vp9_cost_bit(probs[0], 0); + cost(costs, tree, probs, 2, 0); +} diff --git a/source/libvpx/vp9/encoder/vp9_cost.h b/source/libvpx/vp9/encoder/vp9_cost.h new file mode 100644 index 0000000..6d2b940 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_cost.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_COST_H_ +#define VP9_ENCODER_VP9_COST_H_ + +#include "vp9/common/vp9_prob.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const unsigned int vp9_prob_cost[256]; + +#define vp9_cost_zero(prob) (vp9_prob_cost[prob]) + +#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob)) + +#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 
vp9_complement(prob) \ + : (prob)) + +static INLINE unsigned int cost_branch256(const unsigned int ct[2], + vp9_prob p) { + return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); +} + +static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs, + int bits, int len) { + int cost = 0; + vp9_tree_index i = 0; + + do { + const int bit = (bits >> --len) & 1; + cost += vp9_cost_bit(probs[i >> 1], bit); + i = tree[i + bit]; + } while (len); + + return cost; +} + +void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree); +void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_COST_H_ diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c index b8dc72a..c52e4f3 100644 --- a/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -29,16 +29,24 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" + +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_extend.h" -#include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_pickmode.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_vaq.h" + +#define GF_ZEROMV_ZBIN_BOOST 0 +#define LF_ZEROMV_ZBIN_BOOST 0 +#define MV_ZBIN_BOOST 0 +#define SPLIT_MV_ZBIN_BOOST 0 +#define INTRA_ZBIN_BOOST 0 static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) { switch (subsize) { @@ -74,10 +82,10 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); // (infinite lambda), which confounds analysis. 
// This also avoids the need for divide by zero checks in // vp9_activity_masking(). -#define ACTIVITY_AVG_MIN (64) +#define ACTIVITY_AVG_MIN 64 // Motion vector component magnitude threshold for defining fast motion. -#define FAST_MOTION_MV_THRESH (24) +#define FAST_MOTION_MV_THRESH 24 // This is used as a reference when computing the source variance for the // purposes of activity masking. @@ -149,24 +157,446 @@ static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi, return BLOCK_16X16; } +// Lighter version of set_offsets that only sets the mode info +// pointers. +static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + int mi_row, + int mi_col) { + const int idx_str = xd->mi_stride * mi_row + mi_col; + xd->mi = cm->mi_grid_visible + idx_str; + xd->mi[0] = cm->mi + idx_str; +} + +static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + const int mb_rows = cm->mb_rows; + const int mb_cols = cm->mb_cols; + const int mb_row = mi_row >> 1; + const int mb_col = mi_col >> 1; + const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1; + const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1; + int r, c; + if (bsize <= BLOCK_16X16) { + return cpi->active_map[mb_row * mb_cols + mb_col]; + } + for (r = 0; r < mb_height; ++r) { + for (c = 0; c < mb_width; ++c) { + int row = mb_row + r; + int col = mb_col + c; + if (row >= mb_rows || col >= mb_cols) + continue; + if (cpi->active_map[row * mb_cols + col]) + return 1; + } + } + return 0; +} + +static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->active_map_enabled && !x->e_mbd.lossless) { + return is_block_in_mb_map(cpi, mi_row, mi_col, bsize); + } else { + return 1; + } +} + +static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + MACROBLOCK *const x = 
&cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mb_row = mi_row >> 1; + const int mb_col = mi_col >> 1; + const int idx_map = mb_row * cm->mb_cols + mb_col; + const struct segmentation *const seg = &cm->seg; + + set_skip_context(xd, mi_row, mi_col); + + // Activity map pointer + x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + + mbmi = &xd->mi[0]->mbmi; + + // Set up destination pointers. + vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + + // Set up limit values for MV components. + // Mv beyond the range do not produce new/different prediction block. + x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; + x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; + + // Set up distance of MB to edge of frame in 1/8th pel units. + assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, + cm->mi_rows, cm->mi_cols); + + // Set up source buffers. + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + + // R/D setup. + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + + // Setup segment ID. + if (seg->enabled) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + vp9_init_plane_quantizers(cpi, x); + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + } else { + mbmi->segment_id = 0; + x->encode_breakout = cpi->encode_breakout; + } +} + +static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, + MACROBLOCKD *const xd, + int mi_row, + int mi_col, + BLOCK_SIZE bsize) { + const int block_width = num_8x8_blocks_wide_lookup[bsize]; + const int block_height = num_8x8_blocks_high_lookup[bsize]; + int i, j; + for (j = 0; j < block_height; ++j) + for (i = 0; i < block_width; ++i) { + if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols) + xd->mi[j * xd->mi_stride + i] = xd->mi[0]; + } +} + +static void set_block_size(VP9_COMP * const cpi, + const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col); + xd->mi[0]->mbmi.sb_type = bsize; + duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); + } +} + +typedef struct { + int64_t sum_square_error; + int64_t sum_error; + int count; + int variance; +} var; + +typedef struct { + var none; + var horz[2]; + var vert[2]; +} partition_variance; + +typedef struct { + partition_variance part_variances; + var split[4]; +} v8x8; + +typedef struct { + partition_variance part_variances; + v8x8 split[4]; +} v16x16; + +typedef struct { + partition_variance part_variances; + v16x16 split[4]; +} v32x32; + +typedef struct { + partition_variance part_variances; + v32x32 split[4]; +} v64x64; + +typedef struct { + partition_variance *part_variances; + var *split[4]; +} variance_node; + +typedef enum { + V16X16, + V32X32, + V64X64, +} TREE_LEVEL; + +static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { + int i; + switch (bsize) { + 
case BLOCK_64X64: { + v64x64 *vt = (v64x64 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_32X32: { + v32x32 *vt = (v32x32 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_16X16: { + v16x16 *vt = (v16x16 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i].part_variances.none; + break; + } + case BLOCK_8X8: { + v8x8 *vt = (v8x8 *) data; + node->part_variances = &vt->part_variances; + for (i = 0; i < 4; i++) + node->split[i] = &vt->split[i]; + break; + } + default: { + assert(0); + } + } +} + +// Set variance values given sum square error, sum error, count. +static void fill_variance(int64_t s2, int64_t s, int c, var *v) { + v->sum_square_error = s2; + v->sum_error = s; + v->count = c; + if (c > 0) + v->variance = (int)(256 * + (v->sum_square_error - v->sum_error * v->sum_error / + v->count) / v->count); + else + v->variance = 0; +} + +void sum_2_variances(const var *a, const var *b, var *r) { + fill_variance(a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->count + b->count, r); +} + +static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { + variance_node node; + tree_to_node(data, bsize, &node); + sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); + sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); + sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); + sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); + sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], + &node.part_variances->none); +} + +static int set_vt_partitioning(VP9_COMP *cpi, + void *data, + const TileInfo *const tile, + BLOCK_SIZE bsize, + int mi_row, + 
int mi_col, + int mi_size) { + VP9_COMMON * const cm = &cpi->common; + variance_node vt; + const int block_width = num_8x8_blocks_wide_lookup[bsize]; + const int block_height = num_8x8_blocks_high_lookup[bsize]; + // TODO(debargha): Choose this more intelligently. + const int64_t threshold_multiplier = 25; + int64_t threshold = threshold_multiplier * cpi->common.base_qindex; + assert(block_height == block_width); + + tree_to_node(data, bsize, &vt); + + // Split none is available only if we have more than half a block size + // in width and height inside the visible image. + if (mi_col + block_width / 2 < cm->mi_cols && + mi_row + block_height / 2 < cm->mi_rows && + vt.part_variances->none.variance < threshold) { + set_block_size(cpi, tile, mi_row, mi_col, bsize); + return 1; + } + + // Vertical split is available on all but the bottom border. + if (mi_row + block_height / 2 < cm->mi_rows && + vt.part_variances->vert[0].variance < threshold && + vt.part_variances->vert[1].variance < threshold) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); + set_block_size(cpi, tile, mi_row, mi_col, subsize); + set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize); + return 1; + } + + // Horizontal split is available on all but the right border. + if (mi_col + block_width / 2 < cm->mi_cols && + vt.part_variances->horz[0].variance < threshold && + vt.part_variances->horz[1].variance < threshold) { + BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); + set_block_size(cpi, tile, mi_row, mi_col, subsize); + set_block_size(cpi, tile, mi_row + block_height / 2, mi_col, subsize); + return 1; + } + return 0; +} + +// TODO(debargha): Fix this function and make it work as expected. 
+static void choose_partitioning(VP9_COMP *cpi, + const TileInfo *const tile, + int mi_row, int mi_col) { + VP9_COMMON * const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + MACROBLOCKD *xd = &cpi->mb.e_mbd; + + int i, j, k; + v64x64 vt; + uint8_t *s; + const uint8_t *d; + int sp; + int dp; + int pixels_wide = 64, pixels_high = 64; + int_mv nearest_mv, near_mv; + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); + const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; + + vp9_zero(vt); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + + if (xd->mb_to_right_edge < 0) + pixels_wide += (xd->mb_to_right_edge >> 3); + if (xd->mb_to_bottom_edge < 0) + pixels_high += (xd->mb_to_bottom_edge >> 3); + + s = x->plane[0].src.buf; + sp = x->plane[0].src.stride; + + if (cm->frame_type != KEY_FRAME) { + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf); + + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.sb_type = BLOCK_64X64; + vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, + xd->mi[0]->mbmi.ref_mvs[LAST_FRAME], + &nearest_mv, &near_mv); + + xd->mi[0]->mbmi.mv[0] = nearest_mv; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64); + + d = xd->plane[0].dst.buf; + dp = xd->plane[0].dst.stride; + } else { + d = VP9_VAR_OFFS; + dp = 0; + } + + // Fill in the entire tree of 8x8 variances for splits. 
+ for (i = 0; i < 4; i++) { + const int x32_idx = ((i & 1) << 5); + const int y32_idx = ((i >> 1) << 5); + for (j = 0; j < 4; j++) { + const int x16_idx = x32_idx + ((j & 1) << 4); + const int y16_idx = y32_idx + ((j >> 1) << 4); + v16x16 *vst = &vt.split[i].split[j]; + for (k = 0; k < 4; k++) { + int x_idx = x16_idx + ((k & 1) << 3); + int y_idx = y16_idx + ((k >> 1) << 3); + unsigned int sse = 0; + int sum = 0; + if (x_idx < pixels_wide && y_idx < pixels_high) + vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); + fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); + } + } + } + // Fill the rest of the variance tree by summing split partition values. + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); + } + fill_variance_tree(&vt.split[i], BLOCK_32X32); + } + fill_variance_tree(&vt, BLOCK_64X64); + + // Now go through the entire structure, splitting every block size until + // we get to one that's got a variance lower than our threshold, or we + // hit 8x8. 
+ if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64, + mi_row, mi_col, 8)) { + for (i = 0; i < 4; ++i) { + const int x32_idx = ((i & 1) << 2); + const int y32_idx = ((i >> 1) << 2); + if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32, + (mi_row + y32_idx), (mi_col + x32_idx), 4)) { + for (j = 0; j < 4; ++j) { + const int x16_idx = ((j & 1) << 1); + const int y16_idx = ((j >> 1) << 1); + // NOTE: This is a temporary hack to disable 8x8 partitions, + // since it works really bad - possibly due to a bug +#define DISABLE_8X8_VAR_BASED_PARTITION +#ifdef DISABLE_8X8_VAR_BASED_PARTITION + if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows && + mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) { + set_block_size(cpi, tile, + (mi_row + y32_idx + y16_idx), + (mi_col + x32_idx + x16_idx), + BLOCK_16X16); + } else { + for (k = 0; k < 4; ++k) { + const int x8_idx = (k & 1); + const int y8_idx = (k >> 1); + set_block_size(cpi, tile, + (mi_row + y32_idx + y16_idx + y8_idx), + (mi_col + x32_idx + x16_idx + x8_idx), + BLOCK_8X8); + } + } +#else + if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile, + BLOCK_16X16, + (mi_row + y32_idx + y16_idx), + (mi_col + x32_idx + x16_idx), 2)) { + for (k = 0; k < 4; ++k) { + const int x8_idx = (k & 1); + const int y8_idx = (k >> 1); + set_block_size(cpi, tile, + (mi_row + y32_idx + y16_idx + y8_idx), + (mi_col + x32_idx + x16_idx + x8_idx), + BLOCK_8X8); + } + } +#endif + } + } + } + } +} + // Original activity measure from Tim T's code. static unsigned int tt_activity_measure(MACROBLOCK *x) { unsigned int sse; - /* TODO: This could also be done over smaller areas (8x8), but that would - * require extensive changes elsewhere, as lambda is assumed to be fixed - * over an entire MB in most of the code. - * Another option is to compute four 8x8 variances, and pick a single - * lambda using a non-linear combination (e.g., the smallest, or second - * smallest, etc.). 
- */ - unsigned int act = vp9_variance16x16(x->plane[0].src.buf, - x->plane[0].src.stride, - VP9_VAR_OFFS, 0, &sse) << 4; + // TODO: This could also be done over smaller areas (8x8), but that would + // require extensive changes elsewhere, as lambda is assumed to be fixed + // over an entire MB in most of the code. + // Another option is to compute four 8x8 variances, and pick a single + // lambda using a non-linear combination (e.g., the smallest, or second + // smallest, etc.). + const unsigned int act = vp9_variance16x16(x->plane[0].src.buf, + x->plane[0].src.stride, + VP9_VAR_OFFS, 0, &sse) << 4; // If the region is flat, lower the activity some more. - if (act < (8 << 12)) - act = MIN(act, 5 << 12); - - return act; + return act < (8 << 12) ? MIN(act, 5 << 12) : act; } // Stub for alternative experimental activity measures. @@ -387,54 +817,9 @@ static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { adjust_act_zbin(cpi, x); } -// Select a segment for the current SB64 -static void select_in_frame_q_segment(VP9_COMP *cpi, - int mi_row, int mi_col, - int output_enabled, int projected_rate) { - VP9_COMMON *const cm = &cpi->common; - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - int complexity_metric = 64; - int x, y; - - unsigned char segment; - - if (!output_enabled) { - segment = 0; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). 
- // It is converted to bits * 256 units - const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); - - if (projected_rate < (target_rate / 4)) { - segment = 1; - } else { - segment = 0; - } - - if (target_rate > 0) { - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); - } - } - - // Fill in the entires in the segment map corresponding to this SB64 - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = - (unsigned char)complexity_metric; - } - } -} - static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, - BLOCK_SIZE bsize, int output_enabled) { + int mi_row, int mi_col, BLOCK_SIZE bsize, + int output_enabled) { int i, x_idx, y; VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; @@ -442,26 +827,37 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - MODE_INFO *mi_addr = xd->mi_8x8[0]; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; - assert(mi->mbmi.mode < MB_MODE_COUNT); - assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES); - assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES); assert(mi->mbmi.sb_type == bsize); - // For in frame adaptive Q copy over the chosen segment id into the - // mode innfo context for the chosen mode / partition. 
- if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && output_enabled) - mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id; - *mi_addr = *mi; + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); + } + } + max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; @@ -483,13 +879,11 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, for (x_idx = 0; x_idx < mi_width; x_idx++) if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { - xd->mi_8x8[x_idx + y * mis] = mi_addr; + xd->mi[x_idx + y * mis] = mi_addr; } - if ((cpi->oxcf.aq_mode == VARIANCE_AQ) || - (cpi->oxcf.aq_mode == COMPLEXITY_AQ)) { + if (cpi->oxcf.aq_mode) vp9_init_plane_quantizers(cpi, x); - } // FIXME(rbultje) I'm pretty sure this should go to the end of this block // (i.e. 
after the output_enabled) @@ -538,12 +932,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, #endif if (!frame_is_intra_only(cm)) { if (is_inter_block(mbmi)) { - if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { - MV best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; - vp9_update_mv_count(cm, xd, best_mv); - } + vp9_update_mv_count(cm, xd); if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); @@ -577,89 +966,6 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, x->e_mbd.plane[i].subsampling_y); } -static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, int mi_col, BLOCK_SIZE bsize) { - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; - const int idx_str = xd->mode_info_stride * mi_row + mi_col; - const int mi_width = num_8x8_blocks_wide_lookup[bsize]; - const int mi_height = num_8x8_blocks_high_lookup[bsize]; - const int mb_row = mi_row >> 1; - const int mb_col = mi_col >> 1; - const int idx_map = mb_row * cm->mb_cols + mb_col; - const struct segmentation *const seg = &cm->seg; - - set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col); - - // Activity map pointer - x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; - x->active_ptr = cpi->active_map + idx_map; - - xd->mi_8x8 = cm->mi_grid_visible + idx_str; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - - xd->last_mi = cm->prev_mi ? 
xd->prev_mi_8x8[0] : NULL; - - xd->mi_8x8[0] = cm->mi + idx_str; - - mbmi = &xd->mi_8x8[0]->mbmi; - - // Set up destination pointers - setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); - - // Set up limit values for MV components - // mv beyond the range do not produce new/different prediction block - x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); - x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); - x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; - x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; - - // Set up distance of MB to edge of frame in 1/8th pel units - assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); - set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, - cm->mi_rows, cm->mi_cols); - - /* set up source buffers */ - vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); - - /* R/D setup */ - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - - /* segment ID */ - if (seg->enabled) { - if (cpi->oxcf.aq_mode != VARIANCE_AQ) { - const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map - : cm->last_frame_seg_map; - mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); - } - vp9_init_plane_quantizers(cpi, x); - - if (seg->enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - const int y = mb_row & ~3; - const int x = mb_col & ~3; - const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); - const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); - const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1; - const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1; - - cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) - << 16) / cm->MBs; - } - - x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; - } else { - mbmi->segment_id = 0; - x->encode_breakout = cpi->encode_breakout; - } -} - static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, int *totalrate, int64_t *totaldist, @@ -668,10 +974,11 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; - int i; - int orig_rdmult = x->rdmult; + const AQ_MODE aq_mode = cpi->oxcf.aq_mode; + int i, orig_rdmult; double rdmult_ratio; vp9_clear_system_state(); @@ -691,7 +998,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } set_offsets(cpi, tile, mi_row, mi_col, bsize); - xd->mi_8x8[0]->mbmi.sb_type = bsize; + mbmi = &xd->mi[0]->mbmi; + mbmi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][0]; @@ -703,44 +1011,49 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, x->skip_recode = 0; // Set to zero to 
make sure we do not use the previous encoded frame stats - xd->mi_8x8[0]->mbmi.skip = 0; + mbmi->skip = 0; x->source_variance = get_sby_perpixel_variance(cpi, x, bsize); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + if (aq_mode == VARIANCE_AQ) { const int energy = bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize); if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy); + mbmi->segment_id = vp9_vaq_segment_id(energy); } else { const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - xd->mi_8x8[0]->mbmi.segment_id = - vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); } rdmult_ratio = vp9_vaq_rdmult_ratio(energy); vp9_init_plane_quantizers(cpi, x); } + // Save rdmult before it might be changed, so it can be restored later. + orig_rdmult = x->rdmult; if (cpi->oxcf.tuning == VP8_TUNE_SSIM) activity_masking(cpi, x); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + if (aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); x->rdmult = (int)round(x->rdmult * rdmult_ratio); - } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + } else if (aq_mode == COMPLEXITY_AQ) { const int mi_offset = mi_row * cm->mi_cols + mi_col; unsigned char complexity = cpi->complexity_map[mi_offset]; const int is_edge = (mi_row <= 1) || (mi_row >= (cm->mi_rows - 2)) || (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2)); - - if (!is_edge && (complexity > 128)) { - x->rdmult = x->rdmult + ((x->rdmult * (complexity - 128)) / 256); - } + if (!is_edge && (complexity > 128)) + x->rdmult += ((x->rdmult * (complexity - 128)) / 256); + } else if (aq_mode == CYCLIC_REFRESH_AQ) { + const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + // If segment 1, use rdmult for that segment. 
+ if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } // Find best coding mode & reconstruct the MB so it is available @@ -757,14 +1070,13 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, totaldist, bsize, ctx, best_rd); } - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + if (aq_mode == VARIANCE_AQ) { x->rdmult = orig_rdmult; if (*totalrate != INT_MAX) { vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); } - } - else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) { x->rdmult = orig_rdmult; } } @@ -773,7 +1085,7 @@ static void update_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const MACROBLOCK *const x = &cpi->mb; const MACROBLOCKD *const xd = &x->e_mbd; - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (!frame_is_intra_only(cm)) { @@ -840,21 +1152,21 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy( - cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), + xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(cpi->above_seg_context + mi_col, sa, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl, - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(xd->above_seg_context + mi_col, sa, + sizeof(*xd->above_seg_context) * mi_width); + 
vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], @@ -873,20 +1185,20 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, for (p = 0; p < MAX_MB_PLANE; ++p) { vpx_memcpy( a + num_4x4_blocks_wide * p, - cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), + xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( l + num_4x4_blocks_high * p, - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(sa, cpi->above_seg_context + mi_col, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK), - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(sa, xd->above_seg_context + mi_col, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), + sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -901,7 +1213,8 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, return; } set_offsets(cpi, tile, mi_row, mi_col, bsize); - update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); + update_state(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize, + output_enabled); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); if (output_enabled) { @@ -917,6 +1230,8 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; 
PARTITION_TYPE partition; @@ -926,8 +1241,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, return; if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = *get_sb_partitioning(x, bsize); } else { ctx = 0; @@ -982,8 +1296,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } // Check to see if the given partition size is allowed for a specified number @@ -1011,11 +1324,11 @@ static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, // However, at the bottom and right borders of the image the requested size // may not be allowed in which case this code attempts to choose the largest // allowable partition. 
-static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile, - MODE_INFO **mi_8x8, int mi_row, int mi_col, - BLOCK_SIZE bsize) { +static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, + MODE_INFO **mi_8x8, int mi_row, int mi_col, + BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int row8x8_remaining = tile->mi_row_end - mi_row; int col8x8_remaining = tile->mi_col_end - mi_col; int block_row, block_col; @@ -1051,15 +1364,79 @@ static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } +static void constrain_copy_partitioning(VP9_COMP *const cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + MODE_INFO **prev_mi_8x8, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mi_stride; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; + MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + int block_row, block_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // If the SB64 if it is all "in image". + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { + for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { + const int index = block_row * mis + block_col; + MODE_INFO *prev_mi = prev_mi_8x8[index]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + // Use previous partition if block size is not larger than bsize. 
+ if (prev_mi && sb_type <= bsize) { + int block_row2, block_col2; + for (block_row2 = 0; block_row2 < bh; ++block_row2) { + for (block_col2 = 0; block_col2 < bw; ++block_col2) { + const int index2 = (block_row + block_row2) * mis + + block_col + block_col2; + prev_mi = prev_mi_8x8[index2]; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[index2] = cm->mi + offset; + mi_8x8[index2]->mbmi.sb_type = prev_mi->mbmi.sb_type; + } + } + } + } else { + // Otherwise, use fixed partition of size bsize. + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } + } else { + // Else this is a partial SB64, copy previous partition. + for (block_row = 0; block_row < 8; ++block_row) { + for (block_col = 0; block_col < 8; ++block_col) { + MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[block_row * mis + block_col] = cm->mi + offset; + mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; + } + } + } + } +} + static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; for (block_row = 0; block_row < 8; ++block_row) { for (block_col = 0; block_col < 8; ++block_col) { MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; const BLOCK_SIZE sb_type = prev_mi ? 
prev_mi->mbmi.sb_type : 0; + if (prev_mi) { const ptrdiff_t offset = prev_mi - cm->prev_mi; mi_8x8[block_row * mis + block_col] = cm->mi + offset; @@ -1069,8 +1446,127 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, } } +const struct { + int row; + int col; +} coord_lookup[16] = { + // 32x32 index = 0 + {0, 0}, {0, 2}, {2, 0}, {2, 2}, + // 32x32 index = 1 + {0, 4}, {0, 6}, {2, 4}, {2, 6}, + // 32x32 index = 2 + {4, 0}, {4, 2}, {6, 0}, {6, 2}, + // 32x32 index = 3 + {4, 4}, {4, 6}, {6, 4}, {6, 6}, +}; + +static void set_source_var_based_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + const int mis = cm->mi_stride; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int r, c; + MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // In-image SB64 + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + const int src_stride = x->plane[0].src.stride; + const int pre_stride = cpi->Last_Source->y_stride; + const uint8_t *src = x->plane[0].src.buf; + const int pre_offset = (mi_row * MI_SIZE) * pre_stride + + (mi_col * MI_SIZE); + const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset; + const int thr_32x32 = cpi->sf.source_var_thresh; + const int thr_64x64 = thr_32x32 << 1; + int i, j; + int index; + diff d32[4]; + int use16x16 = 0; + + for (i = 0; i < 4; i++) { + diff d16[4]; + + for (j = 0; j < 4; j++) { + int b_mi_row = coord_lookup[i * 4 + j].row; + int b_mi_col = coord_lookup[i * 4 + j].col; + int b_offset = b_mi_row * MI_SIZE * src_stride + + b_mi_col * MI_SIZE; + + vp9_get_sse_sum_16x16(src + b_offset, + src_stride, + pre_src + b_offset, + pre_stride, &d16[j].sse, &d16[j].sum); + + d16[j].var = d16[j].sse - + (((uint32_t)d16[j].sum * d16[j].sum) >> 8); + + 
index = b_mi_row * mis + b_mi_col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; + + // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition + // size to further improve quality. + } + + if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 && + d16[2].var < thr_32x32 && d16[3].var < thr_32x32) { + d32[i].sse = d16[0].sse; + d32[i].sum = d16[0].sum; + + for (j = 1; j < 4; j++) { + d32[i].sse += d16[j].sse; + d32[i].sum += d16[j].sum; + } + + d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10); + + index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; + + if (!((cm->current_video_frame - 1) % + cpi->sf.search_type_check_frequency)) + cpi->use_large_partition_rate += 1; + } else { + use16x16 = 1; + } + } + + if (!use16x16) { + if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 && + d32[2].var < thr_64x64 && d32[3].var < thr_64x64) { + mi_8x8[0] = mi_upper_left; + mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + } + } + } else { // partial in-image SB64 + BLOCK_SIZE bsize = BLOCK_16X16; + int bh = num_8x8_blocks_high_lookup[bsize]; + int bw = num_8x8_blocks_wide_lookup[bsize]; + + for (r = 0; r < MI_BLOCK_SIZE; r += bh) { + for (c = 0; c < MI_BLOCK_SIZE; c += bw) { + int index = r * mis + c; + // Find a partition size that fits + bsize = find_partition_size(bsize, + (row8x8_remaining - r), + (col8x8_remaining - c), &bh, &bw); + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } +} + static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; if (cm->prev_mi) { @@ -1088,55 +1584,39 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { return 0; } -static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx) { - int i; +static void 
update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + int mi_row, int mi_col, int bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const struct segmentation *const seg = &cm->seg; - x->skip = ctx->skip; + *(xd->mi[0]) = ctx->mic; -#if CONFIG_INTERNAL_STATS - if (frame_is_intra_only(cm)) { - static const int kf_mode_index[] = { - THR_DC /*DC_PRED*/, - THR_V_PRED /*V_PRED*/, - THR_H_PRED /*H_PRED*/, - THR_D45_PRED /*D45_PRED*/, - THR_D135_PRED /*D135_PRED*/, - THR_D117_PRED /*D117_PRED*/, - THR_D153_PRED /*D153_PRED*/, - THR_D207_PRED /*D207_PRED*/, - THR_D63_PRED /*D63_PRED*/, - THR_TM /*TM_PRED*/, - }; - ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]; - } else { - // Note how often each mode chosen as best - ++cpi->mode_chosen_counts[ctx->best_mode_index]; + // For in frame adaptive Q, check for reseting the segment_id and updating + // the cyclic refresh map. 
+ if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); + vp9_init_plane_quantizers(cpi, x); } -#endif - if (!frame_is_intra_only(cm)) { - if (is_inter_block(mbmi)) { - if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { - MV best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i] = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; - vp9_update_mv_count(cm, xd, best_mv); - } - if (cm->interp_filter == SWITCHABLE) { - const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); - ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; - } + if (is_inter_block(mbmi)) { + vp9_update_mv_count(cm, xd); + + if (cm->interp_filter == SWITCHABLE) { + const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; } } + + x->skip = ctx->skip; } static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - int output_enabled, BLOCK_SIZE bsize) { + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { @@ -1145,8 +1625,9 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, if (x->ab_index > 0) return; } + set_offsets(cpi, tile, mi_row, mi_col, bsize); - update_state_rt(cpi, get_block_context(x, bsize)); + update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize); encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); update_stats(cpi); @@ -1160,6 +1641,8 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1170,10 +1653,9 @@ static void 
encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int idx_str = xd->mode_info_stride * mi_row + mi_col; + const int idx_str = xd->mi_stride * mi_row + mi_col; MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = mi_8x8[0]->mbmi.sb_type; } else { ctx = 0; @@ -1232,8 +1714,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1244,12 +1725,10 @@ static void rd_use_partition(VP9_COMP *cpi, int do_recon) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int mis = cm->mode_info_stride; + MACROBLOCKD *const xd = &x->e_mbd; + const int mis = cm->mi_stride; const int bsl = b_width_log2(bsize); - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - const int ms = num_4x4_blocks_wide / 2; - const int mh = num_4x4_blocks_high / 2; + const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; const int bss = (1 << bsl) / 4; int i, pl; PARTITION_TYPE partition = PARTITION_NONE; @@ -1268,10 +1747,14 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + int do_partition_search = 1; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + assert(num_4x4_blocks_wide_lookup[bsize] == + num_4x4_blocks_high_lookup[bsize]); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1291,9 +1774,22 
@@ static void rd_use_partition(VP9_COMP *cpi, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } - if (cpi->sf.partition_search_type == SEARCH_PARTITION && + if (!x->in_active_map) { + do_partition_search = 0; + if (mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { + *(get_sb_partitioning(x, bsize)) = bsize; + bs_type = mi_8x8[0]->mbmi.sb_type = bsize; + subsize = bsize; + partition = PARTITION_NONE; + } + } + if (do_partition_search && + cpi->sf.partition_search_type == SEARCH_PARTITION && cpi->sf.adjust_partitioning_from_last_frame) { // Check if any of the sub blocks are further split. if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { @@ -1311,15 +1807,13 @@ static void rd_use_partition(VP9_COMP *cpi, // If partition is not none try none unless each of the 4 splits are split // even further.. 
if (partition != PARTITION_NONE && !splits_below && - mi_row + (ms >> 1) < cm->mi_rows && - mi_col + (ms >> 1) < cm->mi_cols) { + mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { *(get_sb_partitioning(x, bsize)) = bsize; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, get_block_context(x, bsize), INT64_MAX); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1344,13 +1838,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) { + bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; int64_t dt = 0; - update_state(cpi, get_block_context(x, subsize), subsize, 0); + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1368,13 +1863,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; int64_t dt = 0; - update_state(cpi, get_block_context(x, subsize), subsize, 0); + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - 
rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1390,8 +1886,8 @@ static void rd_use_partition(VP9_COMP *cpi, last_part_rate = 0; last_part_dist = 0; for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; int64_t dt; @@ -1417,18 +1913,20 @@ static void rd_use_partition(VP9_COMP *cpi, assert(0); } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) { last_part_rate += x->partition_cost[pl][partition]; last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } - if (cpi->sf.adjust_partitioning_from_last_frame + if (do_partition_search + && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 - && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows) - && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) { + && (mi_row + mi_step < cm->mi_rows || + mi_row + (mi_step >> 1) == cm->mi_rows) + && (mi_col + mi_step < cm->mi_cols || + mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; chosen_dist = 0; @@ -1436,8 +1934,8 @@ static void rd_use_partition(VP9_COMP *cpi, // Split partition. 
for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2); - int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -1471,14 +1969,11 @@ static void rd_use_partition(VP9_COMP *cpi, encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row + y_idx, mi_col + x_idx, + pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += x->partition_cost[pl][PARTITION_NONE]; } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rate < INT_MAX) { chosen_rate += x->partition_cost[pl][PARTITION_SPLIT]; chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); @@ -1516,10 +2011,14 @@ static void rd_use_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. 
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, chosen_rate); + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, + output_enabled, chosen_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + chosen_rate, chosen_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } @@ -1567,7 +2066,7 @@ static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8, *min_block_size = MIN(*min_block_size, sb_type); *max_block_size = MAX(*max_block_size, sb_type); } - index += xd->mode_info_stride; + index += xd->mi_stride; } } @@ -1583,77 +2082,71 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { // Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, - int row, int col, + int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { - VP9_COMMON * const cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MODE_INFO ** mi_8x8 = xd->mi_8x8; - MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8; - + MODE_INFO **mi_8x8 = xd->mi; const int left_in_image = xd->left_available && mi_8x8[-1]; const int above_in_image = xd->up_available && - mi_8x8[-xd->mode_info_stride]; - MODE_INFO ** above_sb64_mi_8x8; - MODE_INFO ** left_sb64_mi_8x8; + mi_8x8[-xd->mi_stride]; + MODE_INFO **above_sb64_mi_8x8; + MODE_INFO **left_sb64_mi_8x8; - int row8x8_remaining = tile->mi_row_end - row; - int col8x8_remaining = tile->mi_col_end - col; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; - + BLOCK_SIZE min_size = BLOCK_4X4; + BLOCK_SIZE max_size = BLOCK_64X64; // Trap case where we do not have a prediction. 
- if (!left_in_image && !above_in_image && - ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) { - *min_block_size = BLOCK_4X4; - *max_block_size = BLOCK_64X64; - } else { + if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - *min_block_size = BLOCK_64X64; - *max_block_size = BLOCK_4X4; + min_size = BLOCK_64X64; + max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous // passed in values for min and max as a starting point. - // // Find the min and max partition used in previous frame at this location - if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) { - get_sb_partition_size_range(cpi, prev_mi_8x8, - min_block_size, max_block_size); + if (cm->frame_type != KEY_FRAME) { + MODE_INFO **const prev_mi = + &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; + get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the left SB64 if (left_in_image) { left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, left_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); } - // Find the min and max partition sizes used in the above SB64. 
if (above_in_image) { - above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE]; + above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, above_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); + } + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + min_size = min_partition_size[min_size]; + max_size = max_partition_size[max_size]; } } - // adjust observed min and max - if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; - } - - // Check border cases where max and min from neighbours may not be legal. - *max_block_size = find_partition_size(*max_block_size, - row8x8_remaining, col8x8_remaining, - &bh, &bw); - *min_block_size = MIN(*min_block_size, *max_block_size); + // Check border cases where max and min from neighbors may not be legal. + max_size = find_partition_size(max_size, + row8x8_remaining, col8x8_remaining, + &bh, &bw); + min_size = MIN(min_size, max_size); // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. 
if (cpi->sf.use_square_partition_only && - (*max_block_size - *min_block_size) < 2) { - *min_block_size = next_square_size[*min_block_size]; + next_square_size[max_size] < min_size) { + min_size = next_square_size[max_size]; } + *min_block_size = min_size; + *max_block_size = max_size; } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { @@ -1673,10 +2166,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); int i, pl; BLOCK_SIZE subsize; int this_rate, sum_rate = 0, best_rate = INT_MAX; @@ -1685,8 +2180,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int do_split = bsize >= BLOCK_8X8; int do_rect = 1; // Override skipping rectangular partition operations for edge blocks - const int force_horz_split = (mi_row + ms >= cm->mi_rows); - const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); + const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; @@ -1712,6 +2207,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } // Determine partition types in search according to the speed features. 
@@ -1745,15 +2242,15 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, - get_block_context(x, bsize), best_rd); + ctx, best_rd); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += x->partition_cost[pl][PARTITION_NONE]; } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); @@ -1780,12 +2277,16 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); } // store estimated motion vector if (cpi->sf.adaptive_motion_search) - store_pred_mv(x, get_block_context(x, bsize)); + store_pred_mv(x, ctx); // PARTITION_SPLIT sum_rd = 0; @@ -1794,19 +2295,19 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (do_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - const int x_idx = (i & 1) * ms; - const int y_idx = (i >> 1) * ms; + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; *get_sb_index(x, subsize) = i; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; + ctx->mic.mbmi.interp_filter; rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, 
&this_dist, i != 3, best_rd - sum_rd); @@ -1819,9 +2320,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd && i == 4) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -1844,27 +2343,28 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, subsize = get_subsize(bsize, PARTITION_HORZ); *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; + ctx->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); + if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, + ctx->mic.mbmi.interp_filter; + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, 
get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -1876,9 +2376,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_HORZ]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -1897,26 +2395,27 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; + ctx->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); + if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { + update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, + subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) - load_pred_mv(x, get_block_context(x, bsize)); + load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = - get_block_context(x, bsize)->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, + ctx->mic.mbmi.interp_filter; + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, get_block_context(x, 
subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -1928,9 +2427,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_VERT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -1958,8 +2455,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate); + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } if (bsize == BLOCK_64X64) { @@ -1974,11 +2477,13 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + SPEED_FEATURES *const sf = &cpi->sf; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; @@ -1989,7 +2494,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE i; MACROBLOCK *x = &cpi->mb; - if 
(cpi->sf.adaptive_pred_interp_filter) { + if (sf->adaptive_pred_interp_filter) { for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) { const int num_4x4_w = num_4x4_blocks_wide_lookup[i]; const int num_4x4_h = num_4x4_blocks_high_lookup[i]; @@ -2003,64 +2508,69 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if ((cpi->sf.partition_search_type == SEARCH_PARTITION && - cpi->sf.use_lastframe_partitioning) || - cpi->sf.partition_search_type == FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { - const int idx_str = cm->mode_info_stride * mi_row + mi_col; + if ((sf->partition_search_type == SEARCH_PARTITION && + sf->use_lastframe_partitioning) || + sf->partition_search_type == FIXED_PARTITION || + sf->partition_search_type == VAR_BASED_PARTITION || + sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { + const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - cpi->mb.source_variance = UINT_MAX; - if (cpi->sf.partition_search_type == FIXED_PARTITION) { + if (sf->partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); - set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - cpi->sf.always_this_block_size); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, + sf->always_this_block_size); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case. - // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION - // map to the same thing. 
+ } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); - set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1); + } else if (sf->partition_search_type == VAR_BASED_PARTITION) { + choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } else { if ((cm->current_video_frame - % cpi->sf.last_partitioning_redo_frequency) == 0 + % sf->last_partitioning_redo_frequency) == 0 || cm->prev_mi == 0 || cm->show_frame == 0 || cm->frame_type == KEY_FRAME || cpi->rc.is_src_frame_alt_ref - || ((cpi->sf.use_lastframe_partitioning == + || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && sb_has_motion(cm, prev_mi_8x8))) { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - copy_partitioning(cm, mi_8x8, prev_mi_8x8); + if (sf->constrain_copy_partition && + sb_has_motion(cm, prev_mi_8x8)) + constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8, + mi_row, mi_col, BLOCK_16X16); + else + copy_partitioning(cm, mi_8x8, prev_mi_8x8); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } } } else { // If required set upper and lower partition size limits - if 
(cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); @@ -2075,40 +2585,27 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); x->act_zbin_adj = 0; - cpi->seg0_idx = 0; - - xd->mode_info_stride = cm->mode_info_stride; // Copy data over into macro block data structures. vp9_setup_src_planes(x, cpi->Source, 0, 0); // TODO(jkoleszar): are these initializations required? - setup_pre_planes(xd, 0, get_ref_frame_buffer(cpi, LAST_FRAME), 0, 0, NULL); - setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0); + vp9_setup_pre_planes(xd, 0, get_ref_frame_buffer(cpi, LAST_FRAME), 0, 0, + NULL); + vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0); vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); - xd->mi_8x8[0]->mbmi.mode = DC_PRED; - xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED; - - vp9_zero(cm->counts.y_mode); - vp9_zero(cm->counts.uv_mode); - vp9_zero(cm->counts.inter_mode); - vp9_zero(cm->counts.partition); - vp9_zero(cm->counts.intra_inter); - vp9_zero(cm->counts.comp_inter); - vp9_zero(cm->counts.single_ref); - vp9_zero(cm->counts.comp_ref); - vp9_zero(cm->counts.tx); - vp9_zero(cm->counts.skip); + xd->mi[0]->mbmi.mode = DC_PRED; + xd->mi[0]->mbmi.uv_mode = DC_PRED; // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(cpi->above_context[0], 0, - sizeof(*cpi->above_context[0]) * + vpx_memset(xd->above_context[0], 0, + sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); - vpx_memset(cpi->above_seg_context, 0, - sizeof(*cpi->above_seg_context) * aligned_mi_cols); + vpx_memset(xd->above_seg_context, 0, + sizeof(*xd->above_seg_context) * aligned_mi_cols); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { @@ -2127,13 +2624,6 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { } } -static void switch_tx_mode(VP9_COMP *cpi) { - if (cpi->sf.tx_size_search_method == USE_LARGESTALL && - cpi->common.tx_mode >= ALLOW_32X32) - cpi->common.tx_mode = ALLOW_32X32; -} - - static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; @@ -2145,105 +2635,20 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { } } -static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - if (!mi_8x8[y * mis + x]->mbmi.skip) - return 0; - } - } - - return 1; -} - -static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs, - TX_SIZE tx_size) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) - mi_8x8[y * mis + x]->mbmi.tx_size = tx_size; - } -} - -static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis, - TX_SIZE max_tx_size, int bw, int bh, - int mi_row, int mi_col, - MODE_INFO **mi_8x8) { - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { - return; - } else { - MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi; - if (mbmi->tx_size > max_tx_size) { - const int ymbs = MIN(bh, cm->mi_rows - mi_row); - const int xmbs = MIN(bw, cm->mi_cols - mi_col); - - assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || - get_skip_flag(mi_8x8, mis, ymbs, xmbs)); - set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); - } - } -} - -static void reset_skip_txfm_size_sb(VP9_COMMON *cm, 
MODE_INFO **mi_8x8, - TX_SIZE max_tx_size, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const int mis = cm->mode_info_stride; - int bw, bh; - const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; - - if (bw == bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col, - mi_8x8); - } else if (bw == bs && bh < bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs, - mi_col, mi_8x8 + hbs * mis); - } else if (bw < bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, - mi_col + hbs, mi_8x8 + hbs); - } else { - const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; - int n; - - assert(bw < bs && bh < bs); - - for (n = 0; n < 4; n++) { - const int mi_dc = hbs * (n & 1); - const int mi_dr = hbs * (n >> 1); - - reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size, - mi_row + mi_dr, mi_col + mi_dc, subsize); - } - } -} - static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { int mi_row, mi_col; - const int mis = cm->mode_info_stride; - MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible; + const int mis = cm->mi_stride; + MODE_INFO **mi_ptr = cm->mi_grid_visible; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) { - mi_8x8 = mi_ptr; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) { - reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col, - BLOCK_64X64); + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max) + 
mi_ptr[mi_col]->mbmi.tx_size = txfm_max; } } } -static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) { +static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME; else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) @@ -2254,30 +2659,31 @@ static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) { return GOLDEN_FRAME; } -static void select_tx_mode(VP9_COMP *cpi) { +static TX_MODE select_tx_mode(const VP9_COMP *cpi) { if (cpi->oxcf.lossless) { - cpi->common.tx_mode = ONLY_4X4; + return ONLY_4X4; } else if (cpi->common.current_video_frame == 0) { - cpi->common.tx_mode = TX_MODE_SELECT; + return TX_MODE_SELECT; } else { if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { - cpi->common.tx_mode = ALLOW_32X32; + return ALLOW_32X32; } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) { - int frame_type = get_frame_type(cpi); - cpi->common.tx_mode = - cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] - > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? - ALLOW_32X32 : TX_MODE_SELECT; + const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); + return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] > + cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? + ALLOW_32X32 : TX_MODE_SELECT; } else { unsigned int total = 0; int i; for (i = 0; i < TX_SIZES; ++i) total += cpi->tx_stepdown_count[i]; + if (total) { - double fraction = (double)cpi->tx_stepdown_count[0] / total; - cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT; - // printf("fraction = %f\n", fraction); - } // else keep unchanged + const double fraction = (double)cpi->tx_stepdown_count[0] / total; + return fraction > 0.90 ? 
ALLOW_32X32 : TX_MODE_SELECT; + } else { + return cpi->common.tx_mode; + } } } } @@ -2296,171 +2702,573 @@ typedef enum { static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, MB_PREDICTION_MODE mode) { - mbmi->interp_filter = EIGHTTAP; mbmi->mode = mode; + mbmi->uv_mode = mode; mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; - if (mode < NEARESTMV) { - mbmi->ref_frame[0] = INTRA_FRAME; - } else { - mbmi->ref_frame[0] = LAST_FRAME; - } - - mbmi->ref_frame[1] = INTRA_FRAME; + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE; mbmi->tx_size = max_txsize_lookup[bsize]; - mbmi->uv_mode = mode; mbmi->skip = 0; mbmi->sb_type = bsize; mbmi->segment_id = 0; } -static INLINE int get_block_row(int b32i, int b16i, int b8i) { - return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1); +static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, + int *rate, int64_t *dist, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + set_offsets(cpi, tile, mi_row, mi_col, bsize); + xd->mi[0]->mbmi.sb_type = bsize; + + if (!frame_is_intra_only(cm)) { + vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, + rate, dist, bsize); + } else { + MB_PREDICTION_MODE intramode = DC_PRED; + set_mode_info(&xd->mi[0]->mbmi, bsize, intramode); + } + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); } -static INLINE int get_block_col(int b32i, int b16i, int b8i) { - return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1); +static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE subsize) { + MACROBLOCKD *xd = &x->e_mbd; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = partition_lookup[bsl][subsize]; + + assert(bsize >= BLOCK_8X8); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + 
set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize); + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize); + } + break; + case PARTITION_SPLIT: + *get_sb_index(x, subsize) = 0; + fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 1; + fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 2; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 3; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + break; + default: + break; + } } -static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist) { +static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, + int mi_col, BLOCK_SIZE bsize, 
int *rate, + int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; - int mis = cm->mode_info_stride; - int br, bc; - int i, j; - MB_PREDICTION_MODE mode = DC_PRED; - int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row); - int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col); + MACROBLOCKD *const xd = &x->e_mbd; + const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); + int i; + BLOCK_SIZE subsize; + int this_rate, sum_rate = 0, best_rate = INT_MAX; + int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; + int64_t sum_rd = 0; + int do_split = bsize >= BLOCK_8X8; + int do_rect = 1; + // Override skipping rectangular partition operations for edge blocks + const int force_horz_split = (mi_row + ms >= cm->mi_rows); + const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int xss = x->e_mbd.plane[1].subsampling_x; + const int yss = x->e_mbd.plane[1].subsampling_y; - int bw = num_8x8_blocks_wide_lookup[bsize]; - int bh = num_8x8_blocks_high_lookup[bsize]; + int partition_none_allowed = !force_horz_split && !force_vert_split; + int partition_horz_allowed = !force_vert_split && yss <= xss && + bsize >= BLOCK_8X8; + int partition_vert_allowed = !force_horz_split && xss <= yss && + bsize >= BLOCK_8X8; + (void) *tp_orig; - int brate = 0; - int64_t bdist = 0; - *rate = 0; - *dist = 0; + if (bsize < BLOCK_8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. + if (x->ab_index != 0) { + *rate = 0; + *dist = 0; + return; + } + } + + assert(num_8x8_blocks_wide_lookup[bsize] == + num_8x8_blocks_high_lookup[bsize]); + + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + // Determine partition types in search according to the speed features. 
+ // The threshold set here has to be of square block size. + if (cpi->sf.auto_min_max_partition_size) { + partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size); + partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_horz_split); + partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_vert_split); + do_split &= bsize > cpi->sf.min_partition_size; + } + if (cpi->sf.use_square_partition_only) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; + } - // find prediction mode for each 8x8 block - for (br = 0; br < rows; br += bh) { - for (bc = 0; bc < cols; bc += bw) { - int row = mi_row + br; - int col = mi_col + bc; + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; - BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc, - &bh, &bw); + // PARTITION_NONE + if (partition_none_allowed) { + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, bsize); + ctx->mic.mbmi = xd->mi[0]->mbmi; - set_offsets(cpi, tile, row, col, bs); + if (this_rate != INT_MAX) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_NONE]; + sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); + if (sum_rd < best_rd) { + int64_t stop_thresh = 4096; + int64_t stop_thresh_rd; - if (cm->frame_type != KEY_FRAME) - vp9_pick_inter_mode(cpi, x, tile, row, col, - &brate, &bdist, bs); - else - set_mode_info(&xd->mi_8x8[0]->mbmi, bs, mode); + best_rate = this_rate; + best_dist = this_dist; + best_rd = sum_rd; + if (bsize >= BLOCK_8X8) + *(get_sb_partitioning(x, bsize)) = bsize; + + // Adjust threshold according to partition size. 
+ stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); + // If obtained distortion is very small, choose current partition + // and stop splitting. + if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + do_split = 0; + do_rect = 0; + } + } + } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } + } + + // store estimated motion vector + store_pred_mv(x, ctx); + + // PARTITION_SPLIT + sum_rd = 0; + if (do_split) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + subsize = get_subsize(bsize, PARTITION_SPLIT); + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + const int x_idx = (i & 1) * ms; + const int y_idx = (i >> 1) * ms; + + if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) + continue; + + *get_sb_index(x, subsize) = i; + load_pred_mv(x, ctx); + + nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, &this_dist, 0, + best_rd - sum_rd); + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } else { + // skip rectangular partition test when larger block size + // gives better rd cost + if (cpi->sf.less_rectangular_check) + do_rect &= !partition_none_allowed; + } + } + + // PARTITION_HORZ + if (partition_horz_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_HORZ); + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, 
sum_dist); - *rate += brate; - *dist += bdist; + if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); - for (j = 0; j < bh; ++j) - for (i = 0; i < bw; ++i) - xd->mi_8x8[j * mis + i] = xd->mi_8x8[0]; + nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_HORZ]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rd = sum_rd; + best_rate = sum_rate; + best_dist = sum_dist; + *(get_sb_partitioning(x, bsize)) = subsize; } } + + // PARTITION_VERT + if (partition_vert_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_VERT); + + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_VERT]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + *rate 
= best_rate; + *dist = best_dist; + + if (best_rate == INT_MAX) + return; + + // update mode info array + fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, + *(get_sb_partitioning(x, bsize))); + + if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { + int output_enabled = (bsize == BLOCK_64X64); + + // Check the projected output rate for this SB against it's target + // and and if necessary apply a Q delta using segmentation to get + // closer to the target. + if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); + } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); + } + + if (bsize == BLOCK_64X64) { + assert(tp_orig < *tp); + assert(best_rate < INT_MAX); + assert(best_dist < INT64_MAX); + } else { + assert(tp_orig == *tp); + } +} + +static void nonrd_use_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + TOKENEXTRA **tp, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int output_enabled, + int *totrate, int64_t *totdist) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + const int mis = cm->mi_stride; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + int rate = INT_MAX; + int64_t dist = INT64_MAX; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + subsize = (bsize >= BLOCK_8X8) ? 
mi_8x8[0]->mbmi.sb_type : BLOCK_4X4; + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, + &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, + &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = mi_8x8[0]->mbmi; + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + } + break; + case PARTITION_SPLIT: + subsize = get_subsize(bsize, PARTITION_SPLIT); + *get_sb_index(x, subsize) = 0; + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + subsize, output_enabled, totrate, totdist); + *get_sb_index(x, subsize) = 1; + nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp, + mi_row, mi_col + hbs, subsize, output_enabled, + &rate, &dist); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + *get_sb_index(x, subsize) = 2; + nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp, + mi_row + 
hbs, mi_col, subsize, output_enabled, + &rate, &dist); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + *get_sb_index(x, subsize) = 3; + nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp, + mi_row + hbs, mi_col + hbs, subsize, output_enabled, + &rate, &dist); + if (rate != INT_MAX && dist != INT64_MAX && + *totrate != INT_MAX && *totdist != INT64_MAX) { + *totrate += rate; + *totdist += dist; + } + break; + default: + assert("Invalid partition type."); + } + + if (bsize == BLOCK_64X64 && output_enabled) { + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + *totrate, *totdist); + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize); + } } static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; + int dummy_rate = 0; + int64_t dummy_dist = 0; + const int idx_str = cm->mi_stride * mi_row + mi_col; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; + MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; + BLOCK_SIZE bsize; cpi->mb.source_variance = UINT_MAX; + vp9_zero(cpi->mb.pred_mv); - if (cpi->sf.partition_search_type == FIXED_PARTITION) { - nonrd_use_partition(cpi, tile, tp, mi_row, mi_col, - cpi->sf.always_this_block_size, - &dummy_rate, &dummy_dist); - encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, 
BLOCK_64X64); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_PARTITION) { - // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case. - // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION - // map to the same thing. - BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi, - mi_row, - mi_col); - nonrd_use_partition(cpi, tile, tp, mi_row, mi_col, - bsize, &dummy_rate, &dummy_dist); - encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64); - } else { - assert(0); + // Set the partition type of the 64X64 block + switch (cpi->sf.partition_search_type) { + case VAR_BASED_PARTITION: + choose_partitioning(cpi, tile, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case SOURCE_VAR_BASED_PARTITION: + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case VAR_BASED_FIXED_PARTITION: + case FIXED_PARTITION: + bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
+ cpi->sf.always_this_block_size : + get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + 1, &dummy_rate, &dummy_dist); + break; + case REFERENCE_PARTITION: + if (cpi->sf.partition_check || sb_has_motion(cm, prev_mi_8x8)) { + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, INT64_MAX); + } else { + copy_partitioning(cm, mi_8x8, prev_mi_8x8); + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rate, &dummy_dist); + } + break; + default: + assert(0); } } } // end RTC play code static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; + SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; - vp9_zero(cm->counts.switchable_interp); + vp9_zero(cm->counts); + vp9_zero(cpi->coef_counts); vp9_zero(cpi->tx_stepdown_count); + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); - xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; - - xd->last_mi = cm->prev_mi; - - vp9_zero(cm->counts.mv); - vp9_zero(cpi->coef_counts); - vp9_zero(cm->counts.eob_branch); + cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); 
vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); - switch_tx_mode(cpi); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. init_encode_frame_mb_context(cpi); - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + build_activity_map(cpi); set_prev_mi(cm); - if (cpi->sf.use_nonrd_pick_mode) { + if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. int i; @@ -2474,6 +3282,30 @@ static void encode_frame_internal(VP9_COMP *cpi) { pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; p[i].eobs = ctx->eobs_pbuf[i][0]; } + vp9_zero(x->zcoeff_blk); + + if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION && + cm->current_video_frame > 0) { + int check_freq = sf->search_type_check_frequency; + + if ((cm->current_video_frame - 1) % check_freq == 0) { + cpi->use_large_partition_rate = 0; + } + + if ((cm->current_video_frame - 1) % check_freq == 1) { + const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] - + b_width_log2_lookup[BLOCK_16X16]) + + (b_height_log2_lookup[BLOCK_32X32] - + b_height_log2_lookup[BLOCK_16X16])); + cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 * + mbs_in_b32x32 / cm->MBs; + } + + if ((cm->current_video_frame - 1) % check_freq >= 1) { + if (cpi->use_large_partition_rate < 15) + sf->partition_search_type = FIXED_PARTITION; + } + } } { @@ -2491,12 +3323,13 @@ static void encode_frame_internal(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileInfo tile; TOKENEXTRA *tp_old = tp; + int mi_row; // For each row of SBs in the 
frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode) + if (sf->use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else encode_rd_sb_row(cpi, &tile, mi_row, &tp); @@ -2511,18 +3344,18 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - if (cpi->sf.skip_encode_sb) { + if (sf->skip_encode_sb) { int j; unsigned int intra_count = 0, inter_count = 0; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { intra_count += cm->counts.intra_inter[j][0]; inter_count += cm->counts.intra_inter[j][1]; } - cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); - cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); - cpi->sf.skip_encode_frame &= cm->show_frame; + sf->skip_encode_frame = (intra_count << 2) < inter_count && + cm->frame_type != KEY_FRAME && + cm->show_frame; } else { - cpi->sf.skip_encode_frame = 0; + sf->skip_encode_frame = 0; } #if 0 @@ -2556,33 +3389,31 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.frame_parameter_update) { int i; - REFERENCE_MODE reference_mode; - /* - * This code does a single RD pass over the whole frame assuming - * either compound, single or hybrid prediction as per whatever has - * worked best for that type of frame in the past. - * It also predicts whether another coding mode would have worked - * better that this coding mode. If that is the case, it remembers - * that for subsequent frames. - * It does the same analysis for transform size selection also. - */ + + // This code does a single RD pass over the whole frame assuming + // either compound, single or hybrid prediction as per whatever has + // worked best for that type of frame in the past. + // It also predicts whether another coding mode would have worked + // better that this coding mode. 
If that is the case, it remembers + // that for subsequent frames. + // It does the same analysis for transform size selection also. const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3 || !cm->allow_comp_inter_inter) - reference_mode = SINGLE_REFERENCE; + if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) + cm->reference_mode = SINGLE_REFERENCE; else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] && mode_thresh[COMPOUND_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - reference_mode = COMPOUND_REFERENCE; + cm->reference_mode = COMPOUND_REFERENCE; else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT]) - reference_mode = SINGLE_REFERENCE; + cm->reference_mode = SINGLE_REFERENCE; else - reference_mode = REFERENCE_MODE_SELECT; + cm->reference_mode = REFERENCE_MODE_SELECT; if (cm->interp_filter == SWITCHABLE) { if (frame_type != ALTREF_FRAME && @@ -2598,12 +3429,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; - - /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */ - select_tx_mode(cpi); - cm->reference_mode = reference_mode; - encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { @@ -2682,8 +3507,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - // Force the usage of the BILINEAR interp_filter. 
- cm->interp_filter = BILINEAR; + cm->reference_mode = SINGLE_REFERENCE; + cm->interp_filter = SWITCHABLE; encode_frame_internal(cpi); } } @@ -2693,18 +3518,18 @@ static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { const MB_PREDICTION_MODE uv_mode = mi->mbmi.uv_mode; const BLOCK_SIZE bsize = mi->mbmi.sb_type; - ++counts->uv_mode[y_mode][uv_mode]; - if (bsize < BLOCK_8X8) { int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) + const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; + for (idy = 0; idy < 2; idy += num_4x4_h) + for (idx = 0; idx < 2; idx += num_4x4_w) ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; } else { ++counts->y_mode[size_group_lookup[bsize]][y_mode]; } + + ++counts->uv_mode[y_mode][uv_mode]; } // Experimental stub function to create a per MB zbin adjustment based on @@ -2713,13 +3538,10 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->act_zbin_adj = *(x->mb_activity_ptr); #else - int64_t a; - int64_t b; - int64_t act = *(x->mb_activity_ptr); - // Apply the masking to the RD multiplier. 
- a = act + 4 * cpi->activity_avg; - b = 4 * act + cpi->activity_avg; + const int64_t act = *(x->mb_activity_ptr); + const int64_t a = act + 4 * cpi->activity_avg; + const int64_t b = 4 * act + cpi->activity_avg; if (act > cpi->activity_avg) x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1; @@ -2751,23 +3573,26 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO **mi_8x8 = xd->mi_8x8; + MODE_INFO **mi_8x8 = xd->mi; MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); unsigned int segment_id = mbmi->segment_id; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ) && - !cpi->sf.use_nonrd_pick_mode; + cpi->oxcf.aq_mode != COMPLEXITY_AQ && + cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && + cpi->sf.allow_skip_recode; + x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH); + if (x->skip_encode) return; @@ -2778,7 +3603,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, } } else { set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. 
@@ -2806,7 +3630,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]); - setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); + vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, + &xd->block_refs[ref]->sf); } vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.h b/source/libvpx/vp9/encoder/vp9_encodeframe.h index f7d17c3..131e932 100644 --- a/source/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/source/libvpx/vp9/encoder/vp9_encodeframe.h @@ -18,11 +18,20 @@ extern "C" { struct macroblock; struct yv12_buffer_config; +struct VP9_COMP; + +typedef struct { + unsigned int sse; + int sum; + unsigned int var; +} diff; void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); +void vp9_encode_frame(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.c b/source/libvpx/vp9/encoder/vp9_encodemb.c index 513730e..5e98e4e 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemb.c +++ b/source/libvpx/vp9/encoder/vp9_encodemb.c @@ -107,11 +107,11 @@ static int trellis_get_coeff_context(const int16_t *scan, static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, MACROBLOCK *mb, - struct optimize_ctx *ctx) { + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *p = &mb->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); @@ -133,18 +133,13 @@ static void optimize_b(int plane, int block, BLOCK_SIZE 
plane_bsize, const scan_order *so = get_scan(xd, tx_size, type, block); const int16_t *scan = so->scan; const int16_t *nb = so->neighbors; - ENTROPY_CONTEXT *a, *l; - int tx_x, tx_y; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &tx_x, &tx_y); - a = &ctx->ta[plane][tx_x]; - l = &ctx->tl[plane][tx_y]; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; - if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi)) + if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi)) rdmult = (rdmult * 9) >> 4; rddiv = mb->rddiv; /* Initialize the sentinel node of the trellis. */ @@ -380,15 +375,17 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); int i, j; uint8_t *dst; + ENTROPY_CONTEXT *a, *l; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; + a = &ctx->ta[plane][i]; + l = &ctx->tl[plane][j]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. 
if (x->zcoeff_blk[tx_size][block] && plane == 0) { p->eobs[block] = 0; - ctx->ta[plane][i] = 0; - ctx->tl[plane][j] = 0; + *a = *l = 0; return; } @@ -396,10 +393,9 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { - optimize_b(plane, block, plane_bsize, tx_size, x, ctx); + optimize_b(plane, block, plane_bsize, tx_size, x, a, l); } else { - ctx->ta[plane][i] = p->eobs[block] > 0; - ctx->tl[plane][j] = p->eobs[block] > 0; + *a = *l = p->eobs[block] > 0; } if (p->eobs[block]) @@ -428,6 +424,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, assert(0 && "Invalid transform size"); } } + static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { MACROBLOCK *const x = (MACROBLOCK *)arg; @@ -455,7 +452,7 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct encode_b_args arg = {x, &ctx, &mbmi->skip}; int plane; @@ -480,7 +477,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t *coeff = BLOCK_OFFSET(p->coeff, block); @@ -502,9 +499,6 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, src = &p->src.buf[4 * (j * src_stride + i)]; src_diff = &p->src_diff[4 * (j * diff_stride + i)]; - // if (x->optimize) - // optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); - switch (tx_size) { 
case TX_32X32: scan_order = &vp9_default_scan_orders[TX_32X32]; @@ -526,7 +520,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); break; case TX_16X16: - tx_type = get_tx_type_16x16(pd->plane_type, xd); + tx_type = get_tx_type(pd->plane_type, xd); scan_order = &vp9_scan_orders[TX_16X16][tx_type]; mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, @@ -546,7 +540,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; case TX_8X8: - tx_type = get_tx_type_8x8(pd->plane_type, xd); + tx_type = get_tx_type(pd->plane_type, xd); scan_order = &vp9_scan_orders[TX_8X8][tx_type]; mode = plane == 0 ? mbmi->mode : mbmi->uv_mode; vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, @@ -568,7 +562,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi_8x8[0], block) : mbmi->uv_mode; + mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? 
src_stride : dst_stride, @@ -614,14 +608,14 @@ void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block, void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { const MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip}; + struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra, &arg); } int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) { - MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi; x->skip_encode = 0; mbmi->mode = DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; diff --git a/source/libvpx/vp9/encoder/vp9_encodemv.c b/source/libvpx/vp9/encoder/vp9_encodemv.c index 5079699..9d44865 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemv.c +++ b/source/libvpx/vp9/encoder/vp9_encodemv.c @@ -13,11 +13,9 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_encodemv.h" -#ifdef ENTROPY_STATS -extern unsigned int active_section; -#endif +#include "vp9/encoder/vp9_cost.h" +#include "vp9/encoder/vp9_encodemv.h" static struct vp9_token mv_joint_encodings[MV_JOINTS]; static struct vp9_token mv_class_encodings[MV_CLASSES]; @@ -160,7 +158,7 @@ static void write_mv_update(const vp9_tree_index *tree, vp9_tree_probs_from_distribution(tree, branch_ct, counts); for (i = 0; i < n - 1; ++i) - update_mv(w, branch_ct[i], &probs[i], NMV_UPDATE_PROB); + update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB); } void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { @@ -174,13 +172,13 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { nmv_component *comp = &mvc->comps[i]; nmv_component_counts *comp_counts = &counts->comps[i]; - update_mv(w, comp_counts->sign, &comp->sign, NMV_UPDATE_PROB); + update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB); 
write_mv_update(vp9_mv_class_tree, comp->classes, comp_counts->classes, MV_CLASSES, w); write_mv_update(vp9_mv_class0_tree, comp->class0, comp_counts->class0, CLASS0_SIZE, w); for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(w, comp_counts->bits[j], &comp->bits[j], NMV_UPDATE_PROB); + update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { @@ -195,8 +193,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { if (usehp) { for (i = 0; i < 2; ++i) { update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp, - NMV_UPDATE_PROB); - update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, NMV_UPDATE_PROB); + MV_UPDATE_PROB); + update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB); } } } @@ -231,22 +229,21 @@ void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); } -static void inc_mvs(const int_mv mv[2], const MV ref[2], int is_compound, +static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], nmv_context_counts *counts) { int i; - for (i = 0; i < 1 + is_compound; ++i) { - const MV diff = { mv[i].as_mv.row - ref[i].row, - mv[i].as_mv.col - ref[i].col }; + + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { + const MV *ref = &mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_mv; + const MV diff = {mvs[i].as_mv.row - ref->row, + mvs[i].as_mv.col - ref->col}; vp9_inc_mv(&diff, counts); } } -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, - const MV best_ref_mv[2]) { - const MODE_INFO *mi = xd->mi_8x8[0]; +void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { + const MODE_INFO *mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; - const int is_compound = has_second_ref(mbmi); - nmv_context_counts *counts = &cm->counts.mv; if (mbmi->sb_type < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type]; @@ -257,11 +254,12 @@ void vp9_update_mv_count(VP9_COMMON *cm, const 
MACROBLOCKD *xd, for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; if (mi->bmi[i].as_mode == NEWMV) - inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, counts); + inc_mvs(mbmi, mi->bmi[i].as_mv, &cm->counts.mv); } } - } else if (mbmi->mode == NEWMV) { - inc_mvs(mbmi->mv, best_ref_mv, is_compound, counts); + } else { + if (mbmi->mode == NEWMV) + inc_mvs(mbmi, mbmi->mv, &cm->counts.mv); } } diff --git a/source/libvpx/vp9/encoder/vp9_encodemv.h b/source/libvpx/vp9/encoder/vp9_encodemv.h index f16b2c1..50cb961 100644 --- a/source/libvpx/vp9/encoder/vp9_encodemv.h +++ b/source/libvpx/vp9/encoder/vp9_encodemv.h @@ -28,8 +28,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context* mvctx, int usehp); -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd, - const MV best_ref_mv[2]); +void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd); #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c index 8e454e6..db32ef8 100644 --- a/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -20,9 +20,10 @@ #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_quant_common.h" -#include "vp9/common/vp9_reconinter.h" // setup_dst_planes() +#include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes() #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" @@ -34,7 +35,6 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_variance.h" #define OUTPUT_FPF 0 @@ -54,7 +54,14 @@ #define MIN_KF_BOOST 300 -#define DISABLE_RC_LONG_TERM_MEM 0 +#if 
CONFIG_MULTIPLE_ARF +// Set MIN_GF_INTERVAL to 1 for the full decomposition. +#define MIN_GF_INTERVAL 2 +#else +#define MIN_GF_INTERVAL 4 +#endif + +#define DISABLE_RC_LONG_TERM_MEM static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; @@ -62,22 +69,6 @@ static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { *b = temp; } -static int select_cq_level(int qindex) { - int ret_val = QINDEX_RANGE - 1; - int i; - - double target_q = (vp9_convert_qindex_to_q(qindex) * 0.5847) + 1.0; - - for (i = 0; i < QINDEX_RANGE; ++i) { - if (target_q <= vp9_convert_qindex_to_q(i)) { - ret_val = i; - break; - } - } - - return ret_val; -} - static int gfboost_qadjust(int qindex) { const double q = vp9_convert_qindex_to_q(qindex); return (int)((0.00000828 * q * q * q) + @@ -85,17 +76,10 @@ static int gfboost_qadjust(int qindex) { (1.32 * q) + 79.3); } -static int kfboost_qadjust(int qindex) { - const double q = vp9_convert_qindex_to_q(qindex); - return (int)((0.00000973 * q * q * q) + - (-0.00613 * q * q) + - (1.316 * q) + 121.2); -} - // Resets the first pass file to the given position using a relative seek from // the current position. 
static void reset_fpf_position(struct twopass_rc *p, - FIRSTPASS_STATS *position) { + const FIRSTPASS_STATS *position) { p->stats_in = position; } @@ -197,10 +181,13 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->new_mv_count = 0.0; section->count = 0.0; section->duration = 1.0; + section->spatial_layer_id = 0; } -static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { +static void accumulate_stats(FIRSTPASS_STATS *section, + const FIRSTPASS_STATS *frame) { section->frame += frame->frame; + section->spatial_layer_id = frame->spatial_layer_id; section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; @@ -221,7 +208,8 @@ static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { section->duration += frame->duration; } -static void subtract_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { +static void subtract_stats(FIRSTPASS_STATS *section, + const FIRSTPASS_STATS *frame) { section->frame -= frame->frame; section->intra_error -= frame->intra_error; section->coded_error -= frame->coded_error; @@ -269,12 +257,22 @@ static void avg_stats(FIRSTPASS_STATS *section) { // harder frames. 
static double calculate_modified_err(const VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame) { - const struct twopass_rc *const twopass = &cpi->twopass; - const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_err = stats->ssim_weighted_pred_err / stats->count; - double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); + const struct twopass_rc *twopass = &cpi->twopass; + const SVC *const svc = &cpi->svc; + const FIRSTPASS_STATS *stats; + double av_err; + double modified_error; + + if (svc->number_spatial_layers > 1 && + svc->number_temporal_layers == 1) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } + + stats = &twopass->total_stats; + av_err = stats->ssim_weighted_pred_err / stats->count; + modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / + DOUBLE_DIVIDE_CHECK(av_err), + cpi->oxcf.two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); @@ -338,15 +336,13 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) { } // This function returns the maximum target rate per frame. 
-static int frame_max_bits(const VP9_COMP *cpi) { - int64_t max_bits = - ((int64_t)cpi->rc.av_per_frame_bandwidth * - (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; - +static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) { + int64_t max_bits = ((int64_t)rc->av_per_frame_bandwidth * + (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) max_bits = 0; - else if (max_bits > cpi->rc.max_frame_bandwidth) - max_bits = cpi->rc.max_frame_bandwidth; + else if (max_bits > rc->max_frame_bandwidth) + max_bits = rc->max_frame_bandwidth; return (int)max_bits; } @@ -356,7 +352,15 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + int i; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + output_stats(&cpi->svc.layer_context[i].twopass.total_stats, + cpi->output_pkt_list); + } + } else { + output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + } } static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { @@ -379,7 +383,7 @@ static unsigned int zz_motion_search(const MACROBLOCK *x) { const uint8_t *const ref = xd->plane[0].pre[0].buf; const int ref_stride = xd->plane[0].pre[0].stride; unsigned int sse; - vp9_variance_fn_t fn = get_block_variance_fn(xd->mi_8x8[0]->mbmi.sb_type); + vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type); fn(src, src_stride, ref, ref_stride, &sse); return sse; } @@ -393,7 +397,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int num00, tmp_err, n, sr = 0; int step_param = 3; int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; int new_mv_mode_penalty = 256; const int quart_frm = MIN(cpi->common.width, cpi->common.height); @@ 
-415,6 +419,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->sadperbit16, &num00, &v_fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (tmp_err < INT_MAX) + tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; @@ -439,6 +445,8 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &num00, &v_fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (tmp_err < INT_MAX) + tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; @@ -474,11 +482,11 @@ void vp9_first_pass(VP9_COMP *cpi) { int recon_yoffset, recon_uvoffset; YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - YV12_BUFFER_CONFIG *const gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); - const int recon_y_stride = lst_yv12->y_stride; - const int recon_uv_stride = lst_yv12->uv_stride; - const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); + int recon_y_stride = lst_yv12->y_stride; + int recon_uv_stride = lst_yv12->uv_stride; + int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); int64_t intra_error = 0; int64_t coded_error = 0; int64_t sr_coded_error = 0; @@ -494,17 +502,47 @@ void vp9_first_pass(VP9_COMP *cpi) { int new_mv_count = 0; int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; + const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; vp9_clear_system_state(); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + MV_REFERENCE_FRAME ref_frame = LAST_FRAME; + const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; + twopass = 
&cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + + vp9_scale_references(cpi); + + // Use either last frame or alt frame for motion search. + if (cpi->ref_frame_flags & VP9_LAST_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); + ref_frame = LAST_FRAME; + } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME); + ref_frame = ALTREF_FRAME; + } + + if (scaled_ref_buf != NULL) { + // Update the stride since we are using scaled reference buffer + first_ref_buf = scaled_ref_buf; + recon_y_stride = first_ref_buf->y_stride; + recon_uv_stride = first_ref_buf->uv_stride; + uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height); + } + + // Disable golden frame for svc first pass for now. + gld_yv12 = NULL; + set_ref_ptrs(cm, xd, ref_frame, NONE); + } + vp9_setup_src_planes(x, cpi->Source, 0, 0); - setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL); - setup_dst_planes(xd, new_yv12, 0, 0); + vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); + vp9_setup_dst_planes(xd, new_yv12, 0, 0); - xd->mi_8x8 = cm->mi_grid_visible; - xd->mi_8x8[0] = cm->mi; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -552,8 +590,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - xd->mi_8x8[0]->mbmi.sb_type = bsize; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], @@ -593,7 +631,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int tmp_err, motion_error; int_mv mv, tmp_mv; - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; + xd->plane[0].pre[0].buf = 
first_ref_buf->y_buffer + recon_yoffset; motion_error = zz_motion_search(x); // Assume 0,0 motion with no mv overhead. mv.as_int = tmp_mv.as_int = 0; @@ -625,7 +663,7 @@ void vp9_first_pass(VP9_COMP *cpi) { } // Search in an older reference frame. - if (cm->current_video_frame > 1) { + if (cm->current_video_frame > 1 && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; @@ -643,9 +681,9 @@ void vp9_first_pass(VP9_COMP *cpi) { ++second_ref_count; // Reset to last frame as reference buffer. - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; - xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset; - xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset; + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; + xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; + xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; // In accumulating a score for the older reference frame take the // best of the motion predicted score and the intra coded error @@ -672,10 +710,11 @@ void vp9_first_pass(VP9_COMP *cpi) { mv.as_mv.row *= 8; mv.as_mv.col *= 8; this_error = motion_error; - vp9_set_mbmode_and_mvs(xd, NEWMV, &mv.as_mv); - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; - xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0] = mv; + xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); sum_mvr += mv.as_mv.row; @@ -752,6 +791,7 @@ void vp9_first_pass(VP9_COMP *cpi) { FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame; + fps.spatial_layer_id = cpi->svc.spatial_layer_id; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); fps.sr_coded_error = (double)(sr_coded_error >> 8); @@ -801,20 
+841,28 @@ void vp9_first_pass(VP9_COMP *cpi) { (twopass->this_frame_stats.pcnt_inter > 0.20) && ((twopass->this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); + if (gld_yv12 != NULL) { + vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } twopass->sr_update_lag = 1; } else { ++twopass->sr_update_lag; } - // Swap frame pointers so last frame refers to the frame we just compressed. - swap_yv12(lst_yv12, new_yv12); + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + vp9_update_reference_frames(cpi); + } else { + // Swap frame pointers so last frame refers to the frame we just compressed. + swap_yv12(lst_yv12, new_yv12); + } vp9_extend_frame_borders(lst_yv12); // Special case for the first frame. Copy into the GF buffer as a second // reference. - if (cm->current_video_frame == 0) + if (cm->current_video_frame == 0 && gld_yv12 != NULL) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } // Use this to see what the first pass reconstruction looks like. if (0) { @@ -835,12 +883,6 @@ void vp9_first_pass(VP9_COMP *cpi) { ++cm->current_video_frame; } -// Estimate a cost per mb attributable to overheads such as the coding of modes -// and motion vectors. This currently makes simplistic assumptions for testing. -static double bitcost(double prob) { - return -(log(prob) / log(2.0)); -} - static double calc_correction_factor(double err_per_mb, double err_divisor, double pt_low, @@ -868,21 +910,21 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, const double section_err = fpstats->coded_error / fpstats->count; const double err_per_mb = section_err / num_mbs; + const double speed_term = 1.0 + ((double)cpi->speed * 0.04); if (section_target_bandwitdh <= 0) return rc->worst_quality; // Highest value allowed - target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) - ? 
(512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = + ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs; // Try and pick a max Q that will be high enough to encode the // content at the given rate. for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double err_correction_factor = calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.5, 0.90, q); - const int bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q, - err_correction_factor); + const int bits_per_mb_at_this_q = + vp9_rc_bits_per_mb(INTER_FRAME, q, (err_correction_factor * speed_term)); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } @@ -897,10 +939,18 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; FIRSTPASS_STATS this_frame; - FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; + const FIRSTPASS_STATS *start_pos; + struct twopass_rc *twopass = &cpi->twopass; const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int is_spatial_svc = (svc->number_spatial_layers > 1) && + (svc->number_temporal_layers == 1); + double frame_rate; + + if (is_spatial_svc) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } zero_stats(&twopass->total_stats); zero_stats(&twopass->total_left_stats); @@ -911,30 +961,44 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->total_stats = *twopass->stats_in_end; twopass->total_left_stats = twopass->total_stats; + frame_rate = 10000000.0 * twopass->total_stats.count / + twopass->total_stats.duration; // Each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However, the sum duration is not. 
// It is calculated based on the actual durations of all frames from the // first pass. - vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count / - twopass->total_stats.duration); + + if (is_spatial_svc) { + vp9_update_spatial_layer_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(twopass->total_stats.duration * + svc->layer_context[svc->spatial_layer_id].target_bandwidth / + 10000000.0); + } else { + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = (int64_t)(twopass->total_stats.duration * + oxcf->target_bandwidth / 10000000.0); + } cpi->output_framerate = oxcf->framerate; - twopass->bits_left = (int64_t)(twopass->total_stats.duration * - oxcf->target_bandwidth / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain // are still boosted appropriately for KF/GF/ARF. - twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + if (!is_spatial_svc) { + // We don't know the number of MBs for each layer at this point. + // So we will do it later. + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } // This variable monitors how far behind the second ref update is lagging. twopass->sr_update_lag = 1; - // Scan the first pass file and calculate an average Intra / Inter error score - // ratio for the sequence. + // Scan the first pass file and calculate an average Intra / Inter error + // score ratio for the sequence. { double sum_iiratio = 0.0; start_pos = twopass->stats_in; @@ -993,8 +1057,8 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. 
This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, +static int detect_transition_to_still(struct twopass_rc *twopass, + int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { int trans_to_still = 0; @@ -1006,19 +1070,19 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - FIRSTPASS_STATS *position = cpi->twopass.stats_in; + const FIRSTPASS_STATS *position = twopass->stats_in; FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) + if (EOF == input_stats(twopass, &tmp_next_frame)) break; if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - reset_fpf_position(&cpi->twopass, position); + reset_fpf_position(twopass, position); // Only if it does do we signal a transition to still. if (j == still_interval) @@ -1340,9 +1404,11 @@ void define_fixed_arf_period(VP9_COMP *cpi) { // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - FIRSTPASS_STATS next_frame = { 0 }; - FIRSTPASS_STATS *start_pos; + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; struct twopass_rc *const twopass = &cpi->twopass; + FIRSTPASS_STATS next_frame = { 0 }; + const FIRSTPASS_STATS *start_pos; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1361,16 +1427,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - const int max_bits = frame_max_bits(cpi); // Max bits for a single frame. 
- - unsigned int allow_alt_ref = cpi->oxcf.play_alternate && - cpi->oxcf.lag_in_frames; + // Max bits for a single frame. + const int max_bits = frame_max_bits(rc, oxcf); + unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames; int f_boost = 0; int b_boost = 0; int flash_detected; int active_max_gf_interval; - RATE_CONTROL *const rc = &cpi->rc; twopass->gf_group_bits = 0; @@ -1407,7 +1471,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { active_max_gf_interval = rc->max_gf_interval; i = 0; - while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) { + while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) { ++i; // Accumulate error score of frames in this gf group. @@ -1442,7 +1506,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, + if (detect_transition_to_still(twopass, i, 5, loop_decay_rate, last_loop_decay_rate)) { allow_alt_ref = 0; break; @@ -1581,8 +1645,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the bits to be allocated to the group as a whole. if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) { - twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits * - (gf_group_err / cpi->twopass.kf_group_error_left)); + twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits * + (gf_group_err / twopass->kf_group_error_left)); } else { twopass->gf_group_bits = 0; } @@ -1671,10 +1735,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { { // Adjust KF group bits and error remaining. 
twopass->kf_group_error_left -= (int64_t)gf_group_err; - twopass->kf_group_bits -= twopass->gf_group_bits; - - if (twopass->kf_group_bits < 0) - twopass->kf_group_bits = 0; // If this is an arf update we want to remove the score for the overlay // frame at the end which will usually be very cheap to code. @@ -1691,11 +1751,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->gf_group_error_left = (int64_t)gf_group_err; } - twopass->gf_group_bits -= twopass->gf_bits; - - if (twopass->gf_group_bits < 0) - twopass->gf_group_bits = 0; - // This condition could fail if there are two kfs very close together // despite MIN_GF_INTERVAL and would cause a divide by 0 in the // calculation of alt_extra_bits. @@ -1704,8 +1759,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (boost >= 150) { const int pct_extra = MIN(20, (boost - 100) / 50); - const int alt_extra_bits = (int)((twopass->gf_group_bits * pct_extra) / - 100); + const int alt_extra_bits = (int)(( + MAX(twopass->gf_group_bits - twopass->gf_bits, 0) * + pct_extra) / 100); twopass->gf_group_bits -= alt_extra_bits; } } @@ -1734,40 +1790,36 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + struct twopass_rc *twopass = &cpi->twopass; + // For a single frame. + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + // Calculate modified prediction error used in bit allocation. + const double modified_err = calculate_modified_err(cpi, this_frame); int target_frame_size; - double modified_err; double err_fraction; - const int max_bits = frame_max_bits(cpi); // Max for a single frame. - // Calculate modified prediction error used in bit allocation. 
- modified_err = calculate_modified_err(cpi, this_frame); - - if (cpi->twopass.gf_group_error_left > 0) + if (twopass->gf_group_error_left > 0) // What portion of the remaining GF group error is used by this frame. - err_fraction = modified_err / cpi->twopass.gf_group_error_left; + err_fraction = modified_err / twopass->gf_group_error_left; else err_fraction = 0.0; // How many of those bits available for allocation should we give it? - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); + target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction); // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at // the top end. target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)cpi->twopass.gf_group_bits)); + MIN(max_bits, (int)twopass->gf_group_bits)); // Adjust error and bits remaining. - cpi->twopass.gf_group_error_left -= (int64_t)modified_err; - cpi->twopass.gf_group_bits -= target_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; + twopass->gf_group_error_left -= (int64_t)modified_err; // Per frame bit target for this frame. vp9_rc_set_frame_target(cpi, target_frame_size); } -static int test_candidate_kf(VP9_COMP *cpi, +static int test_candidate_kf(struct twopass_rc *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, const FIRSTPASS_STATS *next_frame) { @@ -1788,19 +1840,12 @@ static int test_candidate_kf(VP9_COMP *cpi, ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { int i; - FIRSTPASS_STATS *start_pos; - - FIRSTPASS_STATS local_next_frame; - + const FIRSTPASS_STATS *start_pos = twopass->stats_in; + FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; double decay_accumulator = 1.0; - local_next_frame = *next_frame; - - // Note the starting file position so we can reset to it. 
- start_pos = cpi->twopass.stats_in; - // Examine how well the key frame predicts subsequent frames. for (i = 0; i < 16; ++i) { double next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / @@ -1832,7 +1877,7 @@ static int test_candidate_kf(VP9_COMP *cpi, old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(&cpi->twopass, &local_next_frame)) + if (EOF == input_stats(twopass, &local_next_frame)) break; } @@ -1842,7 +1887,7 @@ static int test_candidate_kf(VP9_COMP *cpi, is_viable_kf = 1; } else { // Reset the file position - reset_fpf_position(&cpi->twopass, start_pos); + reset_fpf_position(twopass, start_pos); is_viable_kf = 0; } @@ -1853,28 +1898,21 @@ static int test_candidate_kf(VP9_COMP *cpi, static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; - FIRSTPASS_STATS last_frame; - FIRSTPASS_STATS first_frame; + RATE_CONTROL *const rc = &cpi->rc; + struct twopass_rc *const twopass = &cpi->twopass; + const FIRSTPASS_STATS first_frame = *this_frame; + const FIRSTPASS_STATS *start_position = twopass->stats_in; FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS *start_position; - + FIRSTPASS_STATS last_frame; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; - double boost_score = 0; - double loop_decay_rate; - + double boost_score = 0.0; double kf_mod_err = 0.0; double kf_group_err = 0.0; double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - RATE_CONTROL *const rc = &cpi->rc; - struct twopass_rc *const twopass = &cpi->twopass; - vp9_zero(next_frame); - vp9_clear_system_state(); - - start_position = twopass->stats_in; cpi->common.frame_type = KEY_FRAME; // Is this a forced key frame by interval. @@ -1888,9 +1926,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->frames_to_key = 1; - // Take a copy of the initial frame details. 
- first_frame = *this_frame; - twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0; // Group modified error score. @@ -1909,8 +1944,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Provided that we are not at the end of the file... if (cpi->oxcf.auto_key && lookup_next_frame_stats(twopass, &next_frame) != EOF) { + double loop_decay_rate; + // Check for a scene cut. - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) + if (test_candidate_kf(twopass, &last_frame, this_frame, &next_frame)) break; // How fast is the prediction quality decaying? @@ -1926,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special check for transition or high motion followed by a // static scene. - if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i, loop_decay_rate, decay_accumulator)) break; @@ -1949,13 +1986,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // is between 1x and 2x. if (cpi->oxcf.auto_key && rc->frames_to_key > (int)cpi->key_frame_frequency) { - FIRSTPASS_STATS tmp_frame; + FIRSTPASS_STATS tmp_frame = first_frame; rc->frames_to_key /= 2; - // Copy first frame details. - tmp_frame = first_frame; - // Reset to the start of the group. reset_fpf_position(twopass, start_position); @@ -1963,10 +1997,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Rescan to get the correct error data for the forced kf group. for (i = 0; i < rc->frames_to_key; ++i) { - // Accumulate kf group errors. kf_group_err += calculate_modified_err(cpi, &tmp_frame); - - // Load the next frame's stats. 
input_stats(twopass, &tmp_frame); } rc->next_key_frame_forced = 1; @@ -1985,7 +2016,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits that should be assigned to the kf group. if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) { // Maximum number of bits for a single normal frame (not key frame). - int max_bits = frame_max_bits(cpi); + const int max_bits = frame_max_bits(rc, &cpi->oxcf); // Maximum number of bits allocated to the key frame group. int64_t max_grp_bits; @@ -2012,20 +2043,19 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Scan through the kf group collating various stats. for (i = 0; i < rc->frames_to_key; ++i) { - double r; - if (EOF == input_stats(twopass, &next_frame)) break; // Monitor for static sections. if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < - zero_motion_accumulator) { - zero_motion_accumulator = - (next_frame.pcnt_inter - next_frame.pcnt_motion); + zero_motion_accumulator) { + zero_motion_accumulator = (next_frame.pcnt_inter - + next_frame.pcnt_motion); } // For the first few frames collect data to decide kf boost. if (i <= (rc->max_gf_interval * 2)) { + double r; if (next_frame.intra_error > twopass->kf_intra_err_min) r = (IIKFACTOR2 * next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); @@ -2038,10 +2068,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying. if (!detect_flash(twopass, 0)) { - loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); + const double loop_decay_rate = get_prediction_decay_rate(&cpi->common, + &next_frame); decay_accumulator *= loop_decay_rate; - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? 
MIN_DECAY_FACTOR : decay_accumulator; + decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR); } boost_score += (decay_accumulator * r); @@ -2072,7 +2102,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (1) { int kf_boost = (int)boost_score; int allocation_chunks; - int alt_kf_bits; if (kf_boost < (rc->frames_to_key * 3)) kf_boost = (rc->frames_to_key * 3); @@ -2106,14 +2135,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Prevent overflow. if (kf_boost > 1028) { - int divisor = kf_boost >> 10; + const int divisor = kf_boost >> 10; kf_boost /= divisor; allocation_chunks /= divisor; } - twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0 - : twopass->kf_group_bits; - + twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); // Calculate the number of bits to be spent on the key frame. twopass->kf_bits = (int)((double)kf_boost * ((double)twopass->kf_group_bits / allocation_chunks)); @@ -2123,11 +2150,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // then use an alternate calculation based on the kf error score // which should give a smaller key frame. if (kf_mod_err < kf_group_err / rc->frames_to_key) { - double alt_kf_grp_bits = ((double)twopass->bits_left * + double alt_kf_grp_bits = ((double)twopass->bits_left * (kf_mod_err * (double)rc->frames_to_key) / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)); - alt_kf_bits = (int)((double)kf_boost * + const int alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); if (twopass->kf_bits > alt_kf_bits) @@ -2136,12 +2163,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Else if it is much harder than other frames in the group make sure // it at least receives an allocation in keeping with its relative // error score. 
- alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / + const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left))); - if (alt_kf_bits > twopass->kf_bits) { + if (alt_kf_bits > twopass->kf_bits) twopass->kf_bits = alt_kf_bits; - } } twopass->kf_group_bits -= twopass->kf_bits; // Per frame bit target for this frame. @@ -2161,7 +2187,7 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY)) { + (cm->frame_flags & FRAMEFLAGS_KEY))) { cm->frame_type = KEY_FRAME; } else { cm->frame_type = INTER_FRAME; @@ -2174,14 +2200,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - const int frames_left = (int)(twopass->total_stats.count - - cm->current_video_frame); + int frames_left; FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; double this_frame_coded_error; int target; + LAYER_CONTEXT *lc = NULL; + int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); + + if (is_spatial_svc) { + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + frames_left = (int)(twopass->total_stats.count - + lc->current_video_frame_in_layer); + } else { + frames_left = (int)(twopass->total_stats.count - + cm->current_video_frame); + } if (!twopass->stats_in) return; @@ -2194,9 +2230,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); + if (is_spatial_svc && twopass->kf_intra_err_min == 0) { + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } + if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if 
(cm->current_video_frame == 0) { + } else if (cm->current_video_frame == 0 || + (is_spatial_svc && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2219,6 +2261,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Define next KF group and assign bits to it. this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); + // Don't place key frame in any enhancement layers in spatial svc + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id > 0) { + cm->frame_type = INTER_FRAME; + } } else { cm->frame_type = INTER_FRAME; } @@ -2278,23 +2325,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { subtract_stats(&twopass->total_left_stats, &this_frame); } -void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { +void vp9_twopass_postencode_update(VP9_COMP *cpi) { #ifdef DISABLE_RC_LONG_TERM_MEM - cpi->twopass.bits_left -= cpi->rc.this_frame_target; + const uint64_t bits_used = cpi->rc.this_frame_target; #else - cpi->twopass.bits_left -= 8 * bytes_used; + const uint64_t bits_used = cpi->rc.projected_frame_size; +#endif + cpi->twopass.bits_left -= bits_used; + cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0); // Update bits left to the kf and gf groups to account for overshoot or // undershoot on these frames. - if (cm->frame_type == KEY_FRAME) { - cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - - cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); - } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { - cpi->twopass.gf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - + if (cpi->common.frame_type == KEY_FRAME) { + // For key frames kf_group_bits already had the target bits subtracted out. + // So now update to the correct value based on the actual bits used. 
+ cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used; + } else { + cpi->twopass.kf_group_bits -= bits_used; + cpi->twopass.gf_group_bits -= bits_used; cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); } -#endif + cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); } diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.h b/source/libvpx/vp9/encoder/vp9_firstpass.h index 03c0e20..7a16c8f 100644 --- a/source/libvpx/vp9/encoder/vp9_firstpass.h +++ b/source/libvpx/vp9/encoder/vp9_firstpass.h @@ -35,6 +35,7 @@ typedef struct { double new_mv_count; double duration; double count; + int64_t spatial_layer_id; } FIRSTPASS_STATS; struct twopass_rc { @@ -43,7 +44,9 @@ struct twopass_rc { unsigned int this_iiratio; FIRSTPASS_STATS total_stats; FIRSTPASS_STATS this_frame_stats; - FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; + const FIRSTPASS_STATS *stats_in; + const FIRSTPASS_STATS *stats_in_start; + const FIRSTPASS_STATS *stats_in_end; FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; @@ -55,7 +58,6 @@ struct twopass_rc { double modified_error_left; double kf_intra_err_min; double gf_intra_err_min; - int static_scene_max_gf_interval; int kf_bits; // Remaining error from uncoded frames in a gf group. 
Two pass use only int64_t gf_group_error_left; @@ -93,8 +95,7 @@ int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh); // Post encode update of the rate control parameters for 2-pass -void vp9_twopass_postencode_update(struct VP9_COMP *cpi, - uint64_t bytes_used); +void vp9_twopass_postencode_update(struct VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_lookahead.c b/source/libvpx/vp9/encoder/vp9_lookahead.c index a88d5ec..cf03e01 100644 --- a/source/libvpx/vp9/encoder/vp9_lookahead.c +++ b/source/libvpx/vp9/encoder/vp9_lookahead.c @@ -28,8 +28,8 @@ struct lookahead_ctx { /* Return the buffer at the given absolute index and increment the index */ -static struct lookahead_entry * pop(struct lookahead_ctx *ctx, - unsigned int *idx) { +static struct lookahead_entry *pop(struct lookahead_ctx *ctx, + unsigned int *idx) { unsigned int index = *idx; struct lookahead_entry *buf = ctx->buf + index; @@ -55,16 +55,19 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { } -struct lookahead_ctx * vp9_lookahead_init(unsigned int width, - unsigned int height, - unsigned int subsampling_x, - unsigned int subsampling_y, - unsigned int depth) { +struct lookahead_ctx *vp9_lookahead_init(unsigned int width, + unsigned int height, + unsigned int subsampling_x, + unsigned int subsampling_y, + unsigned int depth) { struct lookahead_ctx *ctx = NULL; // Clamp the lookahead queue depth depth = clamp(depth, 1, MAX_LAG_BUFFERS); + // Allocate memory to keep previous source frames available. 
+ depth += MAX_PRE_FRAMES; + // Allocate the lookahead structures ctx = calloc(1, sizeof(*ctx)); if (ctx) { @@ -96,7 +99,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int mb_cols = (src->y_width + 15) >> 4; #endif - if (ctx->sz + 1 > ctx->max_sz) + if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -159,11 +162,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, } -struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, - int drain) { +struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, + int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz)) { + if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } @@ -171,16 +174,28 @@ struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, } -struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx, - int index) { +struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, + int index) { struct lookahead_entry *buf = NULL; - if (index < (int)ctx->sz) { - index += ctx->read_idx; - if (index >= (int)ctx->max_sz) - index -= ctx->max_sz; - buf = ctx->buf + index; + if (index >= 0) { + // Forward peek + if (index < (int)ctx->sz) { + index += ctx->read_idx; + if (index >= (int)ctx->max_sz) + index -= ctx->max_sz; + buf = ctx->buf + index; + } + } else if (index < 0) { + // Backward peek + if (-index <= MAX_PRE_FRAMES) { + index += ctx->read_idx; + if (index < 0) + index += ctx->max_sz; + buf = ctx->buf + index; + } } + return buf; } diff --git a/source/libvpx/vp9/encoder/vp9_lookahead.h b/source/libvpx/vp9/encoder/vp9_lookahead.h index ff63c0d..046c533 100644 --- a/source/libvpx/vp9/encoder/vp9_lookahead.h +++ b/source/libvpx/vp9/encoder/vp9_lookahead.h @@ -20,6 +20,9 @@ extern "C" { #define MAX_LAG_BUFFERS 25 +// The max of past frames we want to 
keep in the queue. +#define MAX_PRE_FRAMES 1 + struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; diff --git a/source/libvpx/vp9/encoder/vp9_mbgraph.c b/source/libvpx/vp9/encoder/vp9_mbgraph.c index d3e19b4..44b171f 100644 --- a/source/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/source/libvpx/vp9/encoder/vp9_mbgraph.c @@ -61,7 +61,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, &sse); } - vp9_set_mbmode_and_mvs(xd, NEWMV, dst_mv); + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv; + vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); /* restore UMV window */ @@ -143,7 +145,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; - xd->mi_8x8[0]->mbmi.mode = mode; + xd->mi[0]->mbmi.mode = mode; vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, @@ -250,7 +252,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; - xd->mi_8x8[0] = &mi_local; + xd->mi[0] = &mi_local; mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE; @@ -368,7 +370,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) { else cpi->static_mb_pct = 0; - cpi->seg0_cnt = ncnt[0]; vp9_enable_segmentation(&cm->seg); } else { cpi->static_mb_pct = 0; diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.c b/source/libvpx/vp9/encoder/vp9_mcomp.c index 7d6fd3b..f7a02a4 100644 --- a/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -23,6 +23,11 @@ // #define NEW_DIAMOND_SEARCH +static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int 
col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -98,42 +103,23 @@ static int mvsad_err_cost(const MV *mv, const MV *ref, } void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { - int len; - int search_site_count = 0; + int len, ss_count = 1; - // Generate offsets for 4 search sites per step. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; + x->ss[0].mv.col = x->ss[0].mv.row = 0; + x->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -len; - x->ss[search_site_count].offset = -len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = len; - x->ss[search_site_count].offset = len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = len; - search_site_count++; + // Generate offsets for 4 search sites per step. 
+ const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; + int i; + for (i = 0; i < 4; ++i) { + search_site *const ss = &x->ss[ss_count++]; + ss->mv = ss_mvs[i]; + ss->offset = ss->mv.row * stride + ss->mv.col; + } } - x->ss_count = search_site_count; + x->ss_count = ss_count; x->searches_per_step = 4; } @@ -389,9 +375,9 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, unsigned int sse; unsigned int whichdir; int thismse; - unsigned int halfiters = iters_per_step; - unsigned int quarteriters = iters_per_step; - unsigned int eighthiters = iters_per_step; + const unsigned int halfiters = iters_per_step; + const unsigned int quarteriters = iters_per_step; + const unsigned int eighthiters = iters_per_step; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; @@ -418,7 +404,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. 
- comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -514,8 +500,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, - int do_init_search, - int do_refine, + int do_init_search, int do_refine, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, @@ -527,20 +512,15 @@ static int vp9_pattern_search(const MACROBLOCK *x, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; int i, j, s, t; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; - MV this_mv; int bestsad = INT_MAX; int thissad; - const uint8_t *base_offset; - const uint8_t *this_offset; int k = -1; - int best_site = -1; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_init_s = search_param_to_steps[search_param]; - const int *mvjsadcost = x->nmvjointsadcost; + const int *const mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; // adjust ref_mv to make sure it is within MV range @@ -549,13 +529,10 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc = ref_mv->col; // Work out the start point for the search - base_offset = xd->plane[0].pre[0].buf; - this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.row = br; - this_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + bestsad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + 0x7fffffff) + 
mvsad_err_cost(ref_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -564,27 +541,25 @@ static int vp9_pattern_search(const MACROBLOCK *x, s = best_init_s; best_init_s = -1; for (t = 0; t <= s; ++t) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -604,31 +579,30 @@ static int vp9_pattern_search(const MACROBLOCK *x, // If the center point is still the best, just skip this and move to // the refinement step. 
if (best_init_s != -1) { + int best_site = -1; s = best_init_s; - best_site = -1; + do { // No need to search all 6 points the 1st time if initial search was used if (!do_init_search || s != best_init_s) { if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -651,24 +625,22 @@ static int vp9_pattern_search(const MACROBLOCK *x, if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; 
i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -685,29 +657,28 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Check 4 1-away neighbors if do_refine is true. // For most well-designed schemes do_refine will not be necessary. if (do_refine) { - static const MV neighbors[4] = { {0, -1}, { -1, 0}, {1, 0}, {0, 1} }; + static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; + for (j = 0; j < 16; j++) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + 
in_what->stride, bestsad); CHECK_BETTER } } @@ -724,20 +695,41 @@ static int vp9_pattern_search(const MACROBLOCK *x, best_mv->row = br; best_mv->col = bc; - this_offset = base_offset + (best_mv->row * in_what_stride) + - best_mv->col; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - if (bestsad == INT_MAX) - return INT_MAX; + return bestsad; +} - return vfp->vf(what, what_stride, this_offset, in_what_stride, - (unsigned int *)&bestsad) + - use_mvcost ? mv_err_cost(&this_mv, center_mv, - x->nmvjointcost, x->mvcost, x->errorperbit) - : 0; +int vp9_get_mvpred_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->vf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); } +int vp9_get_mvpred_av_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const uint8_t *second_pred, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, + what->buf, what->stride, &unused, second_pred) + + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); +} int vp9_hex_search(const MACROBLOCK *x, MV *ref_mv, @@ -853,184 +845,34 @@ int vp9_square_search(const MACROBLOCK *x, do_init_search, 0, vfp, use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); -}; - -// Number of candidates in first hex search -#define FIRST_HEX_CANDIDATES 6 -// Index of previous hex search's best match -#define PRE_BEST_CANDIDATE 6 -// Number of candidates in following hex search -#define NEXT_HEX_CANDIDATES 3 -// Number of candidates in refining search -#define REFINE_CANDIDATES 4 +} int vp9_fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, + int do_init_search, // must be zero for fast_hex const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { - const MACROBLOCKD* const xd = &x->e_mbd; - static const MV hex[FIRST_HEX_CANDIDATES] = { - { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} - }; - static const MV next_chkpts[PRE_BEST_CANDIDATE][NEXT_HEX_CANDIDATES] = { - {{ -2, 0}, { -1, -2}, {1, -2}}, - {{ -1, -2}, {1, -2}, {2, 0}}, - {{1, -2}, {2, 0}, {1, 2}}, - {{2, 0}, {1, 2}, { -1, 2}}, - {{1, 2}, { -1, 2}, { -2, 0}}, - {{ -1, 2}, { -2, 0}, { -1, -2}} - }; - static const MV neighbors[REFINE_CANDIDATES] = { - {0, -1}, { -1, 0}, {1, 0}, {0, 1} - }; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - int br, bc; - MV this_mv; - unsigned int bestsad = 0x7fffffff; - unsigned int thissad; - const uint8_t *base_offset; - const uint8_t *this_offset; - int k = -1; - int best_site = -1; - const int max_hex_search = 512; - const int max_dia_search = 32; - - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - - // Adjust ref_mv to make sure it is 
within MV range - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - br = ref_mv->row; - bc = ref_mv->col; - - // Check the start point - base_offset = xd->plane[0].pre[0].buf; - this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.row = br; - this_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); - - // Initial 6-point hex search - if (check_bounds(x, br, bc, 2)) { - for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { - this_mv.row = br + hex[i].row; - this_mv.col = bc + hex[i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < FIRST_HEX_CANDIDATES; i++) { - this_mv.row = br + hex[i].row; - this_mv.col = bc + hex[i].col; - if (!is_mv_in(x, &this_mv)) - continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } - - // Continue hex search if we find a better match in first round - if (best_site != -1) { - br += hex[best_site].row; - bc += hex[best_site].col; - k = best_site; - - // Allow search covering maximum MV range - for (j = 1; j < max_hex_search; j++) { - best_site = -1; - - if (check_bounds(x, br, bc, 2)) { - for (i = 0; i < 3; i++) { - this_mv.row = br + next_chkpts[k][i].row; - this_mv.col = bc + next_chkpts[k][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < 3; i++) { - this_mv.row = br + next_chkpts[k][i].row; - this_mv.col = bc + next_chkpts[k][i].col; - if (!is_mv_in(x, &this_mv)) - continue; - this_offset = base_offset + (this_mv.row * 
in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } - - if (best_site == -1) { - break; - } else { - br += next_chkpts[k][best_site].row; - bc += next_chkpts[k][best_site].col; - k += 5 + best_site; - if (k >= 12) k -= 12; - else if (k >= 6) k -= 6; - } - } - } - - // Check 4 1-away neighbors - for (j = 0; j < max_dia_search; j++) { - best_site = -1; - - if (check_bounds(x, br, bc, 1)) { - for (i = 0; i < REFINE_CANDIDATES; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } else { - for (i = 0; i < REFINE_CANDIDATES; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - if (!is_mv_in(x, &this_mv)) - continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); - CHECK_BETTER - } - } - - if (best_site == -1) { - break; - } else { - br += neighbors[best_site].row; - bc += neighbors[best_site].col; - } - } - - best_mv->row = br; - best_mv->col = bc; + return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), + sad_per_bit, do_init_search, vfp, use_mvcost, + center_mv, best_mv); +} - return bestsad; +int vp9_fast_dia_search(const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int sad_per_bit, + int do_init_search, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost, + const MV *center_mv, + MV *best_mv) { + return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), + sad_per_bit, do_init_search, vfp, use_mvcost, + center_mv, best_mv); } #undef CHECK_BETTER @@ -1045,9 +887,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, const int what_stride = 
x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - MV this_mv; unsigned int bestsad = INT_MAX; int ref_row, ref_col; @@ -1076,7 +915,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, // Work out the start point for the search in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) @@ -1100,8 +938,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, for (i = 0; i < 4; ++i) { if (sad_array[i] < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1119,8 +956,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, bestsad); if (thissad < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1134,20 +970,9 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } } } - best_mv->row += best_tr; best_mv->col += best_tc; - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); + return bestsad; } int vp9_diamond_search_sad_c(const MACROBLOCK *x, @@ -1156,77 +981,49 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - int i, j, step; - const MACROBLOCKD *const xd = &x->e_mbd; - const 
uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - MV this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row, ref_col; - int this_row_offset, this_col_offset; - + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; // search_param determines the length of the initial step and hence the number // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - int thissad; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; + const uint8_t *best_address; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; + best_address = get_buf_from_mv(in_what, ref_mv); *num00 = 0; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; + *best_mv = *ref_mv; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + best_sad = fn_ptr->sdf(what->buf, what->stride, + in_what->buf, in_what->stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); i = 1; for (step = 0; step < tot_steps; step++) { for (j = 0; j < x->searches_per_step; j++) { 
- // Trap illegal vectors - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - const uint8_t *const check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = i; } } @@ -1242,22 +1039,17 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { - this_row_offset = best_mv->row + ss[best_site].mv.row; - this_col_offset = best_mv->col + ss[best_site].mv.col; - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - check_here = ss[best_site].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + const MV this_mv = {best_mv->row + ss[best_site].mv.row, + best_mv->col + ss[best_site].mv.col}; + if (is_mv_in(x, &this_mv)) { + int sad = fn_ptr->sdf(what->buf, 
what->stride, + best_address + ss[best_site].offset, + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_mv->row += ss[best_site].mv.row; best_mv->col += ss[best_site].mv.col; best_address += ss[best_site].offset; @@ -1268,21 +1060,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, break; }; #endif - } else if (best_address == in_what) { + } else if (best_address == in_what->buf) { (*num00)++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); + return best_sad; } int vp9_diamond_search_sadx4(const MACROBLOCK *x, @@ -1300,16 +1082,12 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, const int in_what_stride = xd->plane[0].pre[0].stride; const uint8_t *best_address; - MV this_mv; - unsigned int bestsad = INT_MAX; int best_site = 0; int last_site = 0; int ref_row; int ref_col; - int this_row_offset; - int this_col_offset; // search_param determines the length of the initial step and hence the number // of iterations. 
@@ -1319,7 +1097,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, const search_site *ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - unsigned int thissad; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; @@ -1370,8 +1147,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { - this_mv.row = best_mv->row + ss[i].mv.row; - this_mv.col = best_mv->col + ss[i].mv.col; + const MV this_mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1385,20 +1162,15 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, } else { for (j = 0; j < x->searches_per_step; j++) { // Trap illegal vectors - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; + const MV this_mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { + if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, + in_what_stride, bestsad); if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1418,18 +1190,13 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { - this_row_offset = best_mv->row + ss[best_site].mv.row; - this_col_offset = best_mv->col + ss[best_site].mv.col; - if ((this_col_offset > x->mv_col_min) && - 
(this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - check_here = ss[best_site].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + const MV this_mv = {best_mv->row + ss[best_site].mv.row, + best_mv->col + ss[best_site].mv.col}; + if (is_mv_in(x, &this_mv)) { + const uint8_t *const check_here = ss[best_site].offset + best_address; + unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, + in_what_stride, bestsad); if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1448,24 +1215,14 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, (*num00)++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); + return bestsad; } /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, +int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, @@ -1476,6 +1233,8 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, step_param, sadpb, &n, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); *dst_mv = temp_mv; // If there won't be more n-step search, check to see if refining search is @@ -1493,6 +1252,8 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, step_param + n, sadpb, &num00, fn_ptr, 
x->nmvjointcost, x->mvcost, ref_mv); + if (thissme < INT_MAX) + thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); // check to see if refining search is needed. if (num00 > further_steps - n) @@ -1512,12 +1273,13 @@ int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x, thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, x->nmvjointcost, x->mvcost, ref_mv); + if (thissme < INT_MAX) + thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); if (thissme < bestsme) { bestsme = thissme; *dst_mv = best_mv; } } - return bestsme; } @@ -1528,10 +1290,8 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV *center_mv, MV *best_mv) { int r, c; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); @@ -1539,38 +1299,26 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, - 0x7fffffff) + + int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { for (c = col_min; c < col_max; ++c) 
{ - const MV this_mv = {r, c}; - const uint8_t *check_here = &in_what[r * in_what_stride + c]; - const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - best_sad) + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + const MV mv = {r, c}; + const int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); if (sad < best_sad) { best_sad = sad; - *best_mv = this_mv; - best_address = check_here; + *best_mv = mv; } } } - - if (best_sad < INT_MAX) { - unsigned int unused; - const MV mv = {best_mv->row * 8, best_mv->col * 8}; - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &unused) - + mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit); - } else { - return INT_MAX; - } + return best_sad; } int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, @@ -1635,10 +1383,8 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } - check_here++; c++; } @@ -1657,7 +1403,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1665,17 +1410,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, c++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - else - return INT_MAX; + return bestsad; } int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, @@ -1691,7 +1426,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, MV this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned int thissad; int ref_row = 
ref_mv->row; int ref_col = ref_mv->col; @@ -1731,7 +1465,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; + unsigned int thissad = (unsigned int)sad_array8[i]; if (thissad < bestsad) { this_mv.col = c; @@ -1742,7 +1476,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1757,18 +1490,17 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + unsigned int thissad = sad_array[i]; if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1778,8 +1510,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, + check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.col = c; @@ -1790,7 +1522,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, bestsad = thissad; best_mv->row = r; best_mv->col = c; - bestaddress = check_here; } } @@ -1798,17 +1529,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, c++; } } - - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - 
else - return INT_MAX; + return bestsad; } int vp9_refining_search_sad_c(const MACROBLOCK *x, @@ -1817,41 +1538,34 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const uint8_t *const what = x->plane[0].src.buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + 
unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + error_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1863,19 +1577,9 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; - best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col]; } } - - if (bestsad < INT_MAX) { - unsigned int unused; - const MV mv = {ref_mv->row * 8, ref_mv->col * 8}; - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - &unused) + - mv_err_cost(&mv, center_mv, mvjcost, mvcost, x->errorperbit); - } else { - return INT_MAX; - } + return best_sad; } int vp9_refining_search_sadx4(const MACROBLOCK *x, @@ -1885,82 +1589,64 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; - MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - int this_row_offset, this_col_offset; - - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *what = x->plane[0].src.buf; - const uint8_t *best_address = xd->plane[0].pre[0].buf + - (ref_mv->row * xd->plane[0].pre[0].stride) + - ref_mv->col; - unsigned int thissad; - MV this_mv; - + const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); + 
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; - int all_in = ((ref_mv->row - 1) > x->mv_row_min) & - ((ref_mv->row + 1) < x->mv_row_max) & - ((ref_mv->col - 1) > x->mv_col_min) & - ((ref_mv->col + 1) < x->mv_col_max); + const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & + ((ref_mv->row + 1) < x->mv_row_max) & + ((ref_mv->col - 1) > x->mv_col_min) & + ((ref_mv->col + 1) < x->mv_col_max); if (all_in) { - unsigned int sad_array[4]; - uint8_t const *block_offset[4] = { - best_address - in_what_stride, + unsigned int sads[4]; + const uint8_t *const positions[4] = { + best_address - in_what->stride, best_address - 1, best_address + 1, - best_address + in_what_stride + best_address + in_what->stride }; - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, - sad_array); + fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); - for (j = 0; j < 4; j++) { - if (sad_array[j] < bestsad) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, + for (j = 0; j < 4; ++j) { + if (sads[j] < best_sad) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + sads[j] += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (sad_array[j] < bestsad) { - bestsad = sad_array[j]; + if (sads[j] < best_sad) { + best_sad = sads[j]; best_site = j; } } } } else { - for (j = 0; j < 4; j++) { - this_row_offset = ref_mv->row + neighbors[j].row; - this_col_offset = ref_mv->col + neighbors[j].col; - - if ((this_col_offset > x->mv_col_min) && - (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && - (this_row_offset < x->mv_row_max)) { - const uint8_t *check_here = neighbors[j].row * 
in_what_stride + - neighbors[j].col + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - this_mv.row = this_row_offset; - this_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + for (j = 0; j < 4; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, + mvjsadcost, mvsadcost, error_per_bit); + + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1973,21 +1659,11 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; - best_address += (neighbors[best_site].row) * in_what_stride + - neighbors[best_site].col; + best_address = get_buf_from_mv(in_what, ref_mv); } } - this_mv.row = ref_mv->row * 8; - this_mv.col = ref_mv->col * 8; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - else - return INT_MAX; + return best_sad; } // This function is called when we do joint motion search in comp_inter_inter @@ -1999,48 +1675,36 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what = xd->plane[0].pre[0].buf; - const int 
in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - unsigned int thissad; - MV this_mv; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - /* Get compound pred by averaging two pred blocks. */ - unsigned int bestsad = fn_ptr->sdaf(what, what_stride, - best_address, in_what_stride, - second_pred, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + second_pred, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; ++i) { int best_site = -1; - for (j = 0; j < 8; j++) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; + for (j = 0; j < 8; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - - thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, - second_pred, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -2052,21 +1716,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; 
ref_mv->col += neighbors[best_site].col; - best_address = &in_what[ref_mv->row * in_what_stride + ref_mv->col]; } } - - this_mv.row = ref_mv->row * 8; - this_mv.col = ref_mv->col * 8; - - if (bestsad < INT_MAX) { - // FIXME(rbultje, yunqing): add full-pixel averaging variance functions - // so we don't have to use the subpixel with xoff=0,yoff=0 here. - return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, - (unsigned int *)(&thissad), second_pred) + - mv_err_cost(&this_mv, center_mv, - mvjcost, mvcost, x->errorperbit); - } else { - return INT_MAX; - } + return best_sad; } diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.h b/source/libvpx/vp9/encoder/vp9_mcomp.h index 586a74c..f7b7c5e 100644 --- a/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -35,6 +35,17 @@ extern "C" { void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv); int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, int *mvcost[2], int weight); + +// Utility to compute variance + MV rate cost for a given MV +int vp9_get_mvpred_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost); +int vp9_get_mvpred_av_var(const MACROBLOCK *x, + const MV *best_mv, const MV *center_mv, + const uint8_t *second_pred, + const vp9_variance_fn_ptr_t *vfp, + int use_mvcost); void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); @@ -42,47 +53,28 @@ struct VP9_COMP; int vp9_init_search_range(struct VP9_COMP *cpi, int size); // Runs sequence of diamond searches in smaller steps for RD -int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, +int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv); -int vp9_hex_search(const MACROBLOCK *x, - MV *ref_mv, - 
int search_param, - int error_per_bit, - int do_init_search, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); -int vp9_bigdia_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int error_per_bit, - int do_init_search, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); -int vp9_square_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int error_per_bit, - int do_init_search, - const vp9_variance_fn_ptr_t *vf, - int use_mvcost, - const MV *center_mv, - MV *best_mv); -int vp9_fast_hex_search(const MACROBLOCK *x, - MV *ref_mv, - int search_param, - int sad_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int use_mvcost, - const MV *center_mv, - MV *best_mv); +typedef int (integer_mv_pattern_search_fn) ( + const MACROBLOCK *x, + MV *ref_mv, + int search_param, + int error_per_bit, + int do_init_search, + const vp9_variance_fn_ptr_t *vf, + int use_mvcost, + const MV *center_mv, + MV *best_mv); + +integer_mv_pattern_search_fn vp9_hex_search; +integer_mv_pattern_search_fn vp9_bigdia_search; +integer_mv_pattern_search_fn vp9_square_search; +integer_mv_pattern_search_fn vp9_fast_hex_search; +integer_mv_pattern_search_fn vp9_fast_dia_search; typedef int (fractional_mv_step_fp) ( const MACROBLOCK *x, diff --git a/source/libvpx/vp9/encoder/vp9_onyx_if.c b/source/libvpx/vp9/encoder/vp9_onyx_if.c index f985545..0ac9d5f 100644 --- a/source/libvpx/vp9/encoder/vp9_onyx_if.c +++ b/source/libvpx/vp9/encoder/vp9_onyx_if.c @@ -27,7 +27,11 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_bitstream.h" +#include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" @@ -36,9 +40,13 @@ #include 
"vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_speed_features.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_temporal_filter.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_resize.h" +#include "vp9/encoder/vp9_svc_layercontext.h" void vp9_coef_tree_initialize(); @@ -53,30 +61,11 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. -// Masks for partially or completely disabling split mode -#define DISABLE_ALL_SPLIT 0x3F -#define DISABLE_ALL_INTER_SPLIT 0x1F -#define DISABLE_COMPOUND_SPLIT 0x18 -#define LAST_AND_INTRA_SPLIT_ONLY 0x1E - // Max rate target for 1080P and below encodes under normal circumstances // (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB #define MAX_MB_RATE 250 #define MAXRATE_1080P 2025000 -#if CONFIG_INTERNAL_STATS -extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int lumamask, - double *weight); - - -extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v); - - -#endif - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -92,12 +81,7 @@ FILE *kf_list; FILE *keyfile; #endif -void vp9_init_quantizer(VP9_COMP *cpi); - -static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = - {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - -static INLINE void Scale2Ratio(int mode, int *hr, int *hs) { +static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: *hr = 1; @@ -135,17 +119,33 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { } } +static void setup_key_frame(VP9_COMP *cpi) { + vp9_setup_past_independence(&cpi->common); + + // All buffers are implicitly updated on key frames. 
+ cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; +} + +static void setup_inter_frame(VP9_COMMON *cm) { + if (cm->error_resilient_mode || cm->intra_only) + vp9_setup_past_independence(cm); + + assert(cm->frame_context_idx < FRAME_CONTEXTS); + cm->fc = cm->frame_contexts[cm->frame_context_idx]; +} + void vp9_initialize_enc() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); + vp9_init_quant_tables(); + vp9_coef_tree_initialize(); vp9_tokenize_initialize(); - vp9_init_quant_tables(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); - // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); init_done = 1; @@ -154,6 +154,7 @@ void vp9_initialize_enc() { static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + int i; // Delete sementation map vpx_free(cpi->segmentation_map); @@ -164,14 +165,19 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->coding_context.last_frame_seg_map_copy = NULL; vpx_free(cpi->complexity_map); - cpi->complexity_map = 0; + cpi->complexity_map = NULL; + + vp9_cyclic_refresh_free(cpi->cyclic_refresh); + cpi->cyclic_refresh = NULL; + vpx_free(cpi->active_map); - cpi->active_map = 0; + cpi->active_map = NULL; vp9_free_frame_buffers(cm); vp9_free_frame_buffer(&cpi->last_frame_uf); vp9_free_frame_buffer(&cpi->scaled_source); + vp9_free_frame_buffer(&cpi->scaled_last_source); vp9_free_frame_buffer(&cpi->alt_ref_buffer); vp9_lookahead_destroy(cpi->lookahead); @@ -184,108 +190,65 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->mb_norm_activity_map); cpi->mb_norm_activity_map = 0; - vpx_free(cpi->above_context[0]); - cpi->above_context[0] = NULL; - - vpx_free(cpi->above_seg_context); - cpi->above_seg_context = NULL; -} - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target value -// target q value -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) { 
- const RATE_CONTROL *const rc = &cpi->rc; - int start_index = rc->worst_quality; - int target_index = rc->worst_quality; - int i; - - // Convert the average q value to an index. - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - start_index = i; - if (vp9_convert_qindex_to_q(i) >= qstart) - break; - } - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (vp9_convert_qindex_to_q(i) >= qtarget) - break; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; + vpx_free(lc->rc_twopass_stats_in.buf); + lc->rc_twopass_stats_in.buf = NULL; + lc->rc_twopass_stats_in.sz = 0; } - - return target_index - start_index; } -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a value that should equate to thegiven rate ratio. +static void save_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; -static int compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio) { - int i; - int target_index = cpi->rc.worst_quality; + // Stores a snapshot of key state variables which can subsequently be + // restored with a call to vp9_restore_coding_context. These functions are + // intended for use in a re-code loop in vp9_compress_frame where the + // quantizer value is adjusted between loop iterations. + vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); + vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); + vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - // Look up the current projected bits per block for the base index - const int base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type, - base_q_index, 1.0); + vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - // Find the target bits per mb based on the base value and given ratio. 
- const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, + cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - // Convert the q target to an index - for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; ++i) { - target_index = i; - if (vp9_rc_bits_per_mb(cpi->common.frame_type, i, 1.0) <= - target_bits_per_mb ) - break; - } + vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); + vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - return target_index - base_q_index; + cc->fc = cm->fc; } -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. -static void setup_in_frame_q_adj(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - - // Make SURE use of floating point in this function is safe. - vp9_clear_system_state(); - - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; - - // Clear down the segment map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); +static void restore_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; - // Clear down the complexity map used for rd - vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + // Restore key state variables to the snapshot state stored in the + // previous call to vp9_save_coding_context. 
+ vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); + vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); + vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - vp9_enable_segmentation(seg); - vp9_clearall_segfeatures(seg); + vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - // Select delta coding method - seg->abs_delta = SEGMENT_DELTADATA; + vpx_memcpy(cm->last_frame_seg_map, + cpi->coding_context.last_frame_seg_map_copy, + (cm->mi_rows * cm->mi_cols)); - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q - vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); + vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - // Use some of the segments for in frame Q adjustment - for (segment = 1; segment < 2; segment++) { - const int qindex_delta = compute_qdelta_by_rate(cpi, cm->base_qindex, - in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } + cm->fc = cc->fc; } + static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + const RATE_CONTROL *const rc = &cpi->rc; struct segmentation *const seg = &cm->seg; - int high_q = (int)(cpi->rc.avg_q > 48.0); + int high_q = (int)(rc->avg_q > 48.0); int qi_delta; // Disable and clear down for KF @@ -323,9 +286,8 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = vp9_compute_qdelta( - cpi, cpi->rc.avg_q, (cpi->rc.avg_q * 0.875)); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta - 2)); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -338,16 +300,15 @@ static void configure_static_seg_features(VP9_COMP *cpi) { // All other frames if segmentation has been enabled // First normal frame in a 
valid gf or alt ref group - if (cpi->rc.frames_since_golden == 0) { + if (rc->frames_since_golden == 0) { // Set up segment features for normal frames in an arf group - if (cpi->rc.source_alt_ref_active) { + if (rc->source_alt_ref_active) { seg->update_map = 0; seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = vp9_compute_qdelta(cpi, cpi->rc.avg_q, - (cpi->rc.avg_q * 1.125)); - vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, (qi_delta + 2)); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125); + vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -372,7 +333,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_clearall_segfeatures(seg); } - } else if (cpi->rc.is_src_frame_alt_ref) { + } else if (rc->is_src_frame_alt_ref) { // Special case where we are coding over the top of a previous // alt ref frame. // Segment coding disabled for compred testing @@ -404,27 +365,6 @@ static void configure_static_seg_features(VP9_COMP *cpi) { } } -// DEBUG: Print out the segment id of each MB in the current frame. 
-static void print_seg_map(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int row, col; - int map_index = 0; - FILE *statsfile = fopen("segmap.stt", "a"); - - fprintf(statsfile, "%10d\n", cm->current_video_frame); - - for (row = 0; row < cpi->common.mi_rows; row++) { - for (col = 0; col < cpi->common.mi_cols; col++) { - fprintf(statsfile, "%10d", cpi->segmentation_map[map_index]); - map_index++; - } - fprintf(statsfile, "\n"); - } - fprintf(statsfile, "\n"); - - fclose(statsfile); -} - static void update_reference_segmentation_map(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; @@ -436,7 +376,7 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) { uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) cache[0] = mi_8x8[0]->mbmi.segment_id; - mi_8x8_ptr += cm->mode_info_stride; + mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } } @@ -445,539 +385,137 @@ static int is_slowest_mode(int mode) { } static void set_rd_speed_thresholds(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; int i; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG] = 0; - sf->thresh_mult[THR_NEARESTA] = 0; - - sf->thresh_mult[THR_DC] += 1000; - - sf->thresh_mult[THR_NEWMV] += 1000; - sf->thresh_mult[THR_NEWA] += 1000; - sf->thresh_mult[THR_NEWG] += 1000; - - sf->thresh_mult[THR_NEARMV] += 1000; - sf->thresh_mult[THR_NEARA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTLA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - sf->thresh_mult[THR_TM] += 1000; - - sf->thresh_mult[THR_COMP_NEARLA] += 1500; - sf->thresh_mult[THR_COMP_NEWLA] += 2000; - sf->thresh_mult[THR_NEARG] += 1000; - sf->thresh_mult[THR_COMP_NEARGA] += 1500; - sf->thresh_mult[THR_COMP_NEWGA] += 2000; - - sf->thresh_mult[THR_ZEROMV] += 2000; - sf->thresh_mult[THR_ZEROG] += 2000; - sf->thresh_mult[THR_ZEROA] += 2000; - sf->thresh_mult[THR_COMP_ZEROLA] += 2500; - sf->thresh_mult[THR_COMP_ZEROGA] += 2500; - - sf->thresh_mult[THR_H_PRED] += 2000; - sf->thresh_mult[THR_V_PRED] += 2000; - sf->thresh_mult[THR_D45_PRED ] += 2500; - sf->thresh_mult[THR_D135_PRED] += 2500; - sf->thresh_mult[THR_D117_PRED] += 2500; - sf->thresh_mult[THR_D153_PRED] += 2500; - sf->thresh_mult[THR_D207_PRED] += 2500; - sf->thresh_mult[THR_D63_PRED] += 2500; + cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; + + cpi->rd_thresh_mult[THR_NEARESTMV] = 0; + cpi->rd_thresh_mult[THR_NEARESTG] = 0; + cpi->rd_thresh_mult[THR_NEARESTA] = 0; + + cpi->rd_thresh_mult[THR_DC] += 1000; + + cpi->rd_thresh_mult[THR_NEWMV] += 1000; + cpi->rd_thresh_mult[THR_NEWA] += 1000; + cpi->rd_thresh_mult[THR_NEWG] += 1000; + + cpi->rd_thresh_mult[THR_NEARMV] += 1000; + cpi->rd_thresh_mult[THR_NEARA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000; + + cpi->rd_thresh_mult[THR_TM] += 1000; + + cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000; + cpi->rd_thresh_mult[THR_NEARG] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000; + + cpi->rd_thresh_mult[THR_ZEROMV] += 2000; + cpi->rd_thresh_mult[THR_ZEROG] += 2000; + cpi->rd_thresh_mult[THR_ZEROA] += 2000; + cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500; + cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500; + + cpi->rd_thresh_mult[THR_H_PRED] += 2000; + cpi->rd_thresh_mult[THR_V_PRED] += 2000; + cpi->rd_thresh_mult[THR_D45_PRED ] += 2500; + cpi->rd_thresh_mult[THR_D135_PRED] += 2500; + cpi->rd_thresh_mult[THR_D117_PRED] += 2500; + cpi->rd_thresh_mult[THR_D153_PRED] += 2500; + cpi->rd_thresh_mult[THR_D207_PRED] += 2500; + cpi->rd_thresh_mult[THR_D63_PRED] += 2500; /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - 
sf->thresh_mult[THR_NEWG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX; } } static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; + const SPEED_FEATURES *const sf = &cpi->sf; int i; for (i = 0; i < MAX_REFS; ++i) - sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - sf->thresh_mult_sub8x8[THR_LAST] += 2500; - sf->thresh_mult_sub8x8[THR_GOLD] += 2500; - sf->thresh_mult_sub8x8[THR_ALTR] += 2500; - sf->thresh_mult_sub8x8[THR_INTRA] += 2500; - sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500; // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) { + for (i = 0; i < MAX_REFS; i++) if (sf->disable_split_mask & (1 << i)) - sf->thresh_mult_sub8x8[i] = INT_MAX; - } + cpi->rd_thresh_mult_sub8x8[i] = INT_MAX; // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; -} - -static void set_good_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - int i; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? 
USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 
1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 32; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - 
sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed >= 5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->partition_search_type = FIXED_PARTITION; - sf->always_this_block_size = BLOCK_16X16; - sf->tx_size_search_method = frame_is_intra_only(cm) ? - USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } -} - -static void set_rt_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - sf->static_segmentation = 0; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - sf->encode_breakout_thresh = 1; - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 8; - } - if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 200; - } - if (speed >= 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; 
- - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->disable_filter_search_var_thresh = 100; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - sf->encode_breakout_thresh = 400; - } - if (speed >= 4) { - sf->optimize_coefficients = 0; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->use_fast_lpf_pick = 2; - sf->encode_breakout_thresh = 700; - } - if (speed >= 5) { - int i; - sf->adaptive_rd_thresh = 5; - sf->auto_min_max_partition_size = frame_is_intra_only(cm) ? - RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type == KEY_FRAME || (0 == - (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); - sf->subpel_force_stop = 1; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->frame_parameter_update = 0; - sf->encode_breakout_thresh = 1000; - sf->search_method = FAST_HEX; - } - if (speed >= 6) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->search_method = HEX; - } - if (speed >= 7) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_HEX; - } - if (speed >= 8) { - int i; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0) - } + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; - VP9_COMMON *cm = &cpi->common; - int speed = cpi->speed; - int i; - - // 
Convert negative speed to positive - if (speed < 0) - speed = -speed; - +static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS + int i; for (i = 0; i < MAX_MODES; ++i) cpi->mode_chosen_counts[i] = 0; #endif - // best quality defaults - sf->frame_parameter_update = 1; - sf->search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->subpel_search_method = SUBPEL_TREE; - sf->subpel_iters_per_step = 2; - sf->subpel_force_stop = 0; - sf->optimize_coefficients = !cpi->oxcf.lossless; - sf->reduce_first_step_size = 0; - sf->auto_mv_step_size = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; - sf->tx_size_search_method = USE_FULL_RD; - sf->use_lp32x32fdct = 0; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->reference_masking = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->less_rectangular_check = 0; - sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; - sf->min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->last_partitioning_redo_frequency = 4; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->disable_split_var_thresh = 0; - sf->disable_filter_search_var_thresh = 0; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; - } - sf->use_rd_breakout = 0; - sf->skip_encode_sb = 0; - sf->use_uv_intra_rd_estimate = 0; - sf->use_fast_lpf_pick = 0; - sf->use_fast_coef_updates = 0; - sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->use_nonrd_pick_mode = 0; - sf->encode_breakout_thresh = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; - - switch (cpi->oxcf.mode) { - case MODE_BESTQUALITY: - case MODE_SECONDPASS_BEST: // 
This is the best quality mode. - cpi->diamond_search_sad = vp9_full_range_search; - break; - case MODE_FIRSTPASS: - case MODE_GOODQUALITY: - case MODE_SECONDPASS: - set_good_speed_feature(cm, sf, speed); - break; - case MODE_REALTIME: - set_rt_speed_feature(cm, sf, speed); - break; - }; /* switch */ + vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting set_rd_speed_thresholds(cpi); set_rd_speed_thresholds_sub8x8(cpi); - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (cpi->pass == 1) { - sf->optimize_coefficients = 0; - } - - // No recode for 1 pass. - if (cpi->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = 0; - } - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { cpi->mb.fwd_txm4x4 = vp9_fwht4x4; } - - if (cpi->sf.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; - cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; - } - - cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; - - if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) - sf->adaptive_pred_interp_filter = 0; } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -1020,6 +558,13 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); + if (vp9_alloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate scaled last source buffer"); + vpx_free(cpi->tok); { @@ -1037,24 +582,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cm, 
cpi->mb_norm_activity_map, vpx_calloc(sizeof(unsigned int), cm->mb_rows * cm->mb_cols)); - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - vpx_free(cpi->above_context[0]); - CHECK_MEM_ERROR(cm, cpi->above_context[0], - vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * - MAX_MB_PLANE, - sizeof(*cpi->above_context[0]))); - - vpx_free(cpi->above_seg_context); - CHECK_MEM_ERROR(cm, cpi->above_seg_context, - vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), - sizeof(*cpi->above_seg_context))); } static void update_frame_size(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; vp9_update_frame_size(cm); @@ -1073,6 +606,13 @@ static void update_frame_size(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); + if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to reallocate scaled last source buffer"); + { int y_stride = cpi->scaled_source.y_stride; @@ -1083,57 +623,23 @@ static void update_frame_size(VP9_COMP *cpi) { } } - { - int i; - for (i = 1; i < MAX_MB_PLANE; ++i) { - cpi->above_context[i] = cpi->above_context[0] + - i * sizeof(*cpi->above_context[0]) * 2 * - mi_cols_aligned_to_sb(cm->mi_cols); - } - } + init_macroblockd(cm, xd); } - -// Table that converts 0-63 Q range values passed in outside to the Qindex -// range used internally. 
-static const int q_trans[] = { - 0, 4, 8, 12, 16, 20, 24, 28, - 32, 36, 40, 44, 48, 52, 56, 60, - 64, 68, 72, 76, 80, 84, 88, 92, - 96, 100, 104, 108, 112, 116, 120, 124, - 128, 132, 136, 140, 144, 148, 152, 156, - 160, 164, 168, 172, 176, 180, 184, 188, - 192, 196, 200, 204, 208, 212, 216, 220, - 224, 228, 232, 236, 240, 244, 249, 255, -}; - -int vp9_reverse_trans(int x) { - int i; - - for (i = 0; i < 64; i++) - if (q_trans[i] >= x) - return i; - - return 63; -}; - void vp9_new_framerate(VP9_COMP *cpi, double framerate) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; int vbr_max_bits; - if (framerate < 0.1) - framerate = 30; - - cpi->oxcf.framerate = framerate; + oxcf->framerate = framerate < 0.1 ? 30 : framerate; cpi->output_framerate = cpi->oxcf.framerate; - cpi->rc.av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth - / cpi->output_framerate); - cpi->rc.min_frame_bandwidth = (int)(cpi->rc.av_per_frame_bandwidth * - cpi->oxcf.two_pass_vbrmin_section / 100); - + rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / + cpi->output_framerate); + rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); - cpi->rc.min_frame_bandwidth = MAX(cpi->rc.min_frame_bandwidth, - FRAME_OVERHEAD_BITS); + rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); // A maximum bitrate for a frame is defined. // The baseline for this aligns with HW implementations that @@ -1143,31 +649,31 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) { // be acheived because of a user specificed max q (e.g. when the user // specifies lossless encode. 
// - vbr_max_bits = (int)(((int64_t)cpi->rc.av_per_frame_bandwidth * - cpi->oxcf.two_pass_vbrmax_section) / 100); - cpi->rc.max_frame_bandwidth = - MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); + vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), + vbr_max_bits); // Set Maximum gf/arf interval - cpi->rc.max_gf_interval = 16; + rc->max_gf_interval = 16; // Extended interval for genuinely static scenes - cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; // Special conditions when alt ref frame enabled in lagged compress mode - if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { - if (cpi->rc.max_gf_interval > cpi->oxcf.lag_in_frames - 1) - cpi->rc.max_gf_interval = cpi->oxcf.lag_in_frames - 1; + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->max_gf_interval > oxcf->lag_in_frames - 1) + rc->max_gf_interval = oxcf->lag_in_frames - 1; - if (cpi->twopass.static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1) - cpi->twopass.static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1; + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; } - if (cpi->rc.max_gf_interval > cpi->twopass.static_scene_max_gf_interval) - cpi->rc.max_gf_interval = cpi->twopass.static_scene_max_gf_interval; + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; } -static int64_t rescale(int64_t val, int64_t num, int denom) { +int64_t vp9_rescale(int64_t val, int64_t num, int denom) { int64_t llnum = num; int64_t llden = denom; int64_t llval = val; @@ -1175,124 +681,6 @@ static int64_t rescale(int64_t val, int64_t num, int denom) { return (llval * llnum / llden); } -// Initialize layer context data from 
init_config(). -static void init_layer_context(VP9_COMP *const cpi) { - const VP9_CONFIG *const oxcf = &cpi->oxcf; - int temporal_layer = 0; - cpi->svc.spatial_layer_id = 0; - cpi->svc.temporal_layer_id = 0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; - RATE_CONTROL *const lrc = &lc->rc; - lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q]; - lrc->total_actual_bits = 0; - lrc->total_target_vs_actual = 0; - lrc->ni_tot_qi = 0; - lrc->tot_q = 0.0; - lrc->avg_q = 0.0; - lrc->ni_frames = 0; - lrc->decimation_count = 0; - lrc->decimation_factor = 0; - lrc->rate_correction_factor = 1.0; - lrc->key_frame_rate_correction_factor = 1.0; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * - 1000; - lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); - lrc->bits_off_target = lrc->buffer_level; - } -} - -// Update the layer context from a change_config() call. -static void update_layer_context_change_config(VP9_COMP *const cpi, - const int target_bandwidth) { - const VP9_CONFIG *const oxcf = &cpi->oxcf; - const RATE_CONTROL *const rc = &cpi->rc; - int temporal_layer = 0; - float bitrate_alloc = 1.0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; - RATE_CONTROL *const lrc = &lc->rc; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; - bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; - // Update buffer-related quantities. 
- lc->starting_buffer_level = - (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); - lc->optimal_buffer_level = - (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc); - lc->maximum_buffer_size = - (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); - // Update framerate-related quantities. - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; - lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = rc->max_frame_bandwidth; - // Update qp-related quantities. - lrc->worst_quality = rc->worst_quality; - lrc->best_quality = rc->best_quality; - } -} - -// Prior to encoding the frame, update framerate-related quantities -// for the current layer. -static void update_layer_framerate(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - const VP9_CONFIG *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; - RATE_CONTROL *const lrc = &lc->rc; - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; - lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; - // Update the average layer frame size (non-cumulative per-frame-bw). - if (temporal_layer == 0) { - lc->avg_frame_size = lrc->av_per_frame_bandwidth; - } else { - double prev_layer_framerate = oxcf->framerate / - oxcf->ts_rate_decimator[temporal_layer - 1]; - int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; - lc->avg_frame_size = - (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / - (lc->framerate - prev_layer_framerate)); - } -} - -// Prior to encoding the frame, set the layer context, for the current layer -// to be encoded, to the cpi struct. 
-static void restore_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - int frame_since_key = cpi->rc.frames_since_key; - int frame_to_key = cpi->rc.frames_to_key; - cpi->rc = lc->rc; - cpi->oxcf.target_bandwidth = lc->target_bandwidth; - cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; - cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; - cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; - cpi->output_framerate = lc->framerate; - // Reset the frames_since_key and frames_to_key counters to their values - // before the layer restore. Keep these defined for the stream (not layer). - cpi->rc.frames_since_key = frame_since_key; - cpi->rc.frames_to_key = frame_to_key; -} - -// Save the layer context after encoding the frame. -static void save_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - lc->rc = cpi->rc; - lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; - lc->framerate = cpi->output_framerate; -} - static void set_tile_limits(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1304,14 +692,14 @@ static void set_tile_limits(VP9_COMP *cpi) { cm->log2_tile_rows = cpi->oxcf.tile_rows; } -static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); +static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; int i; cpi->oxcf = *oxcf; - cm->version = oxcf->version; + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; cm->width = oxcf->width; cm->height = oxcf->height; @@ -1324,42 +712,15 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { // Temporal 
scalability. cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - init_layer_context(cpi); + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && + cpi->oxcf.mode == MODE_SECONDPASS_BEST)) { + vp9_init_layer_context(cpi); } // change includes all joint functionality - vp9_change_config(ptr, oxcf); - - // Initialize active best and worst q and average q values. - if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - cpi->rc.avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q; - cpi->rc.avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q; - cpi->rc.avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q; - } else { - cpi->rc.avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - cpi->rc.avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - cpi->rc.avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - } - cpi->rc.last_q[0] = cpi->oxcf.best_allowed_q; - cpi->rc.last_q[1] = cpi->oxcf.best_allowed_q; - cpi->rc.last_q[2] = cpi->oxcf.best_allowed_q; - - // Initialise the starting buffer levels - cpi->rc.buffer_level = cpi->oxcf.starting_buffer_level; - cpi->rc.bits_off_target = cpi->oxcf.starting_buffer_level; - - cpi->rc.rolling_target_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.rolling_actual_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.long_rolling_target_bits = cpi->rc.av_per_frame_bandwidth; - cpi->rc.long_rolling_actual_bits = cpi->rc.av_per_frame_bandwidth; - - cpi->rc.total_actual_bits = 0; - cpi->rc.total_target_vs_actual = 0; + vp9_change_config(cpi, oxcf); cpi->static_mb_pct = 0; @@ -1374,16 +735,18 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } -void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { - VP9_COMP *cpi 
= (VP9_COMP *)(ptr); +void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; - if (!cpi || !oxcf) - return; + if (cm->profile != oxcf->profile) + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; - if (cm->version != oxcf->version) { - cm->version = oxcf->version; - } + if (cm->profile <= PROFILE_1) + assert(cm->bit_depth == BITS_8); + else + assert(cm->bit_depth > BITS_8); cpi->oxcf = *oxcf; @@ -1397,6 +760,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5); break; + case MODE_BESTQUALITY: + cpi->pass = 0; + break; + case MODE_FIRSTPASS: cpi->pass = 1; break; @@ -1415,15 +782,17 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { break; } - cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; - cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; - cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->oxcf.lossless = oxcf->lossless; - cpi->mb.e_mbd.itxm_add = cpi->oxcf.lossless ? vp9_iwht4x4_add - : vp9_idct4x4_add; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - + if (cpi->oxcf.lossless) { + // In lossless mode, make sure right quantizer range and correct transform + // is set. 
+ cpi->oxcf.worst_allowed_q = 0; + cpi->oxcf.best_allowed_q = 0; + cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + } else { + cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + } + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; cpi->refresh_golden_frame = 0; @@ -1452,34 +821,35 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { // Convert target bandwidth from Kbit/s to Bit/s cpi->oxcf.target_bandwidth *= 1000; - cpi->oxcf.starting_buffer_level = rescale(cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + cpi->oxcf.starting_buffer_level = + vp9_rescale(cpi->oxcf.starting_buffer_level, + cpi->oxcf.target_bandwidth, 1000); // Set or reset optimal and maximum buffer levels. if (cpi->oxcf.optimal_buffer_level == 0) cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.optimal_buffer_level = rescale(cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + cpi->oxcf.optimal_buffer_level = + vp9_rescale(cpi->oxcf.optimal_buffer_level, + cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.maximum_buffer_size == 0) cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.maximum_buffer_size = rescale(cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); + cpi->oxcf.maximum_buffer_size = + vp9_rescale(cpi->oxcf.maximum_buffer_size, + cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target, - cpi->oxcf.maximum_buffer_size); - cpi->rc.buffer_level = MIN(cpi->rc.buffer_level, - cpi->oxcf.maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size); + rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size); // Set up frame rate and related parameters rate control values. 
vp9_new_framerate(cpi, cpi->oxcf.framerate); // Set absolute upper and lower quality limits - cpi->rc.worst_quality = cpi->oxcf.worst_allowed_q; - cpi->rc.best_quality = cpi->oxcf.best_allowed_q; + rc->worst_quality = cpi->oxcf.worst_allowed_q; + rc->best_quality = cpi->oxcf.best_allowed_q; // active values should only be modified if out of new range @@ -1504,9 +874,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { } update_frame_size(cpi); - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { + vp9_update_layer_context_change_config(cpi, + (int)cpi->oxcf.target_bandwidth); } cpi->speed = abs(cpi->oxcf.cpu_used); @@ -1520,7 +892,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { #else cpi->alt_ref_source = NULL; #endif - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if 0 // Experimental RD Code @@ -1541,7 +913,7 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) { mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; - mvjointsadcost[0] = 300; + mvjointsadcost[3] = 300; } static void cal_nmvsadcosts(int *mvsadcost[2]) { @@ -1693,30 +1065,19 @@ static void free_pick_mode_context(MACROBLOCK *x) { } } -VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { +VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; - volatile union { - VP9_COMP *cpi; - VP9_PTR ptr; - } ctx; - - VP9_COMP *cpi; - VP9_COMMON *cm; - - cpi = ctx.cpi = vpx_memalign(32, sizeof(VP9_COMP)); - // Check that the CPI instance is valid - if (!cpi) - return 0; + VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); + VP9_COMMON *const cm = cpi != NULL ? 
&cpi->common : NULL; - cm = &cpi->common; + if (!cm) + return NULL; vp9_zero(*cpi); if (setjmp(cm->error.jmp)) { - VP9_PTR ptr = ctx.ptr; - - ctx.cpi->common.error.setjmp = 0; - vp9_remove_compressor(&ptr); + cm->error.setjmp = 0; + vp9_remove_compressor(cpi); return 0; } @@ -1729,20 +1090,18 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->use_svc = 0; - init_config((VP9_PTR)cpi, oxcf); - + init_config(cpi, oxcf); + vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc); init_pick_mode_context(cpi); - cm->current_video_frame = 0; + cm->current_video_frame = 0; // Set reference frame sign bias for ALTREF frame to 1 (for now) cm->ref_frame_sign_bias[ALTREF_FRAME] = 1; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->gold_is_last = 0; - cpi->alt_is_last = 0; - cpi->gold_is_alt = 0; + cpi->alt_is_last = 0; + cpi->gold_is_alt = 0; // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(cm, cpi->segmentation_map, @@ -1752,6 +1111,9 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { CHECK_MEM_ERROR(cm, cpi->complexity_map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); + // Create a map used for cyclic background refresh. + CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, + vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); // And a place holder structure is the coding context // for use if we want to save and restore it @@ -1772,13 +1134,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; - - cpi->rc.frames_since_key = 8; // Sensible default for first frame. 
- cpi->rc.this_key_frame_forced = 0; - cpi->rc.next_key_frame_forced = 0; - - cpi->rc.source_alt_ref_pending = 0; - cpi->rc.source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; #if CONFIG_MULTIPLE_ARF @@ -1834,18 +1189,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->first_time_stamp_ever = INT64_MAX; - cpi->rc.frames_till_gf_update_due = 0; - - cpi->rc.ni_av_qi = cpi->oxcf.worst_allowed_q; - cpi->rc.ni_tot_qi = 0; - cpi->rc.ni_frames = 0; - cpi->rc.tot_q = 0.0; - cpi->rc.avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); - - cpi->rc.rate_correction_factor = 1.0; - cpi->rc.key_frame_rate_correction_factor = 1.0; - cpi->rc.gf_rate_correction_factor = 1.0; - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; @@ -1878,17 +1221,56 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { if (cpi->pass == 1) { vp9_init_first_pass(cpi); } else if (cpi->pass == 2) { - size_t packet_sz = sizeof(FIRSTPASS_STATS); - int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); + const size_t packet_sz = sizeof(FIRSTPASS_STATS); + const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = (void *)((char *)cpi->twopass.stats_in - + (packets - 1) * packet_sz); - vp9_init_second_pass(cpi); + if (cpi->svc.number_spatial_layers > 1 + && cpi->svc.number_temporal_layers == 1) { + FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; + FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; + int i; + + for (i = 0; i < oxcf->ss_number_layers; ++i) { + FIRSTPASS_STATS *const last_packet_for_layer = + &stats[packets - oxcf->ss_number_layers + i]; + const int layer_id = (int)last_packet_for_layer->spatial_layer_id; + const int packets_in_layer = (int)last_packet_for_layer->count + 1; + if (layer_id >= 0 && layer_id < 
oxcf->ss_number_layers) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; + + vpx_free(lc->rc_twopass_stats_in.buf); + + lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; + CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, + vpx_malloc(lc->rc_twopass_stats_in.sz)); + lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; + lc->twopass.stats_in = lc->twopass.stats_in_start; + lc->twopass.stats_in_end = lc->twopass.stats_in_start + + packets_in_layer - 1; + stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; + } + } + + for (i = 0; i < packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers + && stats_copy[layer_id] != NULL) { + *stats_copy[layer_id] = stats[i]; + ++stats_copy[layer_id]; + } + } + + vp9_init_second_pass_spatial_svc(cpi); + } else { + cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; + cpi->twopass.stats_in = cpi->twopass.stats_in_start; + cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + + vp9_init_second_pass(cpi); + } } - vp9_set_speed_features(cpi); + set_speed_features(cpi); // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { @@ -2010,11 +1392,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_zero(cpi->mode_test_hits); #endif - return (VP9_PTR) cpi; + return cpi; } -void vp9_remove_compressor(VP9_PTR *ptr) { - VP9_COMP *cpi = (VP9_COMP *)(*ptr); +void vp9_remove_compressor(VP9_COMP *cpi) { int i; if (!cpi) @@ -2121,7 +1502,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) { vp9_remove_common(&cpi->common); vpx_free(cpi); - *ptr = 0; #ifdef OUTPUT_YUV_SRC fclose(yuv_file); @@ -2143,53 +1523,42 @@ void vp9_remove_compressor(VP9_PTR *ptr) { #endif } +static int64_t get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + const int dw = width % 16; + const int dh = height % 16; + int64_t total_sse = 0; + unsigned int sse = 0; + int sum = 0; + 
int x, y; + + if (dw > 0) { + variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + if (dh > 0) { + variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } -static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride, - const uint8_t *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row + 16 <= rows; row += 16) { - for (col = 0; col + 16 <= cols; col += 16) { - unsigned int sse; - - vp9_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; - } - - /* Handle odd-sized width */ - if (col < cols) { - unsigned int border_row, border_col; - const uint8_t *border_orig = orig; - const uint8_t *border_recon = recon; - - for (border_row = 0; border_row < 16; border_row++) { - for (border_col = col; border_col < cols; border_col++) { - diff = border_orig[border_col] - border_recon[border_col]; - total_sse += diff * diff; - } - - border_orig += orig_stride; - border_recon += recon_stride; - } - } - - orig += orig_stride * 16; - recon += recon_stride * 16; - } - /* Handle odd-sized height */ - for (; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; + pa += 16; + pb += 16; } - orig += orig_stride; - recon += recon_stride; + a += 16 * a_stride; + b += 16 * b_stride; } return total_sse; @@ -2217,9 +1586,9 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; - const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i], - b_planes[i], 
b_strides[i], - w, h); + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse); @@ -2248,9 +1617,7 @@ static void generate_psnr_packet(VP9_COMP *cpi) { vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } -int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - +int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) { if (ref_frame_flags > 7) return -1; @@ -2258,27 +1625,11 @@ int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) { return 0; } -int vp9_update_reference(VP9_PTR ptr, int ref_frame_flags) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - if (ref_frame_flags > 7) - return -1; - - cpi->ext_refresh_golden_frame = 0; - cpi->ext_refresh_alt_ref_frame = 0; - cpi->ext_refresh_last_frame = 0; - - if (ref_frame_flags & VP9_LAST_FLAG) - cpi->ext_refresh_last_frame = 1; - - if (ref_frame_flags & VP9_GOLD_FLAG) - cpi->ext_refresh_golden_frame = 1; - - if (ref_frame_flags & VP9_ALT_FLAG) - cpi->ext_refresh_alt_ref_frame = 1; - +void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) { + cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0; + cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0; + cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0; cpi->ext_refresh_frame_flags_pending = 1; - return 0; } static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi, @@ -2294,9 +1645,8 @@ static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(VP9_COMP *cpi, return ref_frame == NONE ? 
NULL : get_ref_frame_buffer(cpi, ref_frame); } -int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, +int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMP *const cpi = (VP9_COMP *)ptr; YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); if (cfg) { vp8_yv12_copy_frame(cfg, sd); @@ -2306,8 +1656,7 @@ int vp9_copy_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, } } -int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { - VP9_COMP *cpi = (VP9_COMP *)ptr; +int vp9_get_reference_enc(VP9_COMP *cpi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &cpi->common; if (index < 0 || index >= REF_FRAMES) @@ -2317,9 +1666,8 @@ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { return 0; } -int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, +int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMP *cpi = (VP9_COMP *)ptr; YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); if (cfg) { vp8_yv12_copy_frame(sd, cfg); @@ -2329,9 +1677,9 @@ int vp9_set_reference_enc(VP9_PTR ptr, VP9_REFFRAME ref_frame_flag, } } -int vp9_update_entropy(VP9_PTR comp, int update) { - ((VP9_COMP *)comp)->ext_refresh_frame_context = update; - ((VP9_COMP *)comp)->ext_refresh_frame_context_pending = 1; +int vp9_update_entropy(VP9_COMP * cpi, int update) { + cpi->ext_refresh_frame_context = update; + cpi->ext_refresh_frame_context_pending = 1; return 0; } @@ -2532,36 +1880,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { } #endif -static double compute_edge_pixel_proportion(YV12_BUFFER_CONFIG *frame) { -#define EDGE_THRESH 128 - int i, j; - int num_edge_pels = 0; - int num_pels = (frame->y_height - 2) * (frame->y_width - 2); - uint8_t *prev = frame->y_buffer + 1; - uint8_t *curr = frame->y_buffer + 1 + frame->y_stride; - uint8_t *next = 
frame->y_buffer + 1 + 2 * frame->y_stride; - for (i = 1; i < frame->y_height - 1; i++) { - for (j = 1; j < frame->y_width - 1; j++) { - /* Sobel hor and ver gradients */ - int v = 2 * (curr[1] - curr[-1]) + (prev[1] - prev[-1]) + - (next[1] - next[-1]); - int h = 2 * (prev[0] - next[0]) + (prev[1] - next[1]) + - (prev[-1] - next[-1]); - h = (h < 0 ? -h : h); - v = (v < 0 ? -v : v); - if (h > EDGE_THRESH || v > EDGE_THRESH) - num_edge_pels++; - curr++; - prev++; - next++; - } - curr += frame->y_stride - frame->y_width + 2; - prev += frame->y_stride - frame->y_width + 2; - next += frame->y_stride - frame->y_width + 2; - } - return (double)num_edge_pels / num_pels; -} - // Function to test for conditions that indicate we should loop // back and recode a frame. static int recode_loop_test(const VP9_COMP *cpi, @@ -2598,7 +1916,7 @@ static int recode_loop_test(const VP9_COMP *cpi, return force_recode; } -static void update_reference_frames(VP9_COMP * const cpi) { +void vp9_update_reference_frames(VP9_COMP *cpi) { VP9_COMMON * const cm = &cpi->common; // At this point the new frame has been encoded. 
@@ -2669,21 +1987,20 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } if (lf->filter_level > 0) { - vp9_set_alt_lf_level(cpi, lf->filter_level); vp9_loop_filter_frame(cm, xd, lf->filter_level, 0, 0); } vp9_extend_frame_inner_borders(cm->frame_to_show); } -static void scale_references(VP9_COMP *cpi) { +void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; @@ -2745,7 +2062,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { vp9_clear_system_state(); - recon_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10u %10d %10d %10d %10d %10d " @@ -2801,7 +2118,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, int q) { VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); // Set up entropy context depending on frame type. The decoder mandates // the use of the default context, index 0, for keyframes and inter @@ -2809,19 +2126,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. 
if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { - cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) + cm->frame_context_idx = cpi->refresh_alt_ref_frame; + + setup_inter_frame(cm); } // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); + } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_setup(cpi); } // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -2856,7 +2175,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, do { vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); if (loop_count == 0) { // Set up entropy context depending on frame type. The decoder mandates @@ -2865,12 +2184,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. 
if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + + setup_inter_frame(cm); } } @@ -2879,7 +2198,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } // transform / motion compensation build reconstruction frame @@ -2895,13 +2214,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // accurate estimate of output frame size to determine if we need // to recode. if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { - vp9_save_coding_context(cpi); + save_coding_context(cpi); cpi->dummy_packing = 1; if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size); rc->projected_frame_size = (int)(*size) << 3; - vp9_restore_coding_context(cpi); + restore_coding_context(cpi); if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; @@ -2914,7 +2233,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; int low_err_target = cpi->ambient_err >> 1; @@ -3118,7 +2437,20 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { cpi->Source = cpi->un_scaled_source; } - scale_references(cpi); + + // Scale the last source buffer, if required. 
+ if (cpi->unscaled_last_source != NULL) { + if (cm->mi_cols * MI_SIZE != cpi->unscaled_last_source->y_width || + cm->mi_rows * MI_SIZE != cpi->unscaled_last_source->y_height) { + scale_and_extend_frame_nonnormative(cpi->unscaled_last_source, + &cpi->scaled_last_source); + cpi->Last_Source = &cpi->scaled_last_source; + } else { + cpi->Last_Source = cpi->unscaled_last_source; + } + } + + vp9_scale_references(cpi); vp9_clear_system_state(); @@ -3155,7 +2487,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); @@ -3237,6 +2569,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_write_yuv_frame(cpi->Source); #endif + set_speed_features(cpi); + // Decide q and q bounds. q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index); @@ -3256,7 +2590,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // fixed interval. 
Note the reconstruction error if it is the frame before // the force key frame if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } // If the encoder forced a KEY_FRAME decision @@ -3294,7 +2628,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, update_reference_segmentation_map(cpi); release_scaled_references(cpi); - update_reference_frames(cpi); + vp9_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); @@ -3371,29 +2705,14 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->last_show_frame = cm->show_frame; if (cm->show_frame) { - // current mip will be the prev_mip for the next frame - MODE_INFO *temp = cm->prev_mip; - MODE_INFO **temp2 = cm->prev_mi_grid_base; - cm->prev_mip = cm->mip; - cm->mip = temp; - cm->prev_mi_grid_base = cm->mi_grid_base; - cm->mi_grid_base = temp2; - - // update the upper left visible macroblock ptrs - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - - cpi->mb.e_mbd.mi_8x8 = cm->mi_grid_visible; - cpi->mb.e_mbd.mi_8x8[0] = cm->mi; + vp9_swap_mi_and_prev_mi(cm); // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; + if (cpi->use_svc) + vp9_inc_frame_in_layer(&cpi->svc); } - - // restore prev_mi - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -3419,7 +2738,7 @@ static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, (void) frame_flags; vp9_rc_get_first_pass_params(cpi); - vp9_set_quantizer(cpi, find_fp_qindex()); + vp9_set_quantizer(&cpi->common, find_fp_qindex()); vp9_first_pass(cpi); 
} @@ -3430,7 +2749,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_rc_get_second_pass_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); - vp9_twopass_postencode_update(cpi, *size); + vp9_twopass_postencode_update(cpi); } static void check_initial_width(VP9_COMP *cpi, int subsampling_x, @@ -3447,10 +2766,9 @@ static void check_initial_width(VP9_COMP *cpi, int subsampling_x, } -int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, +int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { - VP9_COMP *cpi = (VP9_COMP *)ptr; VP9_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; int res = 0; @@ -3465,7 +2783,7 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) { + if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "Non-4:2:0 color space requires profile >= 1"); res = -1; @@ -3533,12 +2851,12 @@ void adjust_frame_rate(VP9_COMP *cpi) { cpi->last_end_time_stamp_seen = cpi->source->ts_end; } -int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, +int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { - VP9_COMP *cpi = (VP9_COMP *) ptr; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + RATE_CONTROL *const rc = &cpi->rc; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; @@ -3546,9 +2864,14 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (!cpi) return -1; + if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 
2) { + vp9_restore_layer_context(cpi); + } + vpx_usec_timer_start(&cmptimer); cpi->source = NULL; + cpi->last_source = NULL; set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); @@ -3560,7 +2883,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 0; // Should we code an alternate reference frame. - if (cpi->oxcf.play_alternate && cpi->rc.source_alt_ref_pending) { + if (cpi->oxcf.play_alternate && rc->source_alt_ref_pending) { int frames_to_arf; #if CONFIG_MULTIPLE_ARF @@ -3572,9 +2895,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, - cpi->next_frame_in_order; else #endif - frames_to_arf = cpi->rc.frames_till_gf_update_due; + frames_to_arf = rc->frames_till_gf_update_due; - assert(frames_to_arf <= cpi->rc.frames_to_key); + assert(frames_to_arf <= rc->frames_to_key); if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) { #if CONFIG_MULTIPLE_ARF @@ -3586,7 +2909,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. 
- vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost); + vp9_configure_arnr_filter(cpi, frames_to_arf, rc->gfu_boost); vp9_temporal_filter_prepare(cpi, frames_to_arf); vp9_extend_frame_borders(&cpi->alt_ref_buffer); force_src_buffer = &cpi->alt_ref_buffer; @@ -3596,14 +2919,14 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } else { - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } } @@ -3611,25 +2934,32 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF int i; #endif + + // Get last frame source. + if (cm->current_video_frame > 0) { + if ((cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) + return -1; + } + if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; cm->intra_only = 0; #if CONFIG_MULTIPLE_ARF // Is this frame the ARF overlay. - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; for (i = 0; i < cpi->arf_buffered; ++i) { if (cpi->source == cpi->alt_ref_source[i]) { - cpi->rc.is_src_frame_alt_ref = 1; + rc->is_src_frame_alt_ref = 1; cpi->refresh_golden_frame = 1; break; } } #else - cpi->rc.is_src_frame_alt_ref = cpi->alt_ref_source - && (cpi->source == cpi->alt_ref_source); + rc->is_src_frame_alt_ref = cpi->alt_ref_source && + (cpi->source == cpi->alt_ref_source); #endif - if (cpi->rc.is_src_frame_alt_ref) { + if (rc->is_src_frame_alt_ref) { // Current frame is an ARF overlay frame. #if CONFIG_MULTIPLE_ARF cpi->alt_ref_source[i] = NULL; @@ -3649,13 +2979,20 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->source) { cpi->un_scaled_source = cpi->Source = force_src_buffer ? 
force_src_buffer : &cpi->source->img; + + if (cpi->last_source != NULL) { + cpi->unscaled_last_source = &cpi->last_source->img; + } else { + cpi->unscaled_last_source = NULL; + } + *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; #if CONFIG_MULTIPLE_ARF - if ((cm->frame_type != KEY_FRAME) && (cpi->pass == 2)) - cpi->rc.source_alt_ref_pending = is_next_frame_arf(cpi); + if (cm->frame_type != KEY_FRAME && cpi->pass == 2) + rc->source_alt_ref_pending = is_next_frame_arf(cpi); #endif } else { *size = 0; @@ -3678,8 +3015,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_framerate(cpi); - restore_layer_context(cpi); + vp9_update_temporal_layer_framerate(cpi); + vp9_restore_layer_context(cpi); } // start with a 0 size frame @@ -3728,19 +3065,19 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); - xd->interp_kernel = vp9_get_interp_kernel( - DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_init(); } - if (cpi->use_svc) { - SvcEncode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 1) { + if (cpi->pass == 1 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2) { + } else if (cpi->pass == 2 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); + } else if (cpi->use_svc) { + SvcEncode(cpi, size, dest, frame_flags); } else { // One pass encode Pass0Encode(cpi, size, dest, frame_flags); @@ -3759,9 +3096,10 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, } // Save layer specific state. 
- if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - save_layer_context(cpi); + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { + vp9_save_layer_context(cpi); } vpx_usec_timer_mark(&cmptimer); @@ -3846,9 +3184,8 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, return 0; } -int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, +int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags) { - VP9_COMP *cpi = (VP9_COMP *)comp; VP9_COMMON *cm = &cpi->common; if (!cm->show_frame) { @@ -3876,11 +3213,10 @@ int vp9_get_preview_raw_frame(VP9_PTR comp, YV12_BUFFER_CONFIG *dest, } } -int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, +int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[MAX_SEGMENTS], int delta_lf[MAX_SEGMENTS], unsigned int threshold[MAX_SEGMENTS]) { - VP9_COMP *cpi = (VP9_COMP *) comp; signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS]; struct segmentation *seg = &cpi->common.seg; int i; @@ -3926,10 +3262,8 @@ int vp9_set_roimap(VP9_PTR comp, unsigned char *map, unsigned int rows, return 0; } -int vp9_set_active_map(VP9_PTR comp, unsigned char *map, +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols) { - VP9_COMP *cpi = (VP9_COMP *) comp; - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { if (map) { vpx_memcpy(cpi->active_map, map, rows * cols); @@ -3945,9 +3279,8 @@ int vp9_set_active_map(VP9_PTR comp, unsigned char *map, } } -int vp9_set_internal_size(VP9_PTR comp, +int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { - VP9_COMP *cpi = (VP9_COMP *) comp; VP9_COMMON *cm = &cpi->common; int hr = 0, hs = 0, vr = 0, vs = 0; @@ -3967,9 +3300,8 @@ int 
vp9_set_internal_size(VP9_PTR comp, return 0; } -int vp9_set_size_literal(VP9_PTR comp, unsigned int width, +int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height) { - VP9_COMP *cpi = (VP9_COMP *)comp; VP9_COMMON *cm = &cpi->common; check_initial_width(cpi, 1, 1); @@ -4004,37 +3336,20 @@ int vp9_set_size_literal(VP9_PTR comp, unsigned int width, return 0; } -void vp9_set_svc(VP9_PTR comp, int use_svc) { - VP9_COMP *cpi = (VP9_COMP *)comp; +void vp9_set_svc(VP9_COMP *cpi, int use_svc) { cpi->use_svc = use_svc; return; } -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference) { - int i, j; - int total = 0; - - const uint8_t *src = source->y_buffer; - const uint8_t *ref = reference->y_buffer; - - // Loop through the Y plane raw and reconstruction data summing - // (square differences) - for (i = 0; i < source->y_height; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, - ref + j, reference->y_stride, &sse); - } - - src += 16 * source->y_stride; - ref += 16 * reference->y_stride; - } +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); - return total; + return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } -int vp9_get_quantizer(VP9_PTR c) { - return ((VP9_COMP *)c)->common.base_qindex; +int vp9_get_quantizer(VP9_COMP *cpi) { + return cpi->common.base_qindex; } diff --git a/source/libvpx/vp9/encoder/vp9_onyx_int.h b/source/libvpx/vp9/encoder/vp9_onyx_int.h index 019cb13..18203f9 100644 --- a/source/libvpx/vp9/encoder/vp9_onyx_int.h +++ b/source/libvpx/vp9/encoder/vp9_onyx_int.h @@ -16,12 +16,14 @@ #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8cx.h" +#include "vp9/common/vp9_ppflags.h" 
#include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" -#include "vp9/common/vp9_onyx.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" @@ -29,8 +31,9 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_speed_features.h" +#include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_variance.h" #ifdef __cplusplus @@ -39,30 +42,11 @@ extern "C" { // #define MODE_TEST_HIT_STATS -#if CONFIG_MULTIPLE_ARF -// Set MIN_GF_INTERVAL to 1 for the full decomposition. -#define MIN_GF_INTERVAL 2 -#else -#define MIN_GF_INTERVAL 4 -#endif #define DEFAULT_GF_INTERVAL 10 -#define DEFAULT_KF_BOOST 2000 -#define DEFAULT_GF_BOOST 2000 - -#define KEY_FRAME_CONTEXT 5 #define MAX_MODES 30 #define MAX_REFS 6 -#define MIN_THRESHMULT 32 -#define MAX_THRESHMULT 512 - -#define GF_ZEROMV_ZBIN_BOOST 0 -#define LF_ZEROMV_ZBIN_BOOST 0 -#define MV_ZBIN_BOOST 0 -#define SPLIT_MV_ZBIN_BOOST 0 -#define INTRA_ZBIN_BOOST 0 - typedef struct { int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; @@ -132,84 +116,6 @@ typedef enum { } THR_MODES_SUB8X8; typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5 -} SEARCH_METHODS; - -typedef enum { - USE_FULL_RD = 0, - USE_LARGESTINTRA, - USE_LARGESTINTRA_MODELINTER, - USE_LARGESTALL -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1, - STRICT_NEIGHBORING_MIN_MAX = 2 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Values should be powers of 2 so that they can be selected as bits of - // an integer flags field - - // terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the 
frame, etc. - FLAG_EARLY_TERMINATE = 1, - - // skips comp inter modes if the best so far is an intra mode - FLAG_SKIP_COMP_BESTINTRA = 2, - - // skips comp inter modes if the best single intermode so far does - // not have the same reference as one of the two references being - // tested - FLAG_SKIP_COMP_REFMISMATCH = 4, - - // skips oblique intra modes if the best so far is an inter mode - FLAG_SKIP_INTRA_BESTINTER = 8, - - // skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions - FLAG_SKIP_INTRA_DIRMISMATCH = 16, - - // skips intra modes other than DC_PRED if the source variance - // is small - FLAG_SKIP_INTRA_LOWVAR = 32, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - SUBPEL_TREE = 0, - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -#define ALL_INTRA_MODES 0x3FF -#define INTRA_DC_ONLY 0x01 -#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) -#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) -#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) - -typedef enum { - LAST_FRAME_PARTITION_OFF = 0, - LAST_FRAME_PARTITION_LOW_MOTION = 1, - LAST_FRAME_PARTITION_ALL = 2 -} LAST_FRAME_PARTITION_METHOD; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. - ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { // encode_breakout is disabled. ENCODE_BREAKOUT_DISABLED = 0, // encode_breakout is enabled. 
@@ -219,237 +125,164 @@ typedef enum { } ENCODE_BREAKOUT_TYPE; typedef enum { - // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, - - // Always use a fixed size partition - FIXED_PARTITION = 1, + NORMAL = 0, + FOURFIVE = 1, + THREEFIVE = 2, + ONETWO = 3 +} VPX_SCALING; - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION -} PARTITION_SEARCH_TYPE; +typedef enum { + USAGE_LOCAL_FILE_PLAYBACK = 0, + USAGE_STREAM_FROM_SERVER = 1, + USAGE_CONSTRAINED_QUALITY = 2, + USAGE_CONSTANT_QUALITY = 3, +} END_USAGE; -typedef struct { - // Frame level coding parameter update - int frame_parameter_update; +typedef enum { + // Good Quality Fast Encoding. The encoder balances quality with the + // amount of time it takes to encode the output. (speed setting + // controls how fast) + MODE_GOODQUALITY = 1, + + // One Pass - Best Quality. The encoder places priority on the + // quality of the output over encoding speed. The output is compressed + // at the highest possible quality. This option takes the longest + // amount of time to encode. (speed setting ignored) + MODE_BESTQUALITY = 2, + + // Two Pass - First Pass. The encoder generates a file of statistics + // for use in the second encoding pass. (speed setting controls how fast) + MODE_FIRSTPASS = 3, + + // Two Pass - Second Pass. The encoder uses the statistics that were + // generated in the first encoding pass to create the compressed + // output. (speed setting controls how fast) + MODE_SECONDPASS = 4, + + // Two Pass - Second Pass Best. The encoder uses the statistics that + // were generated in the first encoding pass to create the compressed + // output using the highest possible quality, and taking a + // longer amount of time to encode. 
(speed setting ignored) + MODE_SECONDPASS_BEST = 5, + + // Realtime/Live Encoding. This mode is optimized for realtime + // encoding (for example, capturing a television signal or feed from + // a live camera). (speed setting controls how fast) + MODE_REALTIME = 6, +} MODE; - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; +typedef enum { + FRAMEFLAGS_KEY = 1 << 0, + FRAMEFLAGS_GOLDEN = 1 << 1, + FRAMEFLAGS_ALTREF = 1 << 2, +} FRAMETYPE_FLAGS; - RECODE_LOOP_TYPE recode_loop; +typedef enum { + NO_AQ = 0, + VARIANCE_AQ = 1, + COMPLEXITY_AQ = 2, + CYCLIC_REFRESH_AQ = 3, + AQ_MODE_COUNT // This should always be the last member of the enum +} AQ_MODE; + +typedef struct VP9_CONFIG { + BITSTREAM_PROFILE profile; + BIT_DEPTH bit_depth; + int width; // width of data passed to the compressor + int height; // height of data passed to the compressor + double framerate; // set to passed in framerate + int64_t target_bandwidth; // bandwidth to be used in kilobits per second + + int noise_sensitivity; // pre processing blur: recommendation 0 + int sharpness; // sharpening output: recommendation 0: + int cpu_used; + unsigned int rc_max_intra_bitrate_pct; - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. - SUBPEL_SEARCH_METHODS subpel_search_method; + MODE mode; - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; + // Key Framing Operations + int auto_key; // autodetect cut scenes and set the keyframes + int key_freq; // maximum distance to key frame. - // Control when to stop subpel search - int subpel_force_stop; + int lag_in_frames; // how many frames lag before we start encoding - // Thresh_mult is used to set a threshold for the rd score. 
A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - // This parameter controls the number of steps we'll do in a diamond - // search. - int max_step_search_steps; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. - int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - int comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Enables skipping the reconstruction step (idct, recon) in the - // intermediate steps assuming the last frame didn't have too many intra - // blocks and the q is less than a threshold. - int skip_encode_sb; - int skip_encode_frame; - - // This variable allows us to reuse the last frames partition choices - // (64x64 v 32x32 etc) for this frame. 
It can be set to only use the last - // frame as a starting point in low motion scenes or always use it. If set - // we use last partitioning_redo frequency to determine how often to redo - // the partitioning from scratch. Adjust_partitioning_from_last_frame - // enables us to adjust up or down one partitioning from the last frames - // partitioning. - LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; - - // Determine which method we use to determine transform size. We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Low precision 32x32 fdct keeps everything in 16 bits and thus is less - // precise but significantly faster than the non lp version. - int use_lp32x32fdct; - - // TODO(JBB): remove this as its no longer used. - - // After looking at the first set of modes (set by index here), skip - // checking modes for reference frames that don't match the reference frame - // of the best so far. - int mode_skip_start; - - // TODO(JBB): Remove this. - int reference_masking; - - PARTITION_SEARCH_TYPE partition_search_type; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. - AUTO_MIN_MAX_MODE auto_min_max_partition_size; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. 
- BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // How frequently we re do the partitioning from scratch. Only used if - // use_lastframe_partitioning is set. - int last_partitioning_redo_frequency; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. - int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_interp_filter; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which the split mode is disabled - unsigned int disable_split_var_thresh; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. 
- int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This variable enables an early break out of mode testing if the model for - // rd built from the prediction signal indicates a value that's much - // higher than the best rd we've seen so far. - int use_rd_breakout; - - // This enables us to use an estimate for intra rd based on dc mode rather - // than choosing an actual uv mode in the stage of encoding before the actual - // final encode. - int use_uv_intra_rd_estimate; - - // This feature controls how the loop filter level is determined: - // 0: Try the full image with different values. - // 1: Try a small portion of the image with different values. - // 2: Estimate the level based on quantizer and frame type - int use_fast_lpf_pick; - - // This feature limits the number of coefficients updates we actually do - // by only looking at counts from 1/2 the bands. - int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced - - // This flag controls the use of non-RD mode decision. - int use_nonrd_pick_mode; + // ---------------------------------------------------------------- + // DATARATE CONTROL OPTIONS - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; + END_USAGE end_usage; // vbr or cbr - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. 
- int disable_inter_mode_mask[BLOCK_SIZES]; -} SPEED_FEATURES; + // buffer targeting aggressiveness + int under_shoot_pct; + int over_shoot_pct; -typedef struct { - RATE_CONTROL rc; - int target_bandwidth; - int64_t starting_buffer_level; + // buffering parameters + int64_t starting_buffer_level; // in seconds int64_t optimal_buffer_level; int64_t maximum_buffer_size; - double framerate; - int avg_frame_size; -} LAYER_CONTEXT; -typedef struct VP9_COMP { - DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); - - DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); - -#if CONFIG_ALPHA - DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); -#endif + // Frame drop threshold. + int drop_frames_water_mark; + + // controlling quality + int fixed_q; + int worst_allowed_q; + int best_allowed_q; + int cq_level; + int lossless; + AQ_MODE aq_mode; // Adaptive Quantization mode + + // Enable feature to reduce the frame quantization every x frames. + int frame_periodic_boost; + + // two pass datarate control + int two_pass_vbrbias; // two pass datarate control tweaks + int two_pass_vbrmin_section; + int two_pass_vbrmax_section; + // END DATARATE CONTROL OPTIONS + // ---------------------------------------------------------------- + + // Spatial and temporal scalability. + int ss_number_layers; // Number of spatial layers. + int ts_number_layers; // Number of temporal layers. + // Bitrate allocation for spatial layers. 
+ int ss_target_bitrate[VPX_SS_MAX_LAYERS]; + // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. + int ts_target_bitrate[VPX_TS_MAX_LAYERS]; + int ts_rate_decimator[VPX_TS_MAX_LAYERS]; + + // these parameters aren't to be used in final build don't use!!! + int play_alternate; + int alt_freq; + + int encode_breakout; // early breakout : for video conf recommend 800 + + /* Bitfield defining the error resiliency features to enable. + * Can provide decodable frames after losses in previous + * frames and decodable partitions after losses in the same frame. + */ + unsigned int error_resilient_mode; + + /* Bitfield defining the parallel decoding mode where the + * decoding in successive frames may be conducted in parallel + * just by decoding the frame headers. + */ + unsigned int frame_parallel_decoding_mode; + + int arnr_max_frames; + int arnr_strength; + int arnr_type; + + int tile_columns; + int tile_rows; + + struct vpx_fixed_buf two_pass_stats_in; + struct vpx_codec_pkt_list *output_pkt_list; + + vp8e_tuning tuning; +} VP9_CONFIG; +typedef struct VP9_COMP { + QUANTS quants; MACROBLOCK mb; VP9_COMMON common; VP9_CONFIG oxcf; @@ -460,10 +293,14 @@ typedef struct VP9_COMP { #else struct lookahead_entry *alt_ref_source; #endif + struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; + YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; + YV12_BUFFER_CONFIG *unscaled_last_source; + YV12_BUFFER_CONFIG scaled_last_source; int key_frame_frequency; @@ -506,19 +343,26 @@ typedef struct VP9_COMP { // Ambient reconstruction err target for force key frames int ambient_err; + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. 
+ int rd_thresh_mult[MAX_MODES]; + int rd_thresh_mult_sub8x8[MAX_REFS]; + int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; int64_t rd_comp_pred_diff[REFERENCE_MODES]; - int64_t rd_prediction_type_threshes[4][REFERENCE_MODES]; + int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; int64_t rd_tx_select_diff[TX_MODES]; // FIXME(rbultje) can this overflow? - int rd_tx_select_threshes[4][TX_MODES]; + int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - int64_t rd_filter_threshes[4][SWITCHABLE_FILTER_CONTEXTS]; + int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS]; int64_t mask_filter_rd; @@ -543,14 +387,12 @@ typedef struct VP9_COMP { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; - vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; struct vpx_codec_pkt_list *output_pkt_list; MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation - int seg0_progress, seg0_idx, seg0_cnt; // for real time encoding int speed; @@ -558,9 +400,6 @@ typedef struct VP9_COMP { int cpu_used; int pass; - vp9_prob last_skip_false_probs[3][SKIP_CONTEXTS]; - int last_skip_probs_q[3]; - int ref_frame_flags; SPEED_FEATURES sf; @@ -585,6 +424,8 @@ typedef struct VP9_COMP { unsigned char *active_map; unsigned int active_map_enabled; + CYCLIC_REFRESH *cyclic_refresh; + fractional_mv_step_fp *find_fractional_mv_step; fractional_mv_step_comp_fp *find_fractional_mv_step_comp; vp9_full_search_fn_t full_search_sad; @@ -641,10 +482,6 @@ typedef struct VP9_COMP { unsigned int activity_avg; unsigned int *mb_activity_map; int 
*mb_norm_activity_map; - int output_partition; - - // Force next frame to intra when kf_auto says so. - int force_next_frame_intra; int droppable; @@ -657,15 +494,9 @@ typedef struct VP9_COMP { int use_svc; - struct svc { - int spatial_layer_id; - int temporal_layer_id; - int number_spatial_layers; - int number_temporal_layers; - // Layer context used for rate control in CBR mode, only defined for - // temporal layers for now. - LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; - } svc; + SVC svc; + + int use_large_partition_rate; #if CONFIG_MULTIPLE_ARF // ARF tracking variables. @@ -680,26 +511,68 @@ typedef struct VP9_COMP { int max_arf_level; #endif -#ifdef ENTROPY_STATS - int64_t mv_ref_stats[INTER_MODE_CONTEXTS][INTER_MODES - 1][2]; -#endif - - #ifdef MODE_TEST_HIT_STATS // Debug / test stats int64_t mode_test_hits[BLOCK_SIZES]; #endif +} VP9_COMP; - // Y,U,V,(A) - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; +void vp9_initialize_enc(); - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; -} VP9_COMP; +struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); +void vp9_remove_compressor(VP9_COMP *cpi); -static int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { +void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); + + // receive a frames worth of data. caller can assume that a copy of this + // frame is made and not just a copy of the pointer.. 
+int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, + YV12_BUFFER_CONFIG *sd, int64_t time_stamp, + int64_t end_time_stamp); + +int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, + size_t *size, uint8_t *dest, + int64_t *time_stamp, int64_t *time_end, int flush); + +int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, + vp9_ppflags_t *flags); + +int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); + +void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); + +int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +int vp9_get_reference_enc(VP9_COMP *cpi, int index, + YV12_BUFFER_CONFIG **fb); + +int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd); + +int vp9_update_entropy(VP9_COMP *cpi, int update); + +int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, + unsigned int rows, unsigned int cols, + int delta_q[MAX_SEGMENTS], + int delta_lf[MAX_SEGMENTS], + unsigned int threshold[MAX_SEGMENTS]); + +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, + unsigned int rows, unsigned int cols); + +int vp9_set_internal_size(VP9_COMP *cpi, + VPX_SCALING horiz_mode, VPX_SCALING vert_mode); + +int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, + unsigned int height); + +void vp9_set_svc(VP9_COMP *cpi, int use_svc); + +int vp9_get_quantizer(struct VP9_COMP *cpi); + +static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -709,30 +582,43 @@ static int get_ref_frame_idx(const VP9_COMP *cpi, } } -static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, - ref_frame)]].buf; +static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( + VP9_COMP 
*cpi, MV_REFERENCE_FRAME ref_frame) { + VP9_COMMON * const cm = &cpi->common; + return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] + .buf; } -void vp9_encode_frame(VP9_COMP *cpi); +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); +} -void vp9_set_speed_features(VP9_COMP *cpi); +static INLINE int get_token_alloc(int mb_rows, int mb_cols) { + // TODO(JBB): make this work for alpha channel and double check we can't + // exceed this token count if we have a 32x32 transform crossing a boundary + // at a multiple of 16. + // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full + // resolution. We assume up to 1 token per pixel, and then allow + // a head room of 4. + return mb_rows * mb_cols * (16 * 16 * 3 + 4); +} -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference); +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); void vp9_alloc_compressor_data(VP9_COMP *cpi); -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); +void vp9_scale_references(VP9_COMP *cpi); -static int get_token_alloc(int mb_rows, int mb_cols) { - return mb_rows * mb_cols * (48 * 16 + 4); -} +void vp9_update_reference_frames(VP9_COMP *cpi); + +int64_t vp9_rescale(int64_t val, int64_t num, int denom); -static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { +static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, + MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? 
ref1 - LAST_FRAME diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.c b/source/libvpx/vp9/encoder/vp9_picklpf.c index 5b0ecf7..3ac8522 100644 --- a/source/libvpx/vp9/encoder/vp9_picklpf.c +++ b/source/libvpx/vp9/encoder/vp9_picklpf.c @@ -10,39 +10,32 @@ #include <assert.h> #include <limits.h> + +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_quant_common.h" + #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_quant_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpx_scale.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "./vpx_scale_rtcd.h" - -static int get_min_filter_level(VP9_COMP *cpi, int base_qindex) { - return 0; -} -static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) { +static int get_max_filter_level(VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? 
MAX_LOOP_FILTER * 3 / 4 : MAX_LOOP_FILTER; } -// Stub function for now Alt LF not used -void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) { -} static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, - MACROBLOCKD *const xd, VP9_COMMON *const cm, int filt_level, int partial_frame) { + VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_set_alt_lf_level(cpi, filt_level); - vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); + filt_err = vp9_get_y_sse(sd, cm->frame_to_show); // Re-instate the unfiltered frame vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); @@ -52,11 +45,10 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial_frame) { - MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; - const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); + const int min_filter_level = 0; + const int max_filter_level = get_max_filter_level(cpi); int best_err; int filt_best; int filt_direction = 0; @@ -73,7 +65,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Make a copy of the unfiltered / processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial_frame); + best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame); filt_best = filt_mid; ss_err[filt_mid] = best_err; @@ -95,7 +87,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score if (ss_err[filt_low] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, 
cm, filt_low, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_low, partial_frame); ss_err[filt_low] = filt_err; } else { filt_err = ss_err[filt_low]; @@ -114,7 +106,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { if (ss_err[filt_high] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_high, partial_frame); ss_err[filt_high] = filt_err; } else { filt_err = ss_err[filt_high]; @@ -128,7 +120,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Half the step distance if the best filter value was the same as last time if (filt_best == filt_mid) { - filter_step = filter_step / 2; + filter_step /= 2; filt_direction = 0; } else { filt_direction = (filt_best < filt_mid) ? -1 : 1; @@ -140,25 +132,24 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int method) { + LPF_PICK_METHOD method) { VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; lf->sharpness_level = cm->frame_type == KEY_FRAME ? 
0 : cpi->oxcf.sharpness; - if (method == 2) { - const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); - const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); + if (method == LPF_PICK_FROM_Q) { + const int min_filter_level = 0; + const int max_filter_level = get_max_filter_level(cpi); const int q = vp9_ac_quant(cm->base_qindex, 0); // These values were determined by linear fitting the result of the - // searched level - // filt_guess = q * 0.316206 + 3.87252 - int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18; + // searched level, filt_guess = q * 0.316206 + 3.87252 + int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == 1); + search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.h b/source/libvpx/vp9/encoder/vp9_picklpf.h index 0fc1f88..7d08ddb 100644 --- a/source/libvpx/vp9/encoder/vp9_picklpf.h +++ b/source/libvpx/vp9/encoder/vp9_picklpf.h @@ -16,13 +16,13 @@ extern "C" { #endif +#include "vp9/encoder/vp9_onyx_int.h" + struct yv12_buffer_config; struct VP9_COMP; -void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); - void vp9_pick_filter_level(const struct yv12_buffer_config *sd, - struct VP9_COMP *cpi, int method); + struct VP9_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c index 9ba48a1..f3fe99c 100644 --- a/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -26,19 +26,18 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, +static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const 
tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; - int bestsme = INT_MAX; - int further_steps, step_param; + int step_param; int sadpb = x->sadperbit16; MV mvp_full; int ref = mbmi->ref_frame[0]; - int_mv ref_mv = mbmi->ref_mvs[ref][0]; + const MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; int i; int tmp_col_min = x->mv_col_min; @@ -46,9 +45,6 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - int buf_offset; - int stride = xd->plane[0].pre[0].stride; - const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); if (scaled_ref_frame) { @@ -59,15 +55,14 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } - vp9_set_mv_search_range(x, &ref_mv.as_mv); + vp9_set_mv_search_range(x, &ref_mv); // TODO(jingning) exploiting adaptive motion search control in non-RD // mode decision too. 
step_param = 6; - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { @@ -78,36 +73,50 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } - return INT_MAX; + return; } } - - mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + assert(x->mv_best_ref_index[ref] <= 2); + if (x->mv_best_ref_index[ref] < 2) + mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + else + mvp_full = x->pred_mv[ref].as_mv; mvp_full.col >>= 3; mvp_full.row >>= 3; - if (cpi->sf.search_method == FAST_HEX) { - bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, - &cpi->fn_ptr[bsize], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + if (cpi->sf.search_method == FAST_DIAMOND) { + // NOTE: this returns SAD + vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + } else if (cpi->sf.search_method == FAST_HEX) { + // NOTE: this returns SAD + vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + // NOTE: this returns SAD + vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv.as_mv, &tmp_mv->as_mv); + // NOTE: this returns SAD + vp9_square_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - 
&ref_mv.as_mv, &tmp_mv->as_mv); + // NOTE: this returns SAD + vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); } else { - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[bsize], - &ref_mv.as_mv, &tmp_mv->as_mv); + int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + // NOTE: this returns variance + vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 1, + &cpi->fn_ptr[bsize], + &ref_mv, &tmp_mv->as_mv); } x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -120,23 +129,11 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[0] = backup_yv12[i]; } - // TODO(jingning) This step can be merged into full pixel search step in the - // re-designed log-diamond search - buf_offset = tmp_mv->as_mv.row * stride + tmp_mv->as_mv.col; - - // Find sad for current vector. - bestsme = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf + buf_offset, - stride, 0x7fffffff); - - // scale to 1/8 pixel resolution - tmp_mv->as_mv.row = tmp_mv->as_mv.row * 8; - tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8; - // calculate the bit cost on motion vector - *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv, + mvp_full.row = tmp_mv->as_mv.row * 8; + mvp_full.col = tmp_mv->as_mv.col * 8; + *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - return bestsme; } static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -144,7 +141,7 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, MV *tmp_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int ref = mbmi->ref_frame[0]; MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; @@ -160,12 +157,9 
@@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } - tmp_mv->col >>= 3; - tmp_mv->row >>= 3; - cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, @@ -180,6 +174,30 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + x->pred_mv[ref].as_mv = *tmp_mv; +} + +static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int64_t *out_dist_sum) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. + unsigned int sse; + int rate; + int64_t dist; + + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; + + int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse); + + vp9_model_rd_from_var_lapndz(sse + var, 1 << num_pels_log2_lookup[bsize], + pd->dequant[1] >> 3, &rate, &dist); + *out_rate_sum = rate; + *out_dist_sum = dist << 3; } // TODO(jingning) placeholder for inter-frame non-RD mode decision. 
@@ -191,29 +209,41 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t *returndistortion, BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode, best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; + INTERP_FILTER best_pred_filter = EIGHTTAP; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int64_t best_rd = INT64_MAX; int64_t this_rd = INT64_MAX; - static const int cost[4]= { 0, 2, 4, 6 }; - const int64_t inter_mode_thresh = 300; + int rate = INT_MAX; + int64_t dist = INT64_MAX; + + VP9_COMMON *cm = &cpi->common; + int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); + + const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, + intra_cost_penalty, 0); const int64_t intra_mode_cost = 50; + unsigned char segment_id = mbmi->segment_id; + const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame. + int mode_idx[MB_MODE_COUNT] = {0}; + INTERP_FILTER filter_ref = SWITCHABLE; + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; - if (cpi->active_map_enabled && x->active_ptr[0] == 0) + if (!x->in_active_map) x->skip = 1; - // initialize mode decisions *returnrate = INT_MAX; *returndistortion = INT64_MAX; @@ -226,21 +256,25 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ? 
EIGHTTAP : cpi->common.interp_filter; mbmi->skip = 0; - mbmi->segment_id = 0; + mbmi->segment_id = segment_id; for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + if (xd->up_available) + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + else if (xd->left_available) + filter_ref = xd->mi[-1]->mbmi.interp_filter; + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { - int rate_mv = 0; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -252,58 +286,131 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; + // Set conversion index for LAST_FRAME. + if (ref_frame == LAST_FRAME) { + mode_idx[NEARESTMV] = THR_NEARESTMV; // LAST_FRAME, NEARESTMV + mode_idx[NEARMV] = THR_NEARMV; // LAST_FRAME, NEARMV + mode_idx[ZEROMV] = THR_ZEROMV; // LAST_FRAME, ZEROMV + mode_idx[NEWMV] = THR_NEWMV; // LAST_FRAME, NEWMV + } + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - int rate = cost[INTER_OFFSET(this_mode)] - << (num_pels_log2_lookup[bsize] - 4); - int64_t dist; + int rate_mv = 0; + if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode))) continue; + if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * + rd_thresh_freq_fact[this_mode] >> 5) || + rd_threshes[mode_idx[this_mode]] == INT_MAX) + continue; + if (this_mode == NEWMV) { + int rate_mode = 0; if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; - x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = - full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv); + full_pixel_motion_search(cpi, x, 
tile, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv); if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) continue; + rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd) + continue; + sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame].as_mv); } - if (frame_mv[this_mode][ref_frame].as_int == 0) { - dist = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)]; - } else if (this_mode != NEARESTMV && - frame_mv[NEARESTMV][ref_frame].as_int == - frame_mv[this_mode][ref_frame].as_int) { - dist = x->mode_sad[ref_frame][INTER_OFFSET(NEARESTMV)]; + if (this_mode != NEARESTMV) + if (frame_mv[this_mode][ref_frame].as_int == + frame_mv[NEARESTMV][ref_frame].as_int) + continue; + + mbmi->mode = this_mode; + mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + + // Search for the best prediction filter type, when the resulting + // motion vector is at sub-pixel accuracy level for luma component, i.e., + // the last three bits are all zeros. 
+ if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && + ((mbmi->mv[0].as_mv.row & 0x07) != 0 || + (mbmi->mv[0].as_mv.col & 0x07) != 0)) { + int64_t tmp_rdcost1 = INT64_MAX; + int64_t tmp_rdcost2 = INT64_MAX; + int64_t tmp_rdcost3 = INT64_MAX; + int pf_rate[3]; + int64_t pf_dist[3]; + + mbmi->interp_filter = EIGHTTAP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP], + &pf_dist[EIGHTTAP]); + tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP], + pf_dist[EIGHTTAP]); + + mbmi->interp_filter = EIGHTTAP_SHARP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP], + &pf_dist[EIGHTTAP_SHARP]); + tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP], + pf_dist[EIGHTTAP_SHARP]); + + mbmi->interp_filter = EIGHTTAP_SMOOTH; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH], + &pf_dist[EIGHTTAP_SMOOTH]); + tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH], + pf_dist[EIGHTTAP_SMOOTH]); + + if (tmp_rdcost2 < tmp_rdcost1) { + if (tmp_rdcost2 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP_SHARP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } else { + if (tmp_rdcost1 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } + + rate = pf_rate[mbmi->interp_filter]; + dist = pf_dist[mbmi->interp_filter]; } else { - mbmi->mode = this_mode; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + mbmi->interp_filter = (filter_ref == SWITCHABLE) ? 
EIGHTTAP: filter_ref; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] = - cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, INT_MAX); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); } - this_rd = rate + dist; + rate += rate_mv; + rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd < best_rd) { best_rd = this_rd; + *returnrate = rate; + *returndistortion = dist; best_mode = this_mode; + best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; } } } mbmi->mode = best_mode; + mbmi->interp_filter = best_pred_filter; mbmi->ref_frame[0] = best_ref_frame; mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; // Perform intra prediction search, if the best SAD is above a certain // threshold. 
@@ -314,13 +421,15 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &p->src.buf[0], p->src.stride, &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); - this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf, - p->src.stride, - pd->dst.buf, - pd->dst.stride, INT_MAX); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + rate += x->mbmode_cost[this_mode]; + rate += intra_cost_penalty; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd + intra_mode_cost < best_rd) { best_rd = this_rd; + *returnrate = rate; + *returndistortion = dist; mbmi->mode = this_mode; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->uv_mode = this_mode; @@ -328,5 +437,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } } + return INT64_MAX; } diff --git a/source/libvpx/vp9/encoder/vp9_quantize.c b/source/libvpx/vp9/encoder/vp9_quantize.c index 4ab8995..31f3b3e 100644 --- a/source/libvpx/vp9/encoder/vp9_quantize.c +++ b/source/libvpx/vp9/encoder/vp9_quantize.c @@ -153,6 +153,7 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) { void vp9_init_quantizer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + QUANTS *const quants = &cpi->quants; int i, q, quant; for (q = 0; q < QINDEX_RANGE; q++) { @@ -163,48 +164,49 @@ void vp9_init_quantizer(VP9_COMP *cpi) { // y quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q) : vp9_ac_quant(q, 0); - invert_quant(&cpi->y_quant[q][i], &cpi->y_quant_shift[q][i], quant); - cpi->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->y_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant); + quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->y_round[q][i] = (qrounding_factor * quant) >> 7; cm->y_dequant[q][i] = quant; // uv quant = i == 0 ? 
vp9_dc_quant(q, cm->uv_dc_delta_q) : vp9_ac_quant(q, cm->uv_ac_delta_q); - invert_quant(&cpi->uv_quant[q][i], &cpi->uv_quant_shift[q][i], quant); - cpi->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->uv_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->uv_quant[q][i], + &quants->uv_quant_shift[q][i], quant); + quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->uv_round[q][i] = (qrounding_factor * quant) >> 7; cm->uv_dequant[q][i] = quant; #if CONFIG_ALPHA // alpha quant = i == 0 ? vp9_dc_quant(q, cm->a_dc_delta_q) : vp9_ac_quant(q, cm->a_ac_delta_q); - invert_quant(&cpi->a_quant[q][i], &cpi->a_quant_shift[q][i], quant); - cpi->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->a_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->a_quant[q][i], &quants->a_quant_shift[q][i], quant); + quants->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->a_round[q][i] = (qrounding_factor * quant) >> 7; cm->a_dequant[q][i] = quant; #endif } for (i = 2; i < 8; i++) { - cpi->y_quant[q][i] = cpi->y_quant[q][1]; - cpi->y_quant_shift[q][i] = cpi->y_quant_shift[q][1]; - cpi->y_zbin[q][i] = cpi->y_zbin[q][1]; - cpi->y_round[q][i] = cpi->y_round[q][1]; + quants->y_quant[q][i] = quants->y_quant[q][1]; + quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; + quants->y_zbin[q][i] = quants->y_zbin[q][1]; + quants->y_round[q][i] = quants->y_round[q][1]; cm->y_dequant[q][i] = cm->y_dequant[q][1]; - cpi->uv_quant[q][i] = cpi->uv_quant[q][1]; - cpi->uv_quant_shift[q][i] = cpi->uv_quant_shift[q][1]; - cpi->uv_zbin[q][i] = cpi->uv_zbin[q][1]; - cpi->uv_round[q][i] = cpi->uv_round[q][1]; + quants->uv_quant[q][i] = quants->uv_quant[q][1]; + quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1]; + quants->uv_zbin[q][i] = quants->uv_zbin[q][1]; + quants->uv_round[q][i] = quants->uv_round[q][1]; cm->uv_dequant[q][i] = cm->uv_dequant[q][1]; #if CONFIG_ALPHA - 
cpi->a_quant[q][i] = cpi->a_quant[q][1]; - cpi->a_quant_shift[q][i] = cpi->a_quant_shift[q][1]; - cpi->a_zbin[q][i] = cpi->a_zbin[q][1]; - cpi->a_round[q][i] = cpi->a_round[q][1]; + quants->a_quant[q][i] = quants->a_quant[q][1]; + quants->a_quant_shift[q][i] = quants->a_quant_shift[q][1]; + quants->a_zbin[q][i] = quants->a_zbin[q][1]; + quants->a_round[q][i] = quants->a_round[q][1]; cm->a_dequant[q][i] = cm->a_dequant[q][1]; #endif } @@ -213,27 +215,28 @@ void vp9_init_quantizer(VP9_COMP *cpi) { void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + MACROBLOCKD *const xd = &x->e_mbd; + QUANTS *const quants = &cpi->quants; + const int segment_id = xd->mi[0]->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj; int i; // Y - x->plane[0].quant = cpi->y_quant[qindex]; - x->plane[0].quant_shift = cpi->y_quant_shift[qindex]; - x->plane[0].zbin = cpi->y_zbin[qindex]; - x->plane[0].round = cpi->y_round[qindex]; + x->plane[0].quant = quants->y_quant[qindex]; + x->plane[0].quant_shift = quants->y_quant_shift[qindex]; + x->plane[0].zbin = quants->y_zbin[qindex]; + x->plane[0].round = quants->y_round[qindex]; x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; // UV for (i = 1; i < 3; i++) { - x->plane[i].quant = cpi->uv_quant[qindex]; - x->plane[i].quant_shift = cpi->uv_quant_shift[qindex]; - x->plane[i].zbin = cpi->uv_zbin[qindex]; - x->plane[i].round = cpi->uv_round[qindex]; + x->plane[i].quant = quants->uv_quant[qindex]; + x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; + x->plane[i].zbin = quants->uv_zbin[qindex]; + x->plane[i].round = quants->uv_round[qindex]; x->plane[i].zbin_extra = 
(int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; } @@ -273,9 +276,7 @@ void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->mb); } -void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { - VP9_COMMON *const cm = &cpi->common; - +void vp9_set_quantizer(VP9_COMMON *cm, int q) { // quantizer has to be reinitialized with vp9_init_quantizer() if any // delta_q changes. cm->base_qindex = q; @@ -283,3 +284,30 @@ void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { cm->uv_dc_delta_q = 0; cm->uv_ac_delta_q = 0; } + +// Table that converts 0-63 Q-range values passed in outside to the Qindex +// range used internally. +static const int quantizer_to_qindex[] = { + 0, 4, 8, 12, 16, 20, 24, 28, + 32, 36, 40, 44, 48, 52, 56, 60, + 64, 68, 72, 76, 80, 84, 88, 92, + 96, 100, 104, 108, 112, 116, 120, 124, + 128, 132, 136, 140, 144, 148, 152, 156, + 160, 164, 168, 172, 176, 180, 184, 188, + 192, 196, 200, 204, 208, 212, 216, 220, + 224, 228, 232, 236, 240, 244, 249, 255, +}; + +int vp9_quantizer_to_qindex(int quantizer) { + return quantizer_to_qindex[quantizer]; +} + +int vp9_qindex_to_quantizer(int qindex) { + int quantizer; + + for (quantizer = 0; quantizer < 64; ++quantizer) + if (quantizer_to_qindex[quantizer] >= qindex) + return quantizer; + + return 63; +} diff --git a/source/libvpx/vp9/encoder/vp9_quantize.h b/source/libvpx/vp9/encoder/vp9_quantize.h index f356b12..7a93883 100644 --- a/source/libvpx/vp9/encoder/vp9_quantize.h +++ b/source/libvpx/vp9/encoder/vp9_quantize.h @@ -17,12 +17,30 @@ extern "C" { #endif +typedef struct { + DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); + + DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); + 
DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); + +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); +#endif +} QUANTS; + void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); struct VP9_COMP; - -void vp9_set_quantizer(struct VP9_COMP *cpi, int q); +struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); @@ -32,6 +50,12 @@ void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); +void vp9_set_quantizer(struct VP9Common *cm, int q); + +int vp9_quantizer_to_qindex(int quantizer); + +int vp9_qindex_to_quantizer(int qindex); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c index 89aa821..b4e883f 100644 --- a/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -27,14 +27,14 @@ #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ratectrl.h" +#define DEFAULT_KF_BOOST 2000 +#define DEFAULT_GF_BOOST 2000 + #define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1 #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; @@ -52,10 +52,9 @@ static int kf_low = 400; // formulaic approach to facilitate easier adjustment of the Q tables. 
// The formulae were derived from computing a 3rd order polynomial best // fit to the original data (after plotting real maxq vs minq (not q index)) -static int calculate_minq_index(double maxq, - double x3, double x2, double x1, double c) { +static int get_minq_index(double maxq, double x3, double x2, double x1) { int i; - const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c, + const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); // Special case handling to deal with the step from q2.0 @@ -63,57 +62,26 @@ static int calculate_minq_index(double maxq, if (minqtarget <= 2.0) return 0; - for (i = 0; i < QINDEX_RANGE; i++) { + for (i = 0; i < QINDEX_RANGE; i++) if (minqtarget <= vp9_convert_qindex_to_q(i)) return i; - } return QINDEX_RANGE - 1; } -void vp9_rc_init_minq_luts(void) { +void vp9_rc_init_minq_luts() { int i; for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - - kf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.000001, - -0.0004, - 0.15, - 0.0); - kf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.000002, - -0.0012, - 0.50, - 0.0); - - gf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.32, - 0.0); - gf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.50, - 0.0); - afq_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.33, - 0.0); - afq_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.55, - 0.0); - inter_minq[i] = calculate_minq_index(maxq, - 0.00000271, - -0.00113, - 0.75, - 0.0); + kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); + kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); + gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); + gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); + afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); + 
afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -135,79 +103,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, return (int)(0.5 + (enumerator * correction_factor / q)); } -void vp9_save_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp9_restore_coding_context. These functions are - // intended for use in a re-code loop in vp9_compress_frame where the - // quantizer value is adjusted between loop iterations. - vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); - vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); - vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - - vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); - vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - - cc->fc = cm->fc; -} - -void vp9_restore_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Restore key state variables to the snapshot state stored in the - // previous call to vp9_save_coding_context. 
- vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); - vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); - vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - - vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); - vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - - cm->fc = cc->fc; -} - -void vp9_setup_key_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - vp9_setup_past_independence(cm); - - /* All buffers are implicitly updated on key frames. */ - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; -} - -void vp9_setup_inter_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - if (cm->error_resilient_mode || cm->intra_only) - vp9_setup_past_independence(cm); - - assert(cm->frame_context_idx < FRAME_CONTEXTS); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; -} - -static int estimate_bits_at_q(int frame_kind, int q, int mbs, +static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor) { - const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); - - // Attempt to retain reasonable accuracy without overflow. The cutoff is - // chosen such that the maximum product of Bpm and MBs fits 31 bits. The - // largest Bpm takes 20 bits. - return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs - : (bpm * mbs) >> BPER_MB_NORMBITS; + const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor)); + return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS; } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { @@ -244,13 +143,12 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { // Update the buffer level for higher layers, given the encoded current layer. 
-static void update_layer_buffer_level(VP9_COMP *const cpi, - int encoded_frame_size) { +static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { int temporal_layer = 0; - int current_temporal_layer = cpi->svc.temporal_layer_id; + int current_temporal_layer = svc->temporal_layer_id; for (temporal_layer = current_temporal_layer + 1; - temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + temporal_layer < svc->number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -280,10 +178,60 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_buffer_level(cpi, encoded_frame_size); + update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } +void vp9_rc_init(const VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc) { + if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; + } else { + rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + } + + rc->last_q[0] = oxcf->best_allowed_q; + rc->last_q[1] = oxcf->best_allowed_q; + rc->last_q[2] = oxcf->best_allowed_q; + + rc->buffer_level = oxcf->starting_buffer_level; + rc->bits_off_target = oxcf->starting_buffer_level; + + rc->rolling_target_bits = rc->av_per_frame_bandwidth; + rc->rolling_actual_bits = rc->av_per_frame_bandwidth; + rc->long_rolling_target_bits = 
rc->av_per_frame_bandwidth; + rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; + + rc->total_actual_bits = 0; + rc->total_target_vs_actual = 0; + + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->frames_since_key = 8; // Sensible default for first frame. + rc->this_key_frame_forced = 0; + rc->next_key_frame_forced = 0; + rc->source_alt_ref_pending = 0; + rc->source_alt_ref_active = 0; + + rc->frames_till_gf_update_due = 0; + + rc->ni_av_qi = oxcf->worst_allowed_q; + rc->ni_tot_qi = 0; + rc->ni_frames = 0; + + rc->tot_q = 0.0; + rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q); + + rc->rate_correction_factor = 1.0; + rc->key_frame_rate_correction_factor = 1.0; + rc->gf_rate_correction_factor = 1.0; +} + int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; @@ -327,6 +275,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { return cpi->rc.key_frame_rate_correction_factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else @@ -339,6 +288,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { cpi->rc.key_frame_rate_correction_factor = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else @@ -347,7 +297,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { } void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { - const int q = cpi->common.base_qindex; + const VP9_COMMON *const cm = &cpi->common; int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; @@ -360,8 +310,8 @@ void 
vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large - projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q, - cpi->common.MBs, + projected_size_based_on_q = estimate_bits_at_q(cm->frame_type, + cm->base_qindex, cm->MBs, rate_correction_factor); // Work out a size correction factor. if (projected_size_based_on_q > 0) @@ -385,20 +335,18 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { if (correction_factor > 102) { // We are not already at the worst allowable quality - correction_factor = - (int)(100 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 + ((correction_factor - 100) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } else if (correction_factor < 99) { // We are not already at the best allowable quality - correction_factor = - (int)(100 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 - ((100 - correction_factor) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor < MIN_BPB_FACTOR) @@ -419,11 +367,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. 
- if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - // Case where we would overflow int - target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; + target_bits_per_mb = + ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; @@ -462,33 +407,25 @@ static int get_active_quality(int q, int gfu_boost, int low, int high, } static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { + const RATE_CONTROL *const rc = &cpi->rc; + const unsigned int curr_frame = cpi->common.current_video_frame; int active_worst_quality; + if (cpi->common.frame_type == KEY_FRAME) { - if (cpi->common.current_video_frame == 0) { - active_worst_quality = cpi->rc.worst_quality; - } else { - // Choose active worst quality twice as large as the last q. - active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; - } - } else if (!cpi->rc.is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - if (cpi->common.current_video_frame == 1) { - active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4; - } else { - // Choose active worst quality twice as large as the last q. - active_worst_quality = cpi->rc.last_q[INTER_FRAME]; - } + active_worst_quality = curr_frame == 0 ? rc->worst_quality + : rc->last_q[KEY_FRAME] * 2; } else { - if (cpi->common.current_video_frame == 1) { - active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; + if (!rc->is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4 + : rc->last_q[INTER_FRAME]; } else { - // Choose active worst quality twice as large as the last q. - active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2; + active_worst_quality = curr_frame == 1 ? 
rc->last_q[KEY_FRAME] * 2 + : rc->last_q[INTER_FRAME] * 2; } } - if (active_worst_quality > cpi->rc.worst_quality) - active_worst_quality = cpi->rc.worst_quality; - return active_worst_quality; + + return MIN(active_worst_quality, rc->worst_quality); } // Adjust active_worst_quality level based on buffer level. @@ -498,6 +435,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // If buffer is below the optimal level, let the active_worst_quality go from // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). + const VP9_COMMON *const cm = &cpi->common; const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. @@ -505,9 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; - if (cpi->common.frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) return rc->worst_quality; - if (cpi->common.current_video_frame > 1) + if (cm->current_video_frame > 1) active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[INTER_FRAME] * 5 / 4); else @@ -561,7 +499,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, (last_boosted_q * 0.75)); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { @@ -583,10 +521,11 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. 
q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } } else if (!rc->is_src_frame_alt_ref && + !cpi->use_svc && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was @@ -639,7 +578,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -672,8 +611,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set @@ -694,15 +633,15 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -805,7 +744,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -821,7 +760,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -854,8 +793,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else { // Not forced keyframe. @@ -879,15 +818,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -988,7 +927,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate. - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -1004,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -1020,8 +959,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, - int *bottom_index, - int *top_index) { + int *bottom_index, int *top_index) { int q; if (cpi->pass == 0) { if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -1032,14 +970,14 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); } - // JBB : This is realtime mode. In real time mode the first frame - // should be larger. Q of 0 is disabled because we force tx size to be + // Q of 0 is disabled because we force tx size to be // 16x16... 
if (cpi->sf.use_nonrd_pick_mode) { - if (cpi->common.current_video_frame == 0) - q /= 3; if (q == 0) q++; + if (cpi->sf.force_frame_boost == 1) + q -= cpi->sf.max_delta_qindex; + if (q < *bottom_index) *bottom_index = q; else if (q > *top_index) @@ -1057,28 +995,14 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { - if (cpi->common.frame_type == KEY_FRAME) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - // Stron overshoot limit for constrained quality - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 2 / 8; - } else { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 5 / 8; - } - } - } + int recode_tolerance = + (cpi->sf.recode_tolerance * this_frame_target) / 100; + + *frame_over_shoot_limit = this_frame_target + recode_tolerance; + *frame_under_shoot_limit = this_frame_target - recode_tolerance; // For very small rate targets where the fractional adjustment - // (eg * 7/8) may be tiny make sure there is at least a minimum - // range. + // may be tiny make sure there is at least a minimum range. 
*frame_over_shoot_limit += 200; *frame_under_shoot_limit -= 200; if (*frame_under_shoot_limit < 0) @@ -1103,16 +1027,17 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { static void update_alt_ref_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user - cpi->rc.frames_since_golden = 0; + RATE_CONTROL *const rc = &cpi->rc; + rc->frames_since_golden = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif // Clear the alternate reference update pending flag. - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; // Set the alternate reference frame active flag - cpi->rc.source_alt_ref_active = 1; + rc->source_alt_ref_active = 1; } static void update_golden_frame_stats(VP9_COMP *cpi) { @@ -1141,6 +1066,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; cm->last_frame_type = cm->frame_type; @@ -1150,7 +1076,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Post encode loop adjustment of Q prediction. vp9_rc_update_rate_correction_factors( cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. 
if (cm->frame_type == KEY_FRAME) { @@ -1159,7 +1085,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { + !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1205,12 +1131,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Actual bits spent rc->total_actual_bits += rc->projected_frame_size; + rc->total_target_bits += (cm->show_frame ? rc->av_per_frame_bandwidth : 0); - // Debug stats - rc->total_target_vs_actual += (rc->this_frame_target - - rc->projected_frame_size); + rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + if (oxcf->play_alternate && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); @@ -1243,15 +1168,15 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { static const int af_ratio = 10; - const RATE_CONTROL *rc = &cpi->rc; + const RATE_CONTROL *const rc = &cpi->rc; int target; #if USE_ALTREF_FOR_ONE_PASS target = (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ? 
- (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) / - (cpi->rc.baseline_gf_interval + af_ratio - 1) : - (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) / - (cpi->rc.baseline_gf_interval + af_ratio - 1); + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval * af_ratio) / + (rc->baseline_gf_interval + af_ratio - 1) : + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval) / + (rc->baseline_gf_interval + af_ratio - 1); #else target = rc->av_per_frame_bandwidth; #endif @@ -1271,7 +1196,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { int target; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY || + (cm->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; @@ -1303,18 +1228,19 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; + const SVC *const svc = &cpi->svc; const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->av_per_frame_bandwidth; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (svc->number_temporal_layers > 1 && + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { // Note that for layers, av_per_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
- int current_temporal_layer = cpi->svc.temporal_layer_id; - const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + int current_temporal_layer = svc->temporal_layer_id; + const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1351,13 +1277,14 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int target = cpi->rc.av_per_frame_bandwidth; + RATE_CONTROL *const rc = &cpi->rc; + int target = rc->av_per_frame_bandwidth; if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + (cpi->oxcf.auto_key && (rc->frames_since_key % cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; - cpi->rc.source_alt_ref_active = 0; + rc->source_alt_ref_active = 0; if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { target = calc_iframe_target_size_one_pass_cbr(cpi); } @@ -1368,8 +1295,8 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } } vp9_rc_set_frame_target(cpi, target); - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { @@ -1377,7 +1304,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY || + (cm->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; @@ -1396,3 +1323,46 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = INT_MAX; rc->baseline_gf_interval = INT_MAX; } + +int vp9_compute_qdelta(const RATE_CONTROL *rc, double 
qstart, double qtarget) { + int start_index = rc->worst_quality; + int target_index = rc->worst_quality; + int i; + + // Convert the average q value to an index. + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + start_index = i; + if (vp9_convert_qindex_to_q(i) >= qstart) + break; + } + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_convert_qindex_to_q(i) >= qtarget) + break; + } + + return target_index - start_index; +} + +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio) { + int target_index = rc->worst_quality; + int i; + + // Look up the current projected bits per block for the base index + const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0); + + // Find the target bits per mb based on the base value and given ratio. + const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb ) + break; + } + + return target_index - qindex; +} diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.h b/source/libvpx/vp9/encoder/vp9_ratectrl.h index 5dbc7d1..7693c2b 100644 --- a/source/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/source/libvpx/vp9/encoder/vp9_ratectrl.h @@ -12,12 +12,19 @@ #ifndef VP9_ENCODER_VP9_RATECTRL_H_ #define VP9_ENCODER_VP9_RATECTRL_H_ +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_blockd.h" + #ifdef __cplusplus extern "C" { #endif #define FRAME_OVERHEAD_BITS 200 +// Bits Per MB at different Q (Multiplied by 512) +#define BPER_MB_NORMBITS 9 + typedef struct { // Rate targetting variables int this_frame_target; @@ -37,6 +44,7 @@ typedef struct { int frames_since_golden; int frames_till_gf_update_due; int max_gf_interval; + int static_scene_max_gf_interval; int 
baseline_gf_interval; int frames_to_key; int frames_since_key; @@ -53,7 +61,7 @@ typedef struct { int ni_av_qi; int ni_tot_qi; int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF double tot_q; double avg_q; @@ -70,7 +78,8 @@ typedef struct { int long_rolling_actual_bits; int64_t total_actual_bits; - int total_target_vs_actual; // debug stats + int64_t total_target_bits; + int64_t total_target_vs_actual; int worst_quality; int best_quality; @@ -78,17 +87,13 @@ typedef struct { } RATE_CONTROL; struct VP9_COMP; +struct VP9_CONFIG; -void vp9_save_coding_context(struct VP9_COMP *cpi); -void vp9_restore_coding_context(struct VP9_COMP *cpi); - -void vp9_setup_key_frame(struct VP9_COMP *cpi); -void vp9_setup_inter_frame(struct VP9_COMP *cpi); +void vp9_rc_init(const struct VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc); double vp9_convert_qindex_to_q(int qindex); -// initialize luts for minq -void vp9_rc_init_minq_luts(void); +void vp9_rc_init_minq_luts(); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -161,6 +166,15 @@ int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi, // This function is called only from the vp9_rc_get_..._params() functions. void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target); +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a target q value +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); + +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a value that should equate to the given rate ratio. 
+int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.c b/source/libvpx/vp9/encoder/vp9_rdopt.c index ed81fbe..b292b42 100644 --- a/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -30,6 +30,7 @@ #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_mcomp.h" @@ -38,9 +39,13 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_variance.h" +#define RD_THRESH_MAX_FACT 64 +#define RD_THRESH_INC 1 +#define RD_THRESH_POW 1.25 +#define RD_MULT_EPB_RATIO 64 + /* Factor to weigh the rate for switchable interp filters */ #define SWITCHABLE_INTERP_RATE_FACTOR 1 @@ -72,6 +77,7 @@ struct rdcost_block_args { int64_t this_rd; int64_t best_rd; int skip; + int use_fast_coef_costing; const scan_order *so; }; @@ -145,9 +151,8 @@ static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize, } static void fill_mode_costs(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - FRAME_CONTEXT *const fc = &cm->fc; + const FRAME_CONTEXT *const fc = &cpi->common.fc; int i, j; for (i = 0; i < INTRA_MODES; i++) @@ -157,15 +162,14 @@ static void fill_mode_costs(VP9_COMP *cpi) { // TODO(rbultje) separate tables for superblock costing? 
vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[1], - fc->uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[0], - vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree); + vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME], + vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); + vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME], + fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) vp9_cost_tokens((int *)x->switchable_interp_costs[i], - fc->switchable_interp_prob[i], - vp9_switchable_interp_tree); + fc->switchable_interp_prob[i], vp9_switchable_interp_tree); } static void fill_token_costs(vp9_coeff_cost *c, @@ -214,7 +218,7 @@ void vp9_init_me_luts() { } } -int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { const int q = vp9_dc_quant(qindex, 0); // TODO(debargha): Adjust the function below int rdmult = 88 * q * q / 25; @@ -228,12 +232,9 @@ int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) { } static int compute_rd_thresh_factor(int qindex) { - int q; // TODO(debargha): Adjust the function below - q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); - if (q < 8) - q = 8; - return q; + const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); + return MAX(q, 8); } void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { @@ -242,9 +243,8 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { } static void set_block_thresholds(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; int i, bsize, segment_id; - VP9_COMMON *cm = &cpi->common; - SPEED_FEATURES *sf = &cpi->sf; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id, @@ -260,13 +260,13 @@ static void set_block_thresholds(VP9_COMP *cpi) { for (i = 0; i < MAX_MODES; ++i) 
cpi->rd_threshes[segment_id][bsize][i] = - sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4 + cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - sf->thresh_mult_sub8x8[i] < thresh_max - ? sf->thresh_mult_sub8x8[i] * t / 4 + cpi->rd_thresh_mult_sub8x8[i] < thresh_max + ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } @@ -274,8 +274,8 @@ static void set_block_thresholds(VP9_COMP *cpi) { } void vp9_initialize_rd_consts(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCK *x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; int i; vp9_clear_system_state(); @@ -286,14 +286,12 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; x->errorperbit += (x->errorperbit == 0); - vp9_set_speed_features(cpi); - x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 
0 : 1; set_block_thresholds(cpi); - if (!cpi->sf.use_nonrd_pick_mode) { + if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { fill_token_costs(x->token_costs, cm->fc.coef_probs); for (i = 0; i < PARTITION_CONTEXTS; i++) @@ -301,7 +299,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_partition_tree); } - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) { + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || + cm->frame_type == KEY_FRAME) { fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { @@ -400,9 +399,9 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } -static void model_rd_from_var_lapndz(unsigned int var, unsigned int n, - unsigned int qstep, int *rate, - int64_t *dist) { +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist) { // This function models the rate and distortion for a Laplacian // source with given variance when quantized with a uniform quantizer // with given stepsize. 
The closed form expressions are in: @@ -433,7 +432,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int i; int64_t rate_sum = 0; int64_t dist_sum = 0; - int ref = xd->mi_8x8[0]->mbmi.ref_frame[0]; + const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -464,8 +463,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, } else { int rate; int64_t dist; - model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], - pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist; } @@ -482,8 +481,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, int *out_skip) { int j, k; BLOCK_SIZE bs; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; + const struct macroblock_plane *const p = &x->plane[0]; + const struct macroblockd_plane *const pd = &xd->plane[0]; const int width = 4 * num_4x4_blocks_wide_lookup[bsize]; const int height = 4 * num_4x4_blocks_high_lookup[bsize]; int rate_sum = 0; @@ -512,7 +511,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride, &sse); // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, + &rate, &dist); rate_sum += rate; dist_sum += dist; *out_skip &= (rate < 1024); @@ -549,26 +549,25 @@ static const int16_t band_counts[TX_SIZES][8] = { { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; - static INLINE int cost_coeffs(MACROBLOCK *x, int plane, int block, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, - const int16_t *scan, const int16_t *nb) { + const int16_t *scan, const int16_t *nb, + int use_fast_coef_costing) { MACROBLOCKD *const xd 
= &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; - struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + const struct macroblock_plane *p = &x->plane[plane]; + const struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; const int16_t *band_count = &band_counts[tx_size][1]; const int eob = p->eobs[block]; const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; - uint8_t *p_tok = x->token_cache; + uint8_t token_cache[32 * 32]; int pt = combine_entropy_contexts(*A, *L); int c, cost; - // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size : get_uv_tx_size(mbmi) == tx_size); @@ -584,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int v = qcoeff[0]; int prev_t = vp9_dct_value_tokens_ptr[v].token; cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; - p_tok[0] = vp9_pt_energy_class[prev_t]; + token_cache[0] = vp9_pt_energy_class[prev_t]; ++token_costs; // ac tokens @@ -594,9 +593,13 @@ static INLINE int cost_coeffs(MACROBLOCK *x, v = qcoeff[rc]; t = vp9_dct_value_tokens_ptr[v].token; - pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; - p_tok[rc] = vp9_pt_energy_class[t]; + if (use_fast_coef_costing) { + cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v]; + } else { + pt = get_coef_context(nb, token_cache, c); + cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; + token_cache[rc] = vp9_pt_energy_class[t]; + } prev_t = t; if (!--band_left) { band_left = *band_count++; @@ -606,8 +609,12 @@ static INLINE int cost_coeffs(MACROBLOCK *x, // eob token if (band_left) { - pt = get_coef_context(nb, p_tok, c); - cost += (*token_costs)[0][pt][EOB_TOKEN]; + if 
(use_fast_coef_costing) { + cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; + } else { + pt = get_coef_context(nb, token_cache, c); + cost += (*token_costs)[0][pt][EOB_TOKEN]; + } } } @@ -616,14 +623,13 @@ static INLINE int cost_coeffs(MACROBLOCK *x, return cost; } - static void dist_block(int plane, int block, TX_SIZE tx_size, struct rdcost_block_args* args) { const int ss_txfrm_size = tx_size << 1; MACROBLOCK* const x = args->x; MACROBLOCKD* const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; + const struct macroblock_plane *const p = &x->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; int64_t this_sse; int shift = tx_size == TX_32X32 ? 0 : 2; int16_t *const coeff = BLOCK_OFFSET(p->coeff, block); @@ -632,7 +638,7 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, &this_sse) >> shift; args->sse = this_sse >> shift; - if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) { + if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) { // TODO(jingning): tune the model to better capture the distortion. 
int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> (shift + 2); @@ -648,7 +654,8 @@ static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize, args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx, args->t_left + y_idx, tx_size, - args->so->scan, args->so->neighbors); + args->so->scan, args->so->neighbors, + args->use_fast_coef_costing); } static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, @@ -656,7 +663,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int64_t rd1, rd2, rd; if (args->skip) @@ -732,15 +739,17 @@ static void txfm_rd_in_plane(MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size) { + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { MACROBLOCKD *const xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[plane]; + const struct macroblockd_plane *const pd = &xd->plane[plane]; struct rdcost_block_args args = { 0 }; args.x = x; args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; if (plane == 0) - xd->mi_8x8[0]->mbmi.tx_size = tx_size; + xd->mi[0]->mbmi.tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); @@ -770,13 +779,13 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; mbmi->tx_size = MIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, - mbmi->tx_size); + 
mbmi->tx_size, cpi->sf.use_fast_coef_costing); cpi->tx_stepdown_count[0]++; } @@ -789,7 +798,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -872,7 +881,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -920,7 +929,8 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs txfm_rd_in_plane(x, rate, distortion, skip, - &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); + &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { cpi->tx_stepdown_count[0]++; @@ -941,7 +951,7 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const TX_SIZE max_tx_size = max_txsize_lookup[bs]; TX_SIZE tx_size; @@ -968,7 +978,8 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], 
&sse[tx_size], - ref_best_rd, 0, bs, tx_size); + ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, bs); } @@ -983,7 +994,7 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t ref_best_rd) { int64_t sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; assert(bs == mbmi->sb_type); if (cpi->sf.tx_size_search_method != USE_FULL_RD) { @@ -997,7 +1008,8 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size) txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], &sse[tx_size], - ref_best_rd, 0, bs, tx_size); + ref_best_rd, 0, bs, tx_size, + cpi->sf.use_fast_coef_costing); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, bs); } @@ -1029,7 +1041,7 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode, static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, MB_PREDICTION_MODE *best_mode, - int *bmode_costs, + const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, @@ -1058,7 +1070,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vpx_memcpy(ta, a, sizeof(ta)); vpx_memcpy(tl, l, sizeof(tl)); - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.tx_size = TX_4X4; for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; @@ -1087,7 +1099,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); - xd->mi_8x8[0]->bmi[block].as_mode = mode; + xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? 
src : dst, @@ -1100,7 +1112,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_fwht4x4(src_diff, coeff, 8); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, - so->scan, so->neighbors); + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, @@ -1112,7 +1125,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vp9_fht4x4(src_diff, coeff, 8, tx_type); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, - so->scan, so->neighbors); + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -1152,18 +1166,16 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, return best_rd; } -static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, - MACROBLOCK * const mb, - int * const rate, - int * const rate_y, - int64_t * const distortion, +static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, + int *rate, int *rate_y, + int64_t *distortion, int64_t best_rd) { int i, j; - MACROBLOCKD *const xd = &mb->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; - const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const MACROBLOCKD *const xd = &mb->e_mbd; + MODE_INFO *const mic = xd->mi[0]; + const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -1172,13 +1184,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi, int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; - int *bmode_costs; + const int *bmode_costs = mb->mbmode_cost; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - bmode_costs = mb->mbmode_cost; - // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { @@ -1232,7 +1242,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; + MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; @@ -1246,8 +1256,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_tx_cache[TX_MODES]; - MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode))) continue; @@ -1296,12 +1306,12 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } -static void super_block_uvrd(MACROBLOCK *x, +static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi); int plane; int pnrate = 0, pnskip = 1; @@ -1323,7 +1333,8 @@ static void super_block_uvrd(MACROBLOCK *x, for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, - ref_best_rd, plane, bsize, uv_txfm_size); + ref_best_rd, plane, bsize, uv_txfm_size, + cpi->sf.use_fast_coef_costing); if (pnrate == INT_MAX) goto term; *rate += pnrate; @@ -1357,9 +1368,9 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - xd->mi_8x8[0]->mbmi.uv_mode = mode; + xd->mi[0]->mbmi.uv_mode = mode; - super_block_uvrd(x, &this_rate_tokenonly, + super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1398,18 +1409,19 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - xd->mi_8x8[0]->mbmi.uv_mode = mode_selected; + xd->mi[0]->mbmi.uv_mode = mode_selected; return best_rd; } -static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x, +static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { + const VP9_COMMON *cm = &cpi->common; int64_t unused; - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED; - super_block_uvrd(x, rate_tokenonly, distortion, + 
x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; + super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); @@ -1425,7 +1437,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. if (cpi->sf.use_uv_intra_rd_estimate) { - rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv, + rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); // Else do a proper rd search for each possible transform size that may // be considered in the main rd loop. @@ -1434,13 +1446,13 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size); } - *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode; + *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; } -static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, +static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { - MACROBLOCK *const x = &cpi->mb; - const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; + const MACROBLOCK *const x = &cpi->mb; + const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. 
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { @@ -1451,12 +1463,6 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, } } -void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode, - const MV *mv) { - xd->mi_8x8[0]->mbmi.mode = mode; - xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv; -} - static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, @@ -1464,59 +1470,56 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv single_newmv[MAX_REF_FRAMES], int *rate_mv); -static int labels2mode(MACROBLOCK *x, int i, +static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, MB_PREDICTION_MODE mode, - int_mv *this_mv, int_mv *this_second_mv, + int_mv this_mv[2], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int_mv seg_mvs[MAX_REF_FRAMES], - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { - MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; - MB_MODE_INFO *mbmi = &mic->mbmi; + int_mv *best_ref_mv[2], + const int *mvjcost, int *mvcost[2]) { + MODE_INFO *const mic = xd->mi[0]; + const MB_MODE_INFO *const mbmi = &mic->mbmi; int thismvcost = 0; int idx, idy; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; - const int has_second_rf = has_second_ref(mbmi); + const int is_compound = has_second_ref(mbmi); // the only time we should do costing for new motion vector or mode // is when we are on a new label (jbb May 08, 2007) switch (mode) { case NEWMV: - this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv, + this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; + thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); - if (has_second_rf) { - this_second_mv->as_int = 
seg_mvs[mbmi->ref_frame[1]].as_int; - thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv, - &second_best_ref_mv->as_mv, + if (is_compound) { + this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; + thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); } break; case NEARESTMV: - this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; - if (has_second_rf) - this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; + this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; + if (is_compound) + this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; break; case NEARMV: - this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; - if (has_second_rf) - this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; + this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; + if (is_compound) + this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; break; case ZEROMV: - this_mv->as_int = 0; - if (has_second_rf) - this_second_mv->as_int = 0; + this_mv[0].as_int = 0; + if (is_compound) + this_mv[1].as_int = 0; break; default: break; } - mic->bmi[i].as_mv[0].as_int = this_mv->as_int; - if (has_second_rf) - mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; + mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int; + if (is_compound) + mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int; mic->bmi[i].as_mode = mode; @@ -1542,7 +1545,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; @@ -1556,6 +1559,8 @@ static int64_t 
encode_inter_mb_segment(VP9_COMP *cpi, int thisrate = 0, ref; const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + for (ref = 0; ref < 1 + is_compound; ++ref) { const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i, pd->pre[ref].stride)]; @@ -1563,7 +1568,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -1588,7 +1593,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, 16, &ssz); thissse += ssz; thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, - so->scan, so->neighbors); + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); rd = MIN(rd1, rd2); @@ -1638,7 +1644,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; @@ -1653,7 +1659,7 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre[0]; if (has_second_ref(mbmi)) @@ -1664,6 +1670,45 @@ static INLINE int mv_has_subpel(const MV *mv) { return (mv->row & 0x0F) || (mv->col & 0x0F); } +// Check if NEARESTMV/NEARMV/ZEROMV is the 
cheapest way encode zero motion. +// TODO(aconverse): Find out if this is still productive then clean up or remove +static int check_best_zero_mv( + const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int disable_inter_mode_mask, int this_mode, int ref_frame, + int second_ref_frame) { + if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && + frame_mv[this_mode][ref_frame].as_int == 0 && + (second_ref_frame == NONE || + frame_mv[this_mode][second_ref_frame].as_int == 0)) { + int rfc = mode_context[ref_frame]; + int c1 = cost_mv_ref(cpi, NEARMV, rfc); + int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); + int c3 = cost_mv_ref(cpi, ZEROMV, rfc); + + if (this_mode == NEARMV) { + if (c1 > c3) return 0; + } else if (this_mode == NEARESTMV) { + if (c2 > c3) return 0; + } else { + assert(this_mode == ZEROMV); + if (second_ref_frame == NONE) { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + return 0; + } else { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && + frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && + frame_mv[NEARMV][second_ref_frame].as_int == 0)) + return 0; + } + } + } + return 1; +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, @@ -1674,7 +1719,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE this_mode; MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -1691,6 +1736,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, 
MACROBLOCK *x, int mode_idx; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); + const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); @@ -1706,7 +1752,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { // TODO(jingning,rbultje): rewrite the rate-distortion optimization // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop - int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT]; + int_mv mode_mv[MB_MODE_COUNT][2]; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; MB_PREDICTION_MODE mode_selected = ZEROMV; int64_t best_rd = INT64_MAX; @@ -1728,45 +1774,14 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; - if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << mode_idx)) + if (disable_inter_mode_mask & (1 << mode_idx)) continue; - // if we're near/nearest and mv == 0,0, compare to zeromv - if ((this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && - (!has_second_rf || - frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { - int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!has_second_rf) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && - 
frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) - continue; - } - } - } + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, + this_mode, mbmi->ref_frame[0], + mbmi->ref_frame[1])) + continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, @@ -1777,7 +1792,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { - int_mv *const new_mv = &mode_mv[NEWMV]; + int_mv *const new_mv = &mode_mv[NEWMV][0]; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; @@ -1835,18 +1850,30 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else if (cpi->sf.search_method == SQUARE) { bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else if (cpi->sf.search_method == BIGDIA) { bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, v_fn_ptr, 1, &bsi->ref_mv[0]->as_mv, &new_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, + &bsi->ref_mv[0]->as_mv, + v_fn_ptr, 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, @@ -1925,55 +1952,43 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, } bsi->rdstat[i][mode_idx].brate = - labels2mode(x, i, this_mode, &mode_mv[this_mode], - 
&second_mode_mv[this_mode], frame_mv, seg_mvs[i], - bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost, - x->mvcost, cpi); - - - bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].mvs[0].as_int = - mode_mv[this_mode].as_int; - if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].mvs[0].as_int = - mode_mv[this_mode].as_int; - if (has_second_rf) { - bsi->rdstat[i][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv, + seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost); + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + bsi->rdstat[i][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_wide > 1) - bsi->rdstat[i + 1][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_high > 1) - bsi->rdstat[i + 2][mode_idx].mvs[1].as_int = - second_mode_mv[this_mode].as_int; + bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int = + mode_mv[this_mode][ref].as_int; } // Trap vectors that reach beyond the UMV borders - if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) || + if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) || (has_second_rf && - mv_check_bounds(x, &second_mode_mv[this_mode].as_mv))) + mv_check_bounds(x, &mode_mv[this_mode][1].as_mv))) continue; if (filter_idx > 0) { BEST_SEG_INFO *ref_bsi = bsi_buf; - subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv); - have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_rf) { - subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv); - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; + subpelmv = 0; + have_ref = 1; + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); + have_ref &= 
mode_mv[this_mode][ref].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (filter_idx > 1 && !subpelmv && !have_ref) { ref_bsi = bsi_buf + 1; - have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; - if (has_second_rf) { - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; - } + have_ref = 1; + for (ref = 0; ref < 1 + has_second_rf; ++ref) + have_ref &= mode_mv[this_mode][ref].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (!subpelmv && have_ref && @@ -2034,10 +2049,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); - labels2mode(x, i, mode_selected, &mode_mv[mode_selected], - &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], - bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost, - x->mvcost, cpi); + labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected], + frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, + x->mvcost); br += bsi->rdstat[i][mode_idx].brate; bd += bsi->rdstat[i][mode_idx].bdist; @@ -2084,7 +2098,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int i; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; @@ -2131,7 +2145,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size ) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int_mv this_mv; int i; int zero_seen = 0; @@ -2160,10 +2174,9 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, max_mv = MAX(max_mv, MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); // only need to check zero mv once - if (!this_mv.as_int && zero_seen) { - 
x->mode_sad[ref_frame][i] = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)]; + if (!this_mv.as_int && zero_seen) continue; - } + zero_seen = zero_seen || !this_mv.as_int; row_offset = this_mv.as_mv.row >> 3; @@ -2174,9 +2187,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride, 0x7fffffff); - x->mode_sad[ref_frame][i] = this_sad; - if (this_mv.as_int == 0) - x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = this_sad; // Note if it is the best so far. if (this_sad < best_sad) { @@ -2185,12 +2195,6 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, } } - if (!zero_seen) - x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = - cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_buffer, ref_y_stride, - 0x7fffffff); - // Note the index of the mv that worked best in the reference list. x->mv_best_ref_index[ref_frame] = best_index; x->max_mv_context[ref_frame] = max_mv; @@ -2271,7 +2275,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // restored if we decide to encode this way ctx->skip = x->skip; ctx->best_mode_index = mode_index; - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; ctx->best_ref_mv[0].as_int = ref_mv->as_int; ctx->best_ref_mv[1].as_int = second_ref_mv->as_int; @@ -2322,7 +2326,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; @@ -2331,8 +2335,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them - 
vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref_frame, candidates, - mi_row, mi_col); + vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, @@ -2355,22 +2358,21 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -static INLINE int get_switchable_rate(const MACROBLOCK *x) { +int vp9_get_switchable_rate(const MACROBLOCK *x) { const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[ctx][mbmi->interp_filter]; } static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int bestsme = INT_MAX; int further_steps, step_param; @@ -2400,7 +2402,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; - setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } vp9_set_mv_search_range(x, &ref_mv); @@ -2456,22 +2458,41 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Further step/diamond searches as necessary further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - if (cpi->sf.search_method == FAST_HEX) { - bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, + if (cpi->sf.search_method == FAST_DIAMOND) { + bestsme 
= vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, + &cpi->fn_ptr[bsize], 1, + &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == FAST_HEX) { + bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == HEX) { bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == SQUARE) { bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else if (cpi->sf.search_method == BIGDIA) { bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, &cpi->fn_ptr[bsize], 1, &ref_mv, &tmp_mv->as_mv); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, + &cpi->fn_ptr[bsize], 1); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 1, @@ -2517,13 +2538,14 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv ref_mv[2]; int ite, ref; // Prediction buffer from second frame. 
uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Do joint motion search in compound mode to get more accurate mv. struct buf_2d backup_yv12[2][MAX_MB_PLANE]; @@ -2544,7 +2566,8 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[ref][i] = xd->plane[i].pre[ref]; - setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL); + vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, + NULL); } frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; @@ -2576,7 +2599,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2597,6 +2620,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &ref_mv[id].as_mv, second_pred, pw, ph); + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv, + second_pred, &cpi->fn_ptr[bsize], 1); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -2658,7 +2684,6 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, } static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int64_t txfm_cache[], int *rate2, int64_t *distortion, @@ -2674,7 +2699,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int64_t ref_best_rd) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); const int num_refs = is_comp_pred ? 
2 : 1; const int this_mode = mbmi->mode; @@ -2720,13 +2745,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += rate_mv; } else { int_mv tmp_mv; - single_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; *rate2 += rate_mv; frame_mv[refs[0]].as_int = - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; } } @@ -2788,8 +2813,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2859,8 +2883,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? 
vp9_get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2890,12 +2913,15 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(x); if (!is_comp_pred) { - if (cpi->active_map_enabled && x->active_ptr[0] == 0) + if (!x->in_active_map) { + if (psse) + *psse = 0; + *distortion = 0; x->skip = 1; - else if (cpi->allow_encode_breakout && x->encode_breakout) { + } else if (cpi->allow_encode_breakout && x->encode_breakout) { const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var, sse; @@ -2990,7 +3016,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); - super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv, + super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty); if (*rate_uv == INT_MAX) { *rate2 = INT_MAX; @@ -3045,7 +3071,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -3054,7 +3080,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize, max_uv_tx_size); } else { @@ -3064,7 +3090,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size 
= get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size); } @@ -3087,7 +3113,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; } int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, @@ -3100,9 +3126,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; @@ -3120,7 +3145,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; - int mode_index, best_mode_index = 0; + int mode_index, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; @@ -3138,12 +3163,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; int best_skip2 = 0; int mode_skip_mask = 0; - const int mode_skip_start = cpi->sf.mode_skip_start + 1; + int mode_skip_start = cpi->sf.mode_skip_start + 1; const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = 
cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; + int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3167,7 +3193,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; @@ -3242,6 +3268,24 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_skip_mask |= new_modes_mask; } + if (bsize > cpi->sf.max_intra_bsize) { + mode_skip_mask |= 0xFF30808; + } + + if (!x->in_active_map) { + int mode_index; + assert(cpi->ref_frame_flags & VP9_LAST_FLAG); + if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0) + mode_index = THR_NEARESTMV; + else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0) + mode_index = THR_NEARMV; + else + mode_index = THR_ZEROMV; + mode_skip_mask = ~(1 << mode_index); + mode_skip_start = MAX_MODES; + disable_inter_mode_mask = 0; + } + for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3258,7 +3302,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. 
- if (mode_index == mode_skip_start) { + if (mode_index == mode_skip_start && best_mode_index >= 0) { switch (vp9_mode_order[best_mode_index].ref_frame[0]) { case INTRA_FRAME: break; @@ -3288,13 +3332,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; if (ref_frame != INTRA_FRAME && - cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode))) + disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) continue; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + best_mode_index >=0 && vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) continue; if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && @@ -3322,7 +3367,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // one of the neighboring directional modes if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && (this_mode >= D45_PRED && this_mode <= TM_PRED)) { - if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) + if (best_mode_index >= 0 && + vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) continue; } if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { @@ -3331,46 +3377,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - // if we're near/nearest and mv == 0,0, compare to zeromv - if ((this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && - (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mbmi->mode_context[ref_frame]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > 
c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!comp_pred) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) - continue; - } - } - } + if (x->in_active_map && + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, this_mode, ref_frame, + second_ref_frame)) + continue; } mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; + mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use @@ -3379,7 +3395,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, : cm->interp_filter; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { @@ -3422,7 +3437,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else { - this_rd = handle_inter_mode(cpi, x, tile, bsize, + this_rd = handle_inter_mode(cpi, x, bsize, tx_cache, &rate2, &distortion2, &skippable, &rate_y, &distortion_y, @@ -3641,7 +3656,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } - if (best_rd >= best_rd_so_far) + if (best_mode_index < 0 || best_rd >= best_rd_so_far) return INT64_MAX; // If we used an estimate for the uv intra rd in the loop above... 
@@ -3671,16 +3686,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { + int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index]; + if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_fact[bsize][mode_index] -= - (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3); + *fact -= (*fact >> 3); } else { - cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC; - if (cpi->rd_thresh_freq_fact[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) { - cpi->rd_thresh_freq_fact[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT; - } + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } } @@ -3716,6 +3728,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } + if (!x->in_active_map) { + assert(mbmi->ref_frame[0] == LAST_FRAME); + assert(mbmi->ref_frame[1] == NONE); + assert(mbmi->mode == NEARESTMV || + mbmi->mode == NEARMV || + mbmi->mode == ZEROMV); + assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0); + assert(mbmi->mode == mbmi->uv_mode); + } + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], @@ -3735,11 +3757,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; - const struct segmentation *seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame; 
unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3799,7 +3820,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } @@ -3832,10 +3853,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t total_sse = INT_MAX; int early_term = 0; - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = INT64_MAX; - - x->skip = 0; ref_frame = vp9_ref_order[mode_index].ref_frame[0]; second_ref_frame = vp9_ref_order[mode_index].ref_frame[1]; @@ -3872,71 +3889,43 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX) continue; - // Do not allow compound prediction if the segment level reference - // frame feature is in use as in this case there can only be one reference. 
- if ((second_ref_frame > INTRA_FRAME) && - vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - continue; - - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; - - if (!(ref_frame == INTRA_FRAME - || (cpi->ref_frame_flags & flag_list[ref_frame]))) { - continue; - } - if (!(second_ref_frame == NONE - || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { + if (ref_frame > INTRA_FRAME && + !(cpi->ref_frame_flags & flag_list[ref_frame])) { continue; } comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) - if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) - continue; - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (ref_frame != best_inter_ref_frame && - second_ref_frame != best_inter_ref_frame) - continue; + if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) + continue; + // Do not allow compound prediction if the segment level reference frame + // feature is in use as in this case there can only be one reference. + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + continue; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) + continue; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && + ref_frame != best_inter_ref_frame && + second_ref_frame != best_inter_ref_frame) + continue; } // TODO(jingning, jkoleszar): scaling reference frame not supported for // sub8x8 blocks. 
- if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) + if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) continue; - if (second_ref_frame > 0 && + if (second_ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - mbmi->uv_mode = DC_PRED; - - // Evaluate all sub-pel filters irrespective of whether we can use - // them for this frame. - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP - : cm->interp_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - if (comp_pred) { - if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) - continue; - mode_excluded = mode_excluded ? mode_excluded : cm->reference_mode == SINGLE_REFERENCE; - } else { - if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { - mode_excluded = mode_excluded ? - mode_excluded : cm->reference_mode == COMPOUND_REFERENCE; - } - } - - // Select prediction reference frames. - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - if (comp_pred) - xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; + } else if (ref_frame != INTRA_FRAME) { + mode_excluded = mode_excluded ? mode_excluded + : cm->reference_mode == COMPOUND_REFERENCE; } // If the segment reference frame feature is enabled.... @@ -3963,6 +3952,27 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; } + mbmi->tx_size = TX_4X4; + mbmi->uv_mode = DC_PRED; + mbmi->ref_frame[0] = ref_frame; + mbmi->ref_frame[1] = second_ref_frame; + // Evaluate all sub-pel filters irrespective of whether we can use + // them for this frame. + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + x->skip = 0; + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); + + // Select prediction reference frames. 
+ for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; + if (comp_pred) + xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; + } + + for (i = 0; i < TX_MODES; ++i) + tx_cache[i] = INT64_MAX; + #ifdef MODE_TEST_HIT_STATS // TEST/DEBUG CODE // Keep a rcord of the number of test hits at each size @@ -3971,7 +3981,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (ref_frame == INTRA_FRAME) { int rate; - mbmi->tx_size = TX_4X4; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; @@ -4016,7 +4025,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; cpi->mask_filter_rd = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) @@ -4024,8 +4032,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; - if (x->source_variance < - cpi->sf.disable_filter_search_var_thresh) { + if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; } else if (cpi->sf.adaptive_pred_interp_filter == 1 && ctx->pred_interp_filter < SWITCHABLE) { @@ -4040,7 +4047,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4053,7 +4059,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[switchable_filter_index] = 
tmp_rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = @@ -4080,7 +4086,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_skippable = skippable; tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { - tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; @@ -4105,7 +4111,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter : cm->interp_filter); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4128,14 +4133,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, skippable = tmp_best_skippable; *mbmi = tmp_best_mbmode; for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i]; + xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } rate2 += rate; distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(x); if (!mode_excluded) mode_excluded = comp_pred ? 
cm->reference_mode == SINGLE_REFERENCE @@ -4152,7 +4157,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // then dont bother looking at UV vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); - super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable, + super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, tmp_best_rdu); if (rate_uv == INT_MAX) continue; @@ -4212,8 +4217,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } // Keep record of best inter rd with single reference - if (is_inter_block(&xd->mi_8x8[0]->mbmi) && - !has_second_ref(&xd->mi_8x8[0]->mbmi) && + if (is_inter_block(mbmi) && + !has_second_ref(mbmi) && !mode_excluded && this_rd < best_inter_rd) { best_inter_rd = this_rd; @@ -4249,11 +4254,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, best_skip2 = this_skip2; if (!x->select_txfm_size) swap_block_ptr(x, ctx, max_plane); - vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], + vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(uint8_t) * ctx->num_4x4_blk); for (i = 0; i < 4; i++) - best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + best_bmodes[i] = xd->mi[0]->bmi[i]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -4289,11 +4294,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_REFERENCE]) { + if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) { best_pred_rd[SINGLE_REFERENCE] = single_rd; - } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + } else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) { best_pred_rd[COMPOUND_REFERENCE] = single_rd; } if (hybrid_rd 
< best_pred_rd[REFERENCE_MODE_SELECT]) @@ -4324,13 +4327,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ - if (bsize < BLOCK_32X32) { - if (bsize < BLOCK_16X16) { - tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; - tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; - } - tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; - } + tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; + tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; + tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) { int64_t adj_rd = INT64_MAX; @@ -4369,7 +4368,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - if (best_rd == INT64_MAX && bsize < BLOCK_8X8) { + if (best_rd == INT64_MAX) { *returnrate = INT_MAX; *returndistortion = INT64_MAX; return best_rd; @@ -4386,16 +4385,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. 
if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { + int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index]; + if (mode_index == best_mode_index) { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -= - (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3); + *fact -= (*fact >> 3); } else { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC; - if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] > - (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) { - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] = - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT; - } + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } } @@ -4405,13 +4401,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode; + xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { for (i = 0; i < 4; ++i) - vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); + vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int; + mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { @@ -4430,11 +4426,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); - } else { - vp9_zero(best_filter_diff); - } - - if (!x->skip) { for (i = 0; i < TX_MODES; i++) { if (best_tx_rd[i] == INT64_MAX) best_tx_diff[i] = 0; @@ -4442,6 +4433,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, best_tx_diff[i] = best_rd - best_tx_rd[i]; } } else { + vp9_zero(best_filter_diff); vp9_zero(best_tx_diff); } 
diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.h b/source/libvpx/vp9/encoder/vp9_rdopt.h index 6b85d67..a01dbd4 100644 --- a/source/libvpx/vp9/encoder/vp9_rdopt.h +++ b/source/libvpx/vp9/encoder/vp9_rdopt.h @@ -23,11 +23,6 @@ extern "C" { (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) #define QIDX_SKIP_THRESH 115 -#define RD_THRESH_MAX_FACT 64 -#define RD_THRESH_INC 1 -#define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 - #define MV_COST_WEIGHT 108 #define MV_COST_WEIGHT_SUB 120 @@ -35,12 +30,18 @@ extern "C" { struct TileInfo; -int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex); +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex); void vp9_initialize_rd_consts(VP9_COMP *cpi); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist); + +int vp9_get_switchable_rate(const MACROBLOCK *x); + void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame, @@ -77,9 +78,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, void vp9_init_me_luts(); -void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode, - const MV *mv); - void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, ENTROPY_CONTEXT t_above[16], diff --git a/source/libvpx/vp9/encoder/vp9_sad.c b/source/libvpx/vp9/encoder/vp9_sad.c index 58c5df4..9d8da0d 100644 --- a/source/libvpx/vp9/encoder/vp9_sad.c +++ b/source/libvpx/vp9/encoder/vp9_sad.c @@ -44,7 +44,7 @@ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *second_pred, \ unsigned int max_sad) { \ uint8_t comp_pred[m * n]; \ - comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ + vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ return sad(src_ptr, src_stride, comp_pred, m, m, n); \ } diff --git 
a/source/libvpx/vp9/encoder/vp9_segmentation.c b/source/libvpx/vp9/encoder/vp9_segmentation.c index 49fd7bb..9d3e6dc 100644 --- a/source/libvpx/vp9/encoder/vp9_segmentation.c +++ b/source/libvpx/vp9/encoder/vp9_segmentation.c @@ -16,6 +16,7 @@ #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_segmentation.h" void vp9_enable_segmentation(struct segmentation *seg) { @@ -132,8 +133,8 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - xd->mi_8x8 = mi_8x8; - segment_id = xd->mi_8x8[0]->mbmi.segment_id; + xd->mi = mi_8x8; + segment_id = xd->mi[0]->mbmi.segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -151,7 +152,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, // Store the prediction status for this mb and update counts // as appropriate - xd->mi_8x8[0]->mbmi.seg_id_predicted = pred_flag; + xd->mi[0]->mbmi.seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; if (!pred_flag) @@ -168,7 +169,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int bw, bh; const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; @@ -228,7 +229,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { vp9_prob t_pred_tree[SEG_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; MODE_INFO **mi_ptr, **mi; // Set default state for the segment tree probabilities and the diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.c b/source/libvpx/vp9/encoder/vp9_speed_features.c new file mode 100644 index 0000000..adad800 --- /dev/null +++ 
b/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_speed_features.h" + +#define ALL_INTRA_MODES ((1 << DC_PRED) | \ + (1 << V_PRED) | (1 << H_PRED) | \ + (1 << D45_PRED) | (1 << D135_PRED) | \ + (1 << D117_PRED) | (1 << D153_PRED) | \ + (1 << D207_PRED) | (1 << D63_PRED) | \ + (1 << TM_PRED)) +#define INTRA_DC_ONLY (1 << DC_PRED) +#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) +#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) +#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) + +// Masks for partially or completely disabling split mode +#define DISABLE_ALL_INTER_SPLIT ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD) | \ + (1 << THR_LAST)) + +#define DISABLE_ALL_SPLIT ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT) + +#define DISABLE_COMPOUND_SPLIT ((1 << THR_COMP_GA) | (1 << THR_COMP_LA)) + +#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD)) + +static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, + SPEED_FEATURES *sf, int speed) { + sf->adaptive_rd_thresh = 1; + sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW; + sf->allow_skip_recode = 1; + + if (speed >= 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? 
USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->subpel_iters_per_step = 1; + sf->mode_skip_start = 10; + sf->adaptive_pred_interp_filter = 1; + + sf->recode_loop = ALLOW_RECODE_KFARFGF; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + + if (speed >= 2) { + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->adaptive_pred_interp_filter = 2; + sf->reference_masking = 1; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + } + + if (speed >= 3) { + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->recode_loop = ALLOW_RECODE_KFMAXBW; + sf->adaptive_rd_thresh = 3; + sf->mode_skip_start = 6; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->use_fast_coef_costing = 1; + } + + if (speed >= 4) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = 
USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->adaptive_rd_thresh = 4; + sf->mode_search_skip_flags |= FLAG_SKIP_COMP_REFMISMATCH | + FLAG_EARLY_TERMINATE; + sf->disable_filter_search_var_thresh = 200; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_lp32x32fdct = 1; + } + + if (speed >= 5) { + int i; + + sf->partition_search_type = FIXED_PARTITION; + sf->optimize_coefficients = 0; + sf->search_method = HEX; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; ++i) { + sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; + } +} + +static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, + int speed) { + sf->static_segmentation = 0; + sf->adaptive_rd_thresh = 1; + sf->encode_breakout_thresh = 1; + sf->use_fast_coef_costing = 1; + + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 8; + } + + if (speed >= 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 2; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 200; + } + + if (speed >= 3) { + sf->use_square_partition_only = 1; + sf->disable_filter_search_var_thresh = 100; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->constrain_copy_partition = 1; + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + sf->allow_skip_recode = 0; + sf->optimize_coefficients = 0; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->lpf_pick = LPF_PICK_FROM_Q; + sf->encode_breakout_thresh = 700; + } + + if (speed >= 4) { + int i; + sf->last_partitioning_redo_frequency = 4; + sf->adaptive_rd_thresh = 5; + sf->use_fast_coef_costing = 0; + sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; + sf->adjust_partitioning_from_last_frame = + cm->last_frame_type != cm->frame_type || (0 == + (cm->current_video_frame + 1) % 
sf->last_partitioning_redo_frequency); + sf->subpel_force_stop = 1; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->frame_parameter_update = 0; + sf->encode_breakout_thresh = 1000; + sf->search_method = FAST_HEX; + sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); + sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->max_intra_bsize = BLOCK_32X32; + sf->allow_skip_recode = 1; + } + + if (speed >= 5) { + sf->max_partition_size = BLOCK_32X32; + sf->min_partition_size = BLOCK_8X8; + sf->partition_check = + (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); + sf->force_frame_boost = cm->frame_type == KEY_FRAME || + (cm->current_video_frame % + (sf->last_partitioning_redo_frequency << 1) == 1); + sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15; + sf->partition_search_type = REFERENCE_PARTITION; + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + sf->allow_skip_recode = 0; + } + + if (speed >= 6) { + // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. + sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 360; + } + + if (speed >= 7) { + int i; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + } +} + +void vp9_set_speed_features(VP9_COMP *cpi) { + SPEED_FEATURES *const sf = &cpi->sf; + VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int speed = cpi->speed < 0 ? 
-cpi->speed : cpi->speed; + int i; + + // best quality defaults + sf->frame_parameter_update = 1; + sf->search_method = NSTEP; + sf->recode_loop = ALLOW_RECODE; + sf->subpel_search_method = SUBPEL_TREE; + sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; + sf->optimize_coefficients = !oxcf->lossless; + sf->reduce_first_step_size = 0; + sf->auto_mv_step_size = 0; + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->adaptive_rd_thresh = 0; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; + sf->tx_size_search_method = USE_FULL_RD; + sf->use_lp32x32fdct = 0; + sf->adaptive_motion_search = 0; + sf->adaptive_pred_interp_filter = 0; + sf->reference_masking = 0; + sf->partition_search_type = SEARCH_PARTITION; + sf->less_rectangular_check = 0; + sf->use_square_partition_only = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; + sf->max_partition_size = BLOCK_64X64; + sf->min_partition_size = BLOCK_4X4; + sf->adjust_partitioning_from_last_frame = 0; + sf->last_partitioning_redo_frequency = 4; + sf->constrain_copy_partition = 0; + sf->disable_split_mask = 0; + sf->mode_search_skip_flags = 0; + sf->force_frame_boost = 0; + sf->max_delta_qindex = 0; + sf->disable_split_var_thresh = 0; + sf->disable_filter_search_var_thresh = 0; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + } + sf->use_rd_breakout = 0; + sf->skip_encode_sb = 0; + sf->use_uv_intra_rd_estimate = 0; + sf->allow_skip_recode = 0; + sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; + sf->use_fast_coef_updates = TWO_LOOP; + sf->use_fast_coef_costing = 0; + sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->use_nonrd_pick_mode = 0; + sf->encode_breakout_thresh = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = 0; + sf->max_intra_bsize = BLOCK_64X64; + // This setting only takes effect when partition_search_type is set 
+ // to FIXED_PARTITION. + sf->always_this_block_size = BLOCK_16X16; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 100; + + // Recode loop tolerence %. + sf->recode_tolerance = 25; + + switch (oxcf->mode) { + case MODE_BESTQUALITY: + case MODE_SECONDPASS_BEST: // This is the best quality mode. + cpi->diamond_search_sad = vp9_full_range_search; + break; + case MODE_FIRSTPASS: + case MODE_GOODQUALITY: + case MODE_SECONDPASS: + set_good_speed_feature(cpi, cm, sf, speed); + break; + case MODE_REALTIME: + set_rt_speed_feature(cm, sf, speed); + break; + } + + // Slow quant, dct and trellis not worthwhile for first pass + // so make sure they are always turned off. + if (cpi->pass == 1) + sf->optimize_coefficients = 0; + + // No recode for 1 pass. + if (cpi->pass == 0) { + sf->recode_loop = DISALLOW_RECODE; + sf->optimize_coefficients = 0; + } + + if (sf->subpel_search_method == SUBPEL_TREE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; + } + + cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1; + + if (cpi->encode_breakout && oxcf->mode == MODE_REALTIME && + sf->encode_breakout_thresh > cpi->encode_breakout) + cpi->encode_breakout = sf->encode_breakout_thresh; + + if (sf->disable_split_mask == DISABLE_ALL_SPLIT) + sf->adaptive_pred_interp_filter = 0; + + if (!cpi->oxcf.frame_periodic_boost) { + sf->max_delta_qindex = 0; + } +} diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.h b/source/libvpx/vp9/encoder/vp9_speed_features.h new file mode 100644 index 0000000..72f548a --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ + +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + DIAMOND = 0, + NSTEP = 1, + HEX = 2, + BIGDIA = 3, + SQUARE = 4, + FAST_HEX = 5, + FAST_DIAMOND = 6 +} SEARCH_METHODS; + +typedef enum { + // No recode. + DISALLOW_RECODE = 0, + // Allow recode for KF and exceeding maximum frame bandwidth. + ALLOW_RECODE_KFMAXBW = 1, + // Allow recode only for KF/ARF/GF frames. + ALLOW_RECODE_KFARFGF = 2, + // Allow recode for all frames based on bitrate constraints. + ALLOW_RECODE = 3, +} RECODE_LOOP_TYPE; + +typedef enum { + SUBPEL_TREE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + +typedef enum { + LAST_FRAME_PARTITION_OFF = 0, + LAST_FRAME_PARTITION_LOW_MOTION = 1, + LAST_FRAME_PARTITION_ALL = 2 +} LAST_FRAME_PARTITION_METHOD; + +typedef enum { + USE_FULL_RD = 0, + USE_LARGESTINTRA, + USE_LARGESTINTRA_MODELINTER, + USE_LARGESTALL +} TX_SIZE_SEARCH_METHOD; + +typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { + // Try the full image with different values. + LPF_PICK_FROM_FULL_IMAGE, + // Try a small portion of the image with different values. + LPF_PICK_FROM_SUBIMAGE, + // Estimate the level based on quantizer and frame type + LPF_PICK_FROM_Q, +} LPF_PICK_METHOD; + +typedef enum { + // Terminate search early based on distortion so far compared to + // qp step, distortion in the neighborhood of the frame, etc. + FLAG_EARLY_TERMINATE = 1 << 0, + + // Skips comp inter modes if the best so far is an intra mode. 
+ FLAG_SKIP_COMP_BESTINTRA = 1 << 1, + + // Skips comp inter modes if the best single intermode so far does + // not have the same reference as one of the two references being + // tested. + FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, + + // Skips oblique intra modes if the best so far is an inter mode. + FLAG_SKIP_INTRA_BESTINTER = 1 << 3, + + // Skips oblique intra modes at angles 27, 63, 117, 153 if the best + // intra so far is not one of the neighboring directions. + FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, + + // Skips intra modes other than DC_PRED if the source variance is small + FLAG_SKIP_INTRA_LOWVAR = 1 << 5, +} MODE_SEARCH_SKIP_LOGIC; + +typedef enum { + // Search partitions using RD/NONRD criterion + SEARCH_PARTITION = 0, + + // Always use a fixed size partition + FIXED_PARTITION = 1, + + // Use a fixed size partition in every 64X64 SB, where the size is + // determined based on source variance + VAR_BASED_FIXED_PARTITION = 2, + + REFERENCE_PARTITION = 3, + + // Use an arbitrary partitioning scheme based on source variance within + // a 64X64 SB + VAR_BASED_PARTITION, + + // Use non-fixed partitions based on source variance + SOURCE_VAR_BASED_PARTITION +} PARTITION_SEARCH_TYPE; + +typedef enum { + // Does a dry run to see if any of the contexts need to be updated or not, + // before the final run. + TWO_LOOP = 0, + + // No dry run conducted. + ONE_LOOP = 1, + + // No dry run, also only half the coef contexts and bands are updated. + // The rest are not updated at all. + ONE_LOOP_REDUCED = 2 +} FAST_COEFF_UPDATE; + +typedef struct { + // Frame level coding parameter update + int frame_parameter_update; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). 
+ SEARCH_METHODS search_method; + + RECODE_LOOP_TYPE recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. + SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. + int subpel_iters_per_step; + + // Control when to stop subpel search + int subpel_force_stop; + + // This parameter controls the number of steps we'll do in a diamond + // search. + int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. + int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. + int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). + int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes make the + // backgrounds very to cheap to encode, and the segmentation we have + // adds overhead. + int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 we + // we just use the best motion vector found for each frame by itself. + int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. 
+ int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. + int skip_encode_sb; + int skip_encode_frame; + // Speed feature to allow or disallow skipping of recode at block + // level within a frame. + int allow_skip_recode; + + // This variable allows us to reuse the last frames partition choices + // (64x64 v 32x32 etc) for this frame. It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last partitioning_redo frequency to determine how often to redo + // the partitioning from scratch. Adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frames + // partitioning. + LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. + TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. + int use_lp32x32fdct; + + // TODO(JBB): remove this as its no longer used. + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. + int mode_skip_start; + + // TODO(JBB): Remove this. + int reference_masking; + + PARTITION_SEARCH_TYPE partition_search_type; + + // Used if partition_search_type = FIXED_SIZE_PARTITION + BLOCK_SIZE always_this_block_size; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. + int less_rectangular_check; + + // Disable testing non square partitions. 
(eg 16x32) + int use_square_partition_only; + + // Sets min and max partition sizes for this 64x64 region based on the + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. + BLOCK_SIZE min_partition_size; + BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. + int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. + int last_partitioning_redo_frequency; + + // This enables constrained copy partitioning, which, given an input block + // size bsize, will copy previous partition for partitions less than bsize, + // otherwise bsize partition is used. bsize is currently set to 16x16. + // Used for the case where motion is detected in superblock. + int constrain_copy_partition; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. + int disable_split_mask; + + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. + int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. 
+ int adaptive_pred_interp_filter; + + // Search through variable block partition types in non-RD mode decision + // encoding process for RTC. + int partition_check; + + // Use finer quantizer in every other few frames that run variable block + // partition type search. + int force_frame_boost; + + // Maximally allowed base quantization index fluctuation. + int max_delta_qindex; + + // Implements various heuristics to skip searching modes + // The heuristics selected are based on flags + // defined in the MODE_SEARCH_SKIP_HEURISTICS enum + unsigned int mode_search_skip_flags; + + // A source variance threshold below which the split mode is disabled + unsigned int disable_split_var_thresh; + + // A source variance threshold below which filter search is disabled + // Choose a very large value (UINT_MAX) to use 8-tap always + unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. + int intra_y_mode_mask[TX_SIZES]; + int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. + int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. + int use_uv_intra_rd_estimate; + + // This feature controls how the loop filter level is determined. + LPF_PICK_METHOD lpf_pick; + + // This feature limits the number of coefficients updates we actually do + // by only looking at counts from 1/2 the bands. + FAST_COEFF_UPDATE use_fast_coef_updates; + + // This flag controls the use of non-RD mode decision. + int use_nonrd_pick_mode; + + // This variable sets the encode_breakout threshold. Currently, it is only + // enabled in real time mode. 
+ int encode_breakout_thresh; + + // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV + // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. + int disable_inter_mode_mask[BLOCK_SIZES]; + + // This feature controls whether we do the expensive context update and + // calculation in the rd coefficient costing loop. + int use_fast_coef_costing; + + // This feature controls the tolerence vs target used in deciding whether to + // recode a frame. It has no meaning if recode is disabled. + int recode_tolerance; + + // This variable controls the maximum block size where intra blocks can be + // used in inter frames. + // TODO(aconverse): Fold this into one of the other many mode skips + BLOCK_SIZE max_intra_bsize; + + // The frequency that we check if SOURCE_VAR_BASED_PARTITION or + // FIXED_PARTITION search type should be used. + int search_type_check_frequency; + + // The threshold used in SOURCE_VAR_BASED_PARTITION search type. + int source_var_thresh; +} SPEED_FEATURES; + +struct VP9_COMP; + +void vp9_set_speed_features(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ + diff --git a/source/libvpx/vp9/encoder/vp9_ssim.c b/source/libvpx/vp9/encoder/vp9_ssim.c index a5f18e6..026e6a8 100644 --- a/source/libvpx/vp9/encoder/vp9_ssim.c +++ b/source/libvpx/vp9/encoder/vp9_ssim.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" -#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, @@ -65,12 +66,6 @@ static double similarity(unsigned long sum_s, unsigned long sum_r, return ssim_n * 1.0 / ssim_d; } -static double ssim_16x16(uint8_t *s, int sp, uint8_t *r, int rp) { - unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - vp9_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, - &sum_sxr); - return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); -} static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, diff --git a/source/libvpx/vp9/encoder/vp9_ssim.h b/source/libvpx/vp9/encoder/vp9_ssim.h new file mode 100644 index 0000000..a581c2c --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_ssim.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_ENCODER_VP9_SSIM_H_ +#define VP9_ENCODER_VP9_SSIM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vpx_scale/yv12config.h" + +double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight); + +double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SSIM_H_ diff --git a/source/libvpx/vp9/encoder/vp9_subexp.c b/source/libvpx/vp9/encoder/vp9_subexp.c index fdc2106..9796d64 100644 --- a/source/libvpx/vp9/encoder/vp9_subexp.c +++ b/source/libvpx/vp9/encoder/vp9_subexp.c @@ -11,22 +11,13 @@ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" -#include "vp9/encoder/vp9_treewriter.h" +#include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_writer.h" #define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd))) static int update_bits[255]; -static int split_index(int i, int n, int modulus) { - int max1 = (n - 1 - modulus / 2) / modulus + 1; - if (i % modulus == modulus / 2) - i = i / modulus; - else - i = max1 + i - (i + modulus - modulus / 2) / modulus; - return i; -} - static int recenter_nonneg(int v, int m) { if (v > (m << 1)) return v; diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c new file mode 100644 index 0000000..c2b6263 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_svc_layercontext.h" + +void vp9_init_layer_context(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + int layer; + int layer_end; + + svc->spatial_layer_id = 0; + svc->temporal_layer_id = 0; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lc->current_video_frame_in_layer = 0; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->ni_av_qi = oxcf->worst_allowed_q; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->avg_q = 0.0; + lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; + lrc->rate_correction_factor = 1.0; + lrc->key_frame_rate_correction_factor = 1.0; + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lrc->last_q[0] = oxcf->best_allowed_q; + lrc->last_q[1] = oxcf->best_allowed_q; + lrc->last_q[2] = oxcf->best_allowed_q; + } + + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); + lrc->bits_off_target = lrc->buffer_level; + } +} + +// Update the layer context from a change_config() call. 
+void vp9_update_layer_context_change_config(VP9_COMP *const cpi, + const int target_bandwidth) { + SVC *const svc = &cpi->svc; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const RATE_CONTROL *const rc = &cpi->rc; + int layer; + int layer_end; + float bitrate_alloc = 1.0; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + } + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; + // Update buffer-related quantities. + lc->starting_buffer_level = + (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); + lc->optimal_buffer_level = + (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc); + lc->maximum_buffer_size = + (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); + // Update framerate-related quantities. + if (svc->number_temporal_layers > 1) { + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = oxcf->framerate; + } + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + } +} + +static LAYER_CONTEXT *get_layer_context(SVC *svc) { + return svc->number_temporal_layers > 1 ? 
+ &svc->layer_context[svc->temporal_layer_id] : + &svc->layer_context[svc->spatial_layer_id]; +} + +void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(svc); + RATE_CONTROL *const lrc = &lc->rc; + const int layer = svc->temporal_layer_id; + + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; + // Update the average layer frame size (non-cumulative per-frame-bw). + if (layer == 0) { + lc->avg_frame_size = lrc->av_per_frame_bandwidth; + } else { + const double prev_layer_framerate = + oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; + const int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[layer - 1] * 1000; + lc->avg_frame_size = + (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / + (lc->framerate - prev_layer_framerate)); + } +} + +void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + RATE_CONTROL *const lrc = &lc->rc; + + lc->framerate = framerate; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->min_frame_bandwidth = (int)(lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + lrc->max_frame_bandwidth = (int)(((int64_t)lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + lrc->max_gf_interval = 16; + + lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) + lrc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + 
+ if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) + lrc->max_gf_interval = lrc->static_scene_max_gf_interval; +} + +void vp9_restore_layer_context(VP9_COMP *const cpi) { + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + const int old_frame_since_key = cpi->rc.frames_since_key; + const int old_frame_to_key = cpi->rc.frames_to_key; + + cpi->rc = lc->rc; + cpi->twopass = lc->twopass; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; + cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; + cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->output_framerate = lc->framerate; + // Reset the frames_since_key and frames_to_key counters to their values + // before the layer restore. Keep these defined for the stream (not layer). + if (cpi->svc.number_temporal_layers > 1) { + cpi->rc.frames_since_key = old_frame_since_key; + cpi->rc.frames_to_key = old_frame_to_key; + } +} + +void vp9_save_layer_context(VP9_COMP *const cpi) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + + lc->rc = cpi->rc; + lc->twopass = cpi->twopass; + lc->target_bandwidth = (int)oxcf->target_bandwidth; + lc->starting_buffer_level = oxcf->starting_buffer_level; + lc->optimal_buffer_level = oxcf->optimal_buffer_level; + lc->maximum_buffer_size = oxcf->maximum_buffer_size; + lc->framerate = cpi->output_framerate; +} + +void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; + int i; + + for (i = 0; i < svc->number_spatial_layers; ++i) { + struct twopass_rc *const twopass = &svc->layer_context[i].twopass; + + svc->spatial_layer_id = i; + vp9_init_second_pass(cpi); + + twopass->total_stats.spatial_layer_id = i; + twopass->total_left_stats.spatial_layer_id = i; + } + svc->spatial_layer_id = 0; +} + +void vp9_inc_frame_in_layer(SVC *svc) { + LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1) + ? 
&svc->layer_context[svc->temporal_layer_id] + : &svc->layer_context[svc->spatial_layer_id]; + ++lc->current_video_frame_in_layer; +} diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/source/libvpx/vp9/encoder/vp9_svc_layercontext.h new file mode 100644 index 0000000..2abed30 --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ +#define VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ + +#include "vpx/vpx_encoder.h" + +#include "vp9/encoder/vp9_ratectrl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + RATE_CONTROL rc; + int target_bandwidth; + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; + double framerate; + int avg_frame_size; + struct twopass_rc twopass; + struct vpx_fixed_buf rc_twopass_stats_in; + unsigned int current_video_frame_in_layer; +} LAYER_CONTEXT; + +typedef struct { + int spatial_layer_id; + int temporal_layer_id; + int number_spatial_layers; + int number_temporal_layers; + // Layer context used for rate control in one pass temporal CBR mode or + // two pass spatial mode. Defined for temporal or spatial layers for now. + // Does not support temporal combined with spatial RC. + LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; +} SVC; + +struct VP9_COMP; + +// Initialize layer context data from init_config(). +void vp9_init_layer_context(struct VP9_COMP *const cpi); + +// Update the layer context from a change_config() call. 
+void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, + const int target_bandwidth); + +// Prior to encoding the frame, update framerate-related quantities +// for the current temporal layer. +void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); + +// Update framerate-related quantities for the current spatial layer. +void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, + double framerate); + +// Prior to encoding the frame, set the layer context, for the current layer +// to be encoded, to the cpi struct. +void vp9_restore_layer_context(struct VP9_COMP *const cpi); + +// Save the layer context after encoding the frame. +void vp9_save_layer_context(struct VP9_COMP *const cpi); + +// Initialize second pass rc for spatial svc. +void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); + +// Increment number of video frames in layer +void vp9_inc_frame_in_layer(SVC *svc); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SVC_LAYERCONTEXT_ diff --git a/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/source/libvpx/vp9/encoder/vp9_temporal_filter.c index 6233116..0410273 100644 --- a/source/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/source/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -41,7 +41,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, struct scale_factors *scale, int x, int y) { const int which_mv = 0; - MV mv = { mv_row, mv_col }; + const MV mv = { mv_row, mv_col }; + const InterpKernel *const kernel = + vp9_get_interp_kernel(xd->mi[0]->mbmi.interp_filter); + enum mv_precision mv_precision_uv; int uv_stride; if (uv_block_size == 8) { @@ -58,7 +61,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - xd->interp_kernel, MV_PRECISION_Q3, x, y); + kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -66,7 +69,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD 
*xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -74,7 +77,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -133,7 +136,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - MV *ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0].as_mv; + MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv; // Save input state struct buf_2d src = x->plane[0].src; @@ -250,8 +253,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (cpi->frames[frame] == NULL) continue; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -284,8 +287,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->frames[frame]->v_buffer + mb_uv_offset, cpi->frames[frame]->y_stride, mb_uv_height, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale, mb_col * 16, mb_row * 16); diff --git a/source/libvpx/vp9/encoder/vp9_tokenize.c b/source/libvpx/vp9/encoder/vp9_tokenize.c index e8179f3..291ccb3 100644 --- a/source/libvpx/vp9/encoder/vp9_tokenize.c +++ b/source/libvpx/vp9/encoder/vp9_tokenize.c @@ -8,18 +8,20 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - +#include <assert.h> #include <math.h> #include <stdio.h> #include <string.h> -#include <assert.h> -#include "vp9/encoder/vp9_onyx_int.h" -#include "vp9/encoder/vp9_tokenize.h" + #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" -#include "vp9/common/vp9_entropy.h" + +#include "vp9/encoder/vp9_cost.h" +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_tokenize.h" static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2]; const TOKENVALUE *vp9_dct_value_tokens_ptr; @@ -106,7 +108,7 @@ void vp9_coef_tree_initialize() { vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); } -static void fill_value_tokens() { +void vp9_tokenize_initialize() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; const vp9_extra_bit *const e = vp9_extra_bits; @@ -160,7 +162,6 @@ struct tokenize_b_args { VP9_COMP *cpi; MACROBLOCKD *xd; TOKENEXTRA **tp; - uint8_t *token_cache; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -211,10 +212,10 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - uint8_t *token_cache = args->token_cache; + uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ @@ -296,13 +297,6 @@ static void is_skippable(int plane, int block, args->skippable[0] &= (!args->x->plane[plane].eobs[block]); } -static int sb_is_skippable(MACROBLOCK *x, BLOCK_SIZE bsize) { - int result = 1; - struct is_skippable_args args = {x, &result}; - vp9_foreach_transformed_block(&x->e_mbd, bsize, is_skippable, &args); - return result; -} - int 
vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { int result = 1; struct is_skippable_args args = {x, &result}; @@ -315,12 +309,12 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TOKENEXTRA *t_backup = *t; const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache}; + struct tokenize_b_args arg = {cpi, xd, t}; if (mbmi->skip) { if (!dry_run) cm->counts.skip[ctx][1] += skip_inc; @@ -338,7 +332,3 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } - -void vp9_tokenize_initialize() { - fill_value_tokens(); -} diff --git a/source/libvpx/vp9/encoder/vp9_treewriter.c b/source/libvpx/vp9/encoder/vp9_treewriter.c index 35e5a8f..bb04b40 100644 --- a/source/libvpx/vp9/encoder/vp9_treewriter.c +++ b/source/libvpx/vp9/encoder/vp9_treewriter.c @@ -10,33 +10,6 @@ #include "vp9/encoder/vp9_treewriter.h" -static void cost(int *costs, vp9_tree tree, const vp9_prob *probs, - int i, int c) { - const vp9_prob prob = probs[i / 2]; - int b; - - for (b = 0; b <= 1; ++b) { - const int cc = c + vp9_cost_bit(prob, b); - const vp9_tree_index ii = tree[i + b]; - - if (ii <= 0) - costs[-ii] = cc; - else - cost(costs, tree, probs, ii, cc); - } -} - -void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) { - cost(costs, tree, probs, 0, 0); -} - -void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) { - assert(tree[0] <= 0 && tree[1] > 0); - - costs[-tree[0]] = vp9_cost_bit(probs[0], 0); - cost(costs, tree, probs, 2, 0); -} - static void tree2tok(struct vp9_token *tokens, const vp9_tree_index *tree, int i, int v, int l) { v += v; diff --git 
a/source/libvpx/vp9/encoder/vp9_treewriter.h b/source/libvpx/vp9/encoder/vp9_treewriter.h index fedfbe9..4a76d87 100644 --- a/source/libvpx/vp9/encoder/vp9_treewriter.h +++ b/source/libvpx/vp9/encoder/vp9_treewriter.h @@ -17,35 +17,6 @@ extern "C" { #endif -#define vp9_cost_zero(prob) (vp9_prob_cost[prob]) - -#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob)) - -#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vp9_complement(prob) \ - : (prob)) - -static INLINE unsigned int cost_branch256(const unsigned int ct[2], - vp9_prob p) { - return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); -} - -static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs, - int bits, int len) { - int cost = 0; - vp9_tree_index i = 0; - - do { - const int bit = (bits >> --len) & 1; - cost += vp9_cost_bit(probs[i >> 1], bit); - i = tree[i + bit]; - } while (len); - - return cost; -} - -void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree); -void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree); - void vp9_tree_probs_from_distribution(vp9_tree tree, unsigned int branch_ct[ /* n - 1 */ ][2], const unsigned int num_events[ /* n */ ]); diff --git a/source/libvpx/vp9/encoder/vp9_variance.c b/source/libvpx/vp9/encoder/vp9_variance.c index 8bc3850..71867a9 100644 --- a/source/libvpx/vp9/encoder/vp9_variance.c +++ b/source/libvpx/vp9/encoder/vp9_variance.c @@ -216,7 +216,7 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -273,7 +273,7 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, 
fdata3, src_pixels_per_line, 1, 65, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -330,7 +330,7 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -387,7 +387,7 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, return (var - (((int64_t)avg * avg) >> 10)); } +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -614,7 +620,7 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, 
dst_ptr, dst_pixels_per_line, sse); } @@ -658,7 +664,7 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -703,7 +709,7 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 1, 17, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -747,7 +753,7 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -791,7 +797,7 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -955,7 +961,7 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, 
vfilter); - comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -999,7 +1005,7 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1043,7 +1049,7 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1089,6 +1095,23 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } + + +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + int tmp; + tmp = pred[j] + ref[j]; + comp_pred[j] = (tmp + 1) >> 1; + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} diff --git a/source/libvpx/vp9/encoder/vp9_variance.h b/source/libvpx/vp9/encoder/vp9_variance.h index 3bc2091..62e20dc 100644 --- 
a/source/libvpx/vp9/encoder/vp9_variance.h +++ b/source/libvpx/vp9/encoder/vp9_variance.h @@ -100,21 +100,9 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride); + #ifdef __cplusplus } // extern "C" #endif diff --git a/source/libvpx/vp9/encoder/vp9_write_bit_buffer.c b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.c new file mode 100644 index 0000000..962d0ca --- /dev/null +++ b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/encoder/vp9_write_bit_buffer.h" + +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { + return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); +} + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { + const int off = (int)wb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (q == CHAR_BIT -1) { + wb->bit_buffer[p] = bit << q; + } else { + wb->bit_buffer[p] &= ~(1 << q); + wb->bit_buffer[p] |= bit << q; + } + wb->bit_offset = off + 1; +} + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) { + int bit; + for (bit = bits - 1; bit >= 0; bit--) + vp9_wb_write_bit(wb, (data >> bit) & 1); +} diff --git a/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h index 1795e05..073608d 100644 --- a/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h +++ b/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h @@ -24,29 +24,11 @@ struct vp9_write_bit_buffer { size_t bit_offset; }; -static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { - return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); -} - -static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { - const int off = (int)wb->bit_offset; - const int p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - off % CHAR_BIT; - if (q == CHAR_BIT -1) { - wb->bit_buffer[p] = bit << q; - } else { - wb->bit_buffer[p] &= ~(1 << q); - wb->bit_buffer[p] |= bit << q; - } - wb->bit_offset = off + 1; -} - -static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, - int data, int bits) { - int bit; - for (bit = bits - 1; bit >= 0; bit--) - vp9_wb_write_bit(wb, (data >> bit) & 1); -} +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb); + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit); + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits); #ifdef __cplusplus diff --git 
a/source/libvpx/vp9/encoder/vp9_writer.c b/source/libvpx/vp9/encoder/vp9_writer.c index fda1b39..8398fc0 100644 --- a/source/libvpx/vp9/encoder/vp9_writer.c +++ b/source/libvpx/vp9/encoder/vp9_writer.c @@ -12,34 +12,6 @@ #include "vp9/encoder/vp9_writer.h" #include "vp9/common/vp9_entropy.h" -#ifdef ENTROPY_STATS -unsigned int active_section = 0; -#endif - -const unsigned int vp9_prob_cost[256] = { - 2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, - 1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, - 873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733, - 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625, - 617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, - 534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472, - 467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415, - 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365, - 361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, - 317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281, - 278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246, - 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214, - 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, - 181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, - 154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, - 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107, - 105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, - 82, 81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63, - 61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43, - 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24, - 22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, - 4, 3, 1, 1}; - void vp9_start_encode(vp9_writer *br, uint8_t *source) { br->lowvalue = 0; br->range = 255; diff --git a/source/libvpx/vp9/encoder/vp9_writer.h b/source/libvpx/vp9/encoder/vp9_writer.h index defeec3..7f4fa1e 100644 --- a/source/libvpx/vp9/encoder/vp9_writer.h +++ b/source/libvpx/vp9/encoder/vp9_writer.h @@ -32,8 +32,6 
@@ typedef struct { uint64_t bit_counter; } vp9_writer; -extern const unsigned int vp9_prob_cost[256]; - void vp9_start_encode(vp9_writer *bc, uint8_t *buffer); void vp9_stop_encode(vp9_writer *bc); diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c index f3735eb..6865822 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c +++ b/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c @@ -13,39 +13,80 @@ #include "vpx_ports/mem.h" void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) { - // The 2D transform is done with two passes which are actually pretty - // similar. In the first one, we transform the columns and transpose - // the results. In the second one, we transform the rows. To achieve that, - // as the first pass results are transposed, we transpose the columns (that - // is the transposed rows) and transpose the results (so that it goes back - // in normal/row positions). - int pass; + // This 2D transform implements 4 vertical 1D transforms followed + // by 4 horizontal 1D transforms. The multiplies and adds are as given + // by Chen, Smith and Fralick ('77). The commands for moving the data + // around have been minimized by hand. + // For the purposes of the comments, the 16 inputs are referred to at i0 + // through iF (in raster order), intermediate variables are a0, b0, c0 + // through f, and correspond to the in-place computations mapped to input + // locations. The outputs, o0 through oF are labeled according to the + // output locations. + // Constants - // When we use them, in one case, they are all the same. In all others - // it's a pair of them that we need to repeat four times. This is done - // by constructing the 32 bit constant corresponding to that pair. 
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); - const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); - const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); - const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); + // These are the coefficients used for the multiplies. + // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64), + // where cospi_N_64 = cos(N pi /64) + const __m128i k__cospi_A = _mm_setr_epi16(cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64); + const __m128i k__cospi_B = _mm_setr_epi16(cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64); + const __m128i k__cospi_C = _mm_setr_epi16(cospi_8_64, cospi_24_64, + cospi_8_64, cospi_24_64, + cospi_24_64, -cospi_8_64, + cospi_24_64, -cospi_8_64); + const __m128i k__cospi_D = _mm_setr_epi16(cospi_24_64, -cospi_8_64, + cospi_24_64, -cospi_8_64, + cospi_8_64, cospi_24_64, + cospi_8_64, cospi_24_64); + const __m128i k__cospi_E = _mm_setr_epi16(cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64, + cospi_16_64, cospi_16_64); + const __m128i k__cospi_F = _mm_setr_epi16(cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64, + cospi_16_64, -cospi_16_64); + const __m128i k__cospi_G = _mm_setr_epi16(cospi_8_64, cospi_24_64, + cospi_8_64, cospi_24_64, + -cospi_8_64, -cospi_24_64, + -cospi_8_64, -cospi_24_64); + const __m128i k__cospi_H = _mm_setr_epi16(cospi_24_64, -cospi_8_64, + cospi_24_64, -cospi_8_64, + -cospi_24_64, cospi_8_64, + -cospi_24_64, cospi_8_64); + const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); + // This second rounding constant saves doing some extra adds at the end + const __m128i k__DCT_CONST_ROUNDING2 = _mm_set1_epi32(DCT_CONST_ROUNDING + +(DCT_CONST_ROUNDING << 1)); + const int DCT_CONST_BITS2 = 
DCT_CONST_BITS+2; const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); - const __m128i kOne = _mm_set1_epi16(1); __m128i in0, in1; + // Load inputs. { in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); + in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); + in1 = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *) + (input + 2 * stride))); in0 = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *) - (input + 1 * stride))); - in1 = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); - in1 = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *) - (input + 3 * stride)), in1); + (input + 3 * stride))); + // in0 = [i0 i1 i2 i3 iC iD iE iF] + // in1 = [i4 i5 i6 i7 i8 i9 iA iB] + - // x = x << 4 + // multiply by 16 to give some extra precision in0 = _mm_slli_epi16(in0, 4); in1 = _mm_slli_epi16(in1, 4); // if (i == 0 && input[0]) input[0] += 1; + // add 1 to the upper left pixel if it is non-zero, which helps reduce + // the round-trip error { // The mask will only contain whether the first value is zero, all // other comparison will fail as something shifted by 4 (above << 4) @@ -58,57 +99,119 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) { in0 = _mm_add_epi16(in0, k__nonzero_bias_b); } } - // Do the two transform/transpose passes - for (pass = 0; pass < 2; ++pass) { - // Transform 1/2: Add/subtract - const __m128i r0 = _mm_add_epi16(in0, in1); - const __m128i r1 = _mm_sub_epi16(in0, in1); - const __m128i r2 = _mm_unpacklo_epi64(r0, r1); - const __m128i r3 = _mm_unpackhi_epi64(r0, r1); - // Transform 1/2: Interleave to do the multiply by constants which gets us - // into 32 bits. 
- const __m128i t0 = _mm_unpacklo_epi16(r2, r3); - const __m128i t2 = _mm_unpackhi_epi16(r2, r3); - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p08_p24); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_p24_m08); + // There are 4 total stages, alternating between an add/subtract stage + // followed by an multiply-and-add stage. + { + // Stage 1: Add/subtract + + // in0 = [i0 i1 i2 i3 iC iD iE iF] + // in1 = [i4 i5 i6 i7 i8 i9 iA iB] + const __m128i r0 = _mm_unpacklo_epi16(in0, in1); + const __m128i r1 = _mm_unpackhi_epi16(in0, in1); + // r0 = [i0 i4 i1 i5 i2 i6 i3 i7] + // r1 = [iC i8 iD i9 iE iA iF iB] + const __m128i r2 = _mm_shuffle_epi32(r0, 0xB4); + const __m128i r3 = _mm_shuffle_epi32(r1, 0xB4); + // r2 = [i0 i4 i1 i5 i3 i7 i2 i6] + // r3 = [iC i8 iD i9 iF iB iE iA] + + const __m128i t0 = _mm_add_epi16(r2, r3); + const __m128i t1 = _mm_sub_epi16(r2, r3); + // t0 = [a0 a4 a1 a5 a3 a7 a2 a6] + // t1 = [aC a8 aD a9 aF aB aE aA] + + // Stage 2: multiply by constants (which gets us into 32 bits). 
+ // The constants needed here are: + // k__cospi_A = [p16 p16 p16 p16 p16 m16 p16 m16] + // k__cospi_B = [p16 m16 p16 m16 p16 p16 p16 p16] + // k__cospi_C = [p08 p24 p08 p24 p24 m08 p24 m08] + // k__cospi_D = [p24 m08 p24 m08 p08 p24 p08 p24] + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_A); + const __m128i u2 = _mm_madd_epi16(t0, k__cospi_B); + const __m128i u1 = _mm_madd_epi16(t1, k__cospi_C); + const __m128i u3 = _mm_madd_epi16(t1, k__cospi_D); + // Then add and right-shift to get back to 16-bit range const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); + const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); + const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); + const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - // Combine and transpose - const __m128i res0 = _mm_packs_epi32(w0, w2); - const __m128i res1 = _mm_packs_epi32(w4, w6); - // 00 01 02 03 20 21 22 23 - // 10 11 12 13 30 31 32 33 - const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1); - const __m128i tr0_1 = _mm_unpackhi_epi16(res0, res1); - // 00 10 01 11 02 12 03 13 - // 20 30 21 31 22 32 23 33 - in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); - in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); - in1 = _mm_shuffle_epi32(in1, 0x4E); - // 00 10 20 30 01 11 21 31 in0 contains 0 followed by 1 - // 02 12 22 32 03 13 23 33 in1 contains 2 followed by 3 + const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); + // w0 = [b0 b1 b7 b6] + // w1 = [b8 b9 bF bE] + // w2 = [b4 b5 b3 b2] + // w3 = [bC bD bB bA] + const __m128i x0 = _mm_packs_epi32(w0, w1); + const __m128i x1 = _mm_packs_epi32(w2, 
w3); + // x0 = [b0 b1 b7 b6 b8 b9 bF bE] + // x1 = [b4 b5 b3 b2 bC bD bB bA] + in0 = _mm_shuffle_epi32(x0, 0xD8); + in1 = _mm_shuffle_epi32(x1, 0x8D); + // in0 = [b0 b1 b8 b9 b7 b6 bF bE] + // in1 = [b3 b2 bB bA b4 b5 bC bD] } - in1 = _mm_shuffle_epi32(in1, 0x4E); - // Post-condition output and store it (v + 1) >> 2, taking advantage - // of the fact 1/3 are stored just after 0/2. { - __m128i out01 = _mm_add_epi16(in0, kOne); - __m128i out23 = _mm_add_epi16(in1, kOne); - out01 = _mm_srai_epi16(out01, 2); - out23 = _mm_srai_epi16(out23, 2); - _mm_storeu_si128((__m128i *)(output + 0 * 4), out01); - _mm_storeu_si128((__m128i *)(output + 2 * 4), out23); + // vertical DCTs finished. Now we do the horizontal DCTs. + // Stage 3: Add/subtract + + const __m128i t0 = _mm_add_epi16(in0, in1); + const __m128i t1 = _mm_sub_epi16(in0, in1); + // t0 = [c0 c1 c8 c9 c4 c5 cC cD] + // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE] + + // Stage 4: multiply by constants (which gets us into 32 bits). + // The constants needed here are: + // k__cospi_E = [p16 p16 p16 p16 p16 p16 p16 p16] + // k__cospi_F = [p16 m16 p16 m16 p16 m16 p16 m16] + // k__cospi_G = [p08 p24 p08 p24 m08 m24 m08 m24] + // k__cospi_H = [p24 m08 p24 m08 m24 p08 m24 p08] + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_E); + const __m128i u1 = _mm_madd_epi16(t0, k__cospi_F); + const __m128i u2 = _mm_madd_epi16(t1, k__cospi_G); + const __m128i u3 = _mm_madd_epi16(t1, k__cospi_H); + // Then add and right-shift to get back to 16-bit range + // but this combines the final right-shift as well to save operations + // This unusual rounding operations is to maintain bit-accurate + // compatibility with the c version of this function which has two + // rounding steps in a row. 
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING2); + const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING2); + const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING2); + const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING2); + const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS2); + const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS2); + const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS2); + const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS2); + // w0 = [o0 o4 o8 oC] + // w1 = [o2 o6 oA oE] + // w2 = [o1 o5 o9 oD] + // w3 = [o3 o7 oB oF] + // remember the o's are numbered according to the correct output location + const __m128i x0 = _mm_packs_epi32(w0, w1); + const __m128i x1 = _mm_packs_epi32(w2, w3); + // x0 = [o0 o4 o8 oC o2 o6 oA oE] + // x1 = [o1 o5 o9 oD o3 o7 oB oF] + const __m128i y0 = _mm_unpacklo_epi16(x0, x1); + const __m128i y1 = _mm_unpackhi_epi16(x0, x1); + // y0 = [o0 o1 o4 o5 o8 o9 oC oD] + // y1 = [o2 o3 o6 o7 oA oB oE oF] + in0 = _mm_unpacklo_epi32(y0, y1); + // in0 = [o0 o1 o2 o3 o4 o5 o6 o7] + in1 = _mm_unpackhi_epi32(y0, y1); + // in1 = [o8 o9 oA oB oC oD oE oF] + } + // Post-condition (v + 1) >> 2 is now incorporated into previous + // add and right-shift commands. Only 2 store instructions needed + // because we are using the fact that 1/3 are stored just after 0/2. + { + _mm_storeu_si128((__m128i *)(output + 0 * 4), in0); + _mm_storeu_si128((__m128i *)(output + 2 * 4), in1); } } + static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in, int stride) { const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); diff --git a/source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c new file mode 100644 index 0000000..f31b176 --- /dev/null +++ b/source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <immintrin.h> // AVX2 +#include "vpx/vpx_integer.h" + +void vp9_sad32x32x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 32 ; i++) { + // load src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. 
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} + +void vp9_sad64x64x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; + __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; + __m256i ref3_reg, ref3next_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 64 ; i++) { + // load 64 bytes from src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 
+ 32)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); + ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); + ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); + ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); + + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. 
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} diff --git a/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c b/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c index a3d0114..c4d17fc 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c +++ b/source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c @@ -13,7 +13,6 @@ #include "vp9/common/vp9_pragmas.h" #include "vpx_ports/mem.h" -extern unsigned int vp9_get_mb_ss_mmx(const int16_t *src_ptr); extern unsigned int vp9_get8x8var_mmx ( const unsigned char *src_ptr, diff --git a/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c index 79e42c4..9e65694 100644 --- a/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c +++ b/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c @@ -24,10 +24,6 @@ extern unsigned int vp9_get4x4var_mmx int *Sum ); -unsigned int vp9_get_mb_ss_sse2 -( - const int16_t *src_ptr -); unsigned int vp9_get16x16var_sse2 ( const unsigned char *src_ptr, diff --git a/source/libvpx/vp9/vp9_common.mk b/source/libvpx/vp9/vp9_common.mk index 9fb6115..b1ba0b1 100644 --- a/source/libvpx/vp9/vp9_common.mk +++ b/source/libvpx/vp9/vp9_common.mk @@ -12,7 +12,6 @@ VP9_COMMON_SRCS-yes += vp9_common.mk VP9_COMMON_SRCS-yes += vp9_iface_common.h VP9_COMMON_SRCS-yes += 
common/vp9_pragmas.h VP9_COMMON_SRCS-yes += common/vp9_ppflags.h -VP9_COMMON_SRCS-yes += common/vp9_onyx.h VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_blockd.c VP9_COMMON_SRCS-yes += common/vp9_convolve.c @@ -45,7 +44,7 @@ VP9_COMMON_SRCS-yes += common/vp9_quant_common.h VP9_COMMON_SRCS-yes += common/vp9_reconinter.h VP9_COMMON_SRCS-yes += common/vp9_reconintra.h VP9_COMMON_SRCS-yes += common/vp9_rtcd.c -VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh +VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.pl VP9_COMMON_SRCS-yes += common/vp9_scale.h VP9_COMMON_SRCS-yes += common/vp9_scale.c VP9_COMMON_SRCS-yes += common/vp9_seg_common.h @@ -145,4 +144,4 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM) -$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh)) +$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) diff --git a/source/libvpx/vp9/vp9_cx_iface.c b/source/libvpx/vp9/vp9_cx_iface.c index 28c60d1..0623ad1 100644 --- a/source/libvpx/vp9/vp9_cx_iface.c +++ b/source/libvpx/vp9/vp9_cx_iface.c @@ -17,12 +17,11 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vpx/vp8cx.h" #include "vp9/encoder/vp9_firstpass.h" -#include "vp9/common/vp9_onyx.h" #include "vp9/vp9_iface_common.h" struct vp9_extracfg { struct vpx_codec_pkt_list *pkt_list; - int cpu_used; /* available cpu percentage in 1/16 */ + int cpu_used; // available cpu percentage in 1/16 unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; unsigned int sharpness; @@ -33,15 +32,17 @@ struct vp9_extracfg { unsigned int arnr_strength; unsigned int arnr_type; vp8e_tuning tuning; - unsigned int cq_level; /* constrained quality level */ + unsigned int cq_level; // constrained quality level unsigned int rc_max_intra_bitrate_pct; unsigned int lossless; unsigned int 
frame_parallel_decoding_mode; - unsigned int aq_mode; + AQ_MODE aq_mode; + unsigned int frame_periodic_boost; + BIT_DEPTH bit_depth; }; struct extraconfig_map { - int usage; + int usage; struct vp9_extracfg cfg; }; @@ -50,22 +51,24 @@ static const struct extraconfig_map extracfg_map[] = { 0, { // NOLINT NULL, - 0, /* cpu_used */ - 1, /* enable_auto_alt_ref */ - 0, /* noise_sensitivity */ - 0, /* sharpness */ - 0, /* static_thresh */ - 0, /* tile_columns */ - 0, /* tile_rows */ - 7, /* arnr_max_frames */ - 5, /* arnr_strength */ - 3, /* arnr_type*/ - 0, /* tuning*/ - 10, /* cq_level */ - 0, /* rc_max_intra_bitrate_pct */ - 0, /* lossless */ - 0, /* frame_parallel_decoding_mode */ - 0, /* aq_mode */ + 0, // cpu_used + 1, // enable_auto_alt_ref + 0, // noise_sensitivity + 0, // sharpness + 0, // static_thresh + 0, // tile_columns + 0, // tile_rows + 7, // arnr_max_frames + 5, // arnr_strength + 3, // arnr_type + VP8_TUNE_PSNR, // tuning + 10, // cq_level + 0, // rc_max_intra_bitrate_pct + 0, // lossless + 0, // frame_parallel_decoding_mode + NO_AQ, // aq_mode + 0, // frame_periodic_delta_q + BITS_8, // Bit depth } } }; @@ -73,9 +76,9 @@ static const struct extraconfig_map extracfg_map[] = { struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; - struct vp9_extracfg vp8_cfg; + struct vp9_extracfg extra_cfg; VP9_CONFIG oxcf; - VP9_PTR cpi; + VP9_COMP *cpi; unsigned char *cx_data; size_t cx_data_sz; unsigned char *pending_cx_data; @@ -102,15 +105,12 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { return VP9_LAST_FLAG; } -static vpx_codec_err_t -update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) { - vpx_codec_err_t res; +static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) { + const vpx_codec_err_t res = error->error_code; - if ((res = error->error_code)) - ctx->base.err_detail = error->has_detail - ? 
error->detail - : NULL; + if (res != VPX_CODEC_OK) + ctx->base.err_detail = error->has_detail ? error->detail : NULL; return res; } @@ -141,24 +141,20 @@ update_error_state(vpx_codec_alg_priv_t *ctx, if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean");\ } while (0) -static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, - const struct vp9_extracfg *vp8_cfg) { - RANGE_CHECK(cfg, g_w, 1, 65535); /* 16 bits available */ - RANGE_CHECK(cfg, g_h, 1, 65535); /* 16 bits available */ + const struct vp9_extracfg *extra_cfg) { + RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available + RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); - RANGE_CHECK_BOOL(vp8_cfg, lossless); - if (vp8_cfg->lossless) { - RANGE_CHECK_HI(cfg, rc_max_quantizer, 0); - RANGE_CHECK_HI(cfg, rc_min_quantizer, 0); - } - RANGE_CHECK(vp8_cfg, aq_mode, 0, AQ_MODES_COUNT - 1); - + RANGE_CHECK_BOOL(extra_cfg, lossless); + RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 1); + RANGE_CHECK(extra_cfg, frame_periodic_boost, 0, 1); RANGE_CHECK_HI(cfg, g_threads, 64); RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS); RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q); @@ -166,61 +162,53 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); - // RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); RANGE_CHECK_BOOL(cfg, rc_resize_allowed); RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); 
RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); - RANGE_CHECK(cfg, ss_number_layers, 1, - VPX_SS_MAX_LAYERS); /*Spatial layers max */ - + RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); if (cfg->ts_number_layers > 1) { unsigned int i; - for (i = 1; i < cfg->ts_number_layers; ++i) { - if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) { + for (i = 1; i < cfg->ts_number_layers; ++i) + if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1]) ERROR("ts_target_bitrate entries are not increasing"); - } - } - RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1); - for (i = cfg->ts_number_layers-2; i > 0; --i) { - if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) { + + RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); + for (i = cfg->ts_number_layers - 2; i > 0; --i) + if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) ERROR("ts_rate_decimator factors are not powers of 2"); - } - } } - /* VP8 does not support a lower bound on the keyframe interval in - * automatic keyframe placement mode. - */ - if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist - && cfg->kf_min_dist > 0) + // VP8 does not support a lower bound on the keyframe interval in + // automatic keyframe placement mode. 
+ if (cfg->kf_mode != VPX_KF_DISABLED && + cfg->kf_min_dist != cfg->kf_max_dist && + cfg->kf_min_dist > 0) ERROR("kf_min_dist not supported in auto mode, use 0 " "or kf_max_dist instead."); - RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); - RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); - - RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); - - RANGE_CHECK(vp8_cfg, tile_columns, 0, 6); - RANGE_CHECK(vp8_cfg, tile_rows, 0, 2); - RANGE_CHECK_HI(vp8_cfg, sharpness, 7); - RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); - RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); - RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); - RANGE_CHECK(vp8_cfg, cq_level, 0, 63); + RANGE_CHECK_BOOL(extra_cfg, enable_auto_alt_ref); + RANGE_CHECK(extra_cfg, cpu_used, -16, 16); + RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); + RANGE_CHECK(extra_cfg, tile_columns, 0, 6); + RANGE_CHECK(extra_cfg, tile_rows, 0, 2); + RANGE_CHECK_HI(extra_cfg, sharpness, 7); + RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15); + RANGE_CHECK_HI(extra_cfg, arnr_strength, 6); + RANGE_CHECK(extra_cfg, arnr_type, 1, 3); + RANGE_CHECK(extra_cfg, cq_level, 0, 63); // TODO(yaowu): remove this when ssim tuning is implemented for vp9 - if (vp8_cfg->tuning == VP8_TUNE_SSIM) + if (extra_cfg->tuning == VP8_TUNE_SSIM) ERROR("Option --tune=ssim is not currently supported in VP9."); if (cfg->g_pass == VPX_RC_LAST_PASS) { size_t packet_sz = sizeof(FIRSTPASS_STATS); int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz); - FIRSTPASS_STATS *stats; + const FIRSTPASS_STATS *stats; if (cfg->rc_twopass_stats_in.buf == NULL) ERROR("rc_twopass_stats_in.buf not set."); @@ -228,22 +216,57 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); - if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) - ERROR("rc_twopass_stats_in requires at least two packets."); + if (cfg->ss_number_layers > 1) { + int i; + unsigned int 
n_packets_per_layer[VPX_SS_MAX_LAYERS] = {0}; + + stats = cfg->rc_twopass_stats_in.buf; + for (i = 0; i < n_packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < (int)cfg->ss_number_layers) { + ++n_packets_per_layer[layer_id]; + } + } + + for (i = 0; i < (int)cfg->ss_number_layers; ++i) { + unsigned int layer_id; + if (n_packets_per_layer[i] < 2) { + ERROR("rc_twopass_stats_in requires at least two packets for each " + "layer."); + } + + stats = (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + + n_packets - cfg->ss_number_layers + i; + layer_id = (int)stats->spatial_layer_id; + + if (layer_id >= cfg->ss_number_layers + ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1) + ERROR("rc_twopass_stats_in missing EOS stats packet"); + } + } else { + if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) + ERROR("rc_twopass_stats_in requires at least two packets."); - stats = (void *)((char *)cfg->rc_twopass_stats_in.buf - + (n_packets - 1) * packet_sz); + stats = + (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1; - if ((int)(stats->count + 0.5) != n_packets - 1) - ERROR("rc_twopass_stats_in missing EOS stats packet"); + if ((int)(stats->count + 0.5) != n_packets - 1) + ERROR("rc_twopass_stats_in missing EOS stats packet"); + } } + if (cfg->g_profile <= (unsigned int)PROFILE_1 && + extra_cfg->bit_depth > BITS_8) + ERROR("High bit-depth not supported in profile < 2"); + if (cfg->g_profile > (unsigned int)PROFILE_1 && + extra_cfg->bit_depth == BITS_8) + ERROR("Bit-depth 8 not supported in profile > 1"); return VPX_CODEC_OK; } static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img) { + const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: @@ -255,28 +278,27 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, "supported."); } - if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h)) + if (img->d_w != 
ctx->cfg.g_w || img->d_h != ctx->cfg.g_h) ERROR("Image size must match encoder init configuration size"); return VPX_CODEC_OK; } -static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, - vpx_codec_enc_cfg_t cfg, - struct vp9_extracfg vp9_cfg) { - oxcf->version = cfg.g_profile; - oxcf->width = cfg.g_w; - oxcf->height = cfg.g_h; - /* guess a frame rate if out of whack, use 30 */ - oxcf->framerate = (double)(cfg.g_timebase.den) - / (double)(cfg.g_timebase.num); - - if (oxcf->framerate > 180) { +static vpx_codec_err_t set_encoder_config( + VP9_CONFIG *oxcf, + const vpx_codec_enc_cfg_t *cfg, + const struct vp9_extracfg *extra_cfg) { + oxcf->profile = cfg->g_profile; + oxcf->width = cfg->g_w; + oxcf->height = cfg->g_h; + oxcf->bit_depth = extra_cfg->bit_depth; + // guess a frame rate if out of whack, use 30 + oxcf->framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; + if (oxcf->framerate > 180) oxcf->framerate = 30; - } - switch (cfg.g_pass) { + switch (cfg->g_pass) { case VPX_RC_ONE_PASS: oxcf->mode = MODE_GOODQUALITY; break; @@ -288,87 +310,83 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, break; } - if (cfg.g_pass == VPX_RC_FIRST_PASS) { - oxcf->lag_in_frames = 0; - } else { - oxcf->lag_in_frames = cfg.g_lag_in_frames; - } + oxcf->lag_in_frames = cfg->g_pass == VPX_RC_FIRST_PASS ? 
0 + : cfg->g_lag_in_frames; - oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; - if (cfg.rc_end_usage == VPX_CQ) + oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; + if (cfg->rc_end_usage == VPX_CQ) oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; - else if (cfg.rc_end_usage == VPX_Q) + else if (cfg->rc_end_usage == VPX_Q) oxcf->end_usage = USAGE_CONSTANT_QUALITY; - else if (cfg.rc_end_usage == VPX_CBR) + else if (cfg->rc_end_usage == VPX_CBR) oxcf->end_usage = USAGE_STREAM_FROM_SERVER; - oxcf->target_bandwidth = cfg.rc_target_bitrate; - oxcf->rc_max_intra_bitrate_pct = vp9_cfg.rc_max_intra_bitrate_pct; + oxcf->target_bandwidth = cfg->rc_target_bitrate; + oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; - oxcf->best_allowed_q = cfg.rc_min_quantizer; - oxcf->worst_allowed_q = cfg.rc_max_quantizer; - oxcf->cq_level = vp9_cfg.cq_level; + oxcf->best_allowed_q = vp9_quantizer_to_qindex(cfg->rc_min_quantizer); + oxcf->worst_allowed_q = vp9_quantizer_to_qindex(cfg->rc_max_quantizer); + oxcf->cq_level = vp9_quantizer_to_qindex(extra_cfg->cq_level); oxcf->fixed_q = -1; - oxcf->under_shoot_pct = cfg.rc_undershoot_pct; - oxcf->over_shoot_pct = cfg.rc_overshoot_pct; + oxcf->under_shoot_pct = cfg->rc_undershoot_pct; + oxcf->over_shoot_pct = cfg->rc_overshoot_pct; + + oxcf->maximum_buffer_size = cfg->rc_buf_sz; + oxcf->starting_buffer_level = cfg->rc_buf_initial_sz; + oxcf->optimal_buffer_level = cfg->rc_buf_optimal_sz; - oxcf->maximum_buffer_size = cfg.rc_buf_sz; - oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; - oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; + oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh; - oxcf->drop_frames_water_mark = cfg.rc_dropframe_thresh; + oxcf->two_pass_vbrbias = cfg->rc_2pass_vbr_bias_pct; + oxcf->two_pass_vbrmin_section = cfg->rc_2pass_vbr_minsection_pct; + oxcf->two_pass_vbrmax_section = cfg->rc_2pass_vbr_maxsection_pct; - oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct; - oxcf->two_pass_vbrmin_section = 
cfg.rc_2pass_vbr_minsection_pct; - oxcf->two_pass_vbrmax_section = cfg.rc_2pass_vbr_maxsection_pct; + oxcf->auto_key = cfg->kf_mode == VPX_KF_AUTO && + cfg->kf_min_dist != cfg->kf_max_dist; - oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO - && cfg.kf_min_dist != cfg.kf_max_dist; - // oxcf->kf_min_dist = cfg.kf_min_dis; - oxcf->key_freq = cfg.kf_max_dist; + oxcf->key_freq = cfg->kf_max_dist; - oxcf->cpu_used = vp9_cfg.cpu_used; - oxcf->encode_breakout = vp9_cfg.static_thresh; - oxcf->play_alternate = vp9_cfg.enable_auto_alt_ref; - oxcf->noise_sensitivity = vp9_cfg.noise_sensitivity; - oxcf->sharpness = vp9_cfg.sharpness; + oxcf->cpu_used = extra_cfg->cpu_used; + oxcf->encode_breakout = extra_cfg->static_thresh; + oxcf->play_alternate = extra_cfg->enable_auto_alt_ref; + oxcf->noise_sensitivity = extra_cfg->noise_sensitivity; + oxcf->sharpness = extra_cfg->sharpness; - oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in; - oxcf->output_pkt_list = vp9_cfg.pkt_list; + oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in; + oxcf->output_pkt_list = extra_cfg->pkt_list; - oxcf->arnr_max_frames = vp9_cfg.arnr_max_frames; - oxcf->arnr_strength = vp9_cfg.arnr_strength; - oxcf->arnr_type = vp9_cfg.arnr_type; + oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; + oxcf->arnr_strength = extra_cfg->arnr_strength; + oxcf->arnr_type = extra_cfg->arnr_type; - oxcf->tuning = vp9_cfg.tuning; + oxcf->tuning = extra_cfg->tuning; - oxcf->tile_columns = vp9_cfg.tile_columns; - oxcf->tile_rows = vp9_cfg.tile_rows; + oxcf->tile_columns = extra_cfg->tile_columns; + oxcf->tile_rows = extra_cfg->tile_rows; - oxcf->lossless = vp9_cfg.lossless; + oxcf->lossless = extra_cfg->lossless; - oxcf->error_resilient_mode = cfg.g_error_resilient; - oxcf->frame_parallel_decoding_mode = vp9_cfg.frame_parallel_decoding_mode; + oxcf->error_resilient_mode = cfg->g_error_resilient; + oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; - oxcf->aq_mode = vp9_cfg.aq_mode; + oxcf->aq_mode = 
extra_cfg->aq_mode; - oxcf->ss_number_layers = cfg.ss_number_layers; + oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost; + + oxcf->ss_number_layers = cfg->ss_number_layers; if (oxcf->ss_number_layers > 1) { - memcpy(oxcf->ss_target_bitrate, cfg.ss_target_bitrate, - sizeof(cfg.ss_target_bitrate)); + vp9_copy(oxcf->ss_target_bitrate, cfg->ss_target_bitrate); } else if (oxcf->ss_number_layers == 1) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; } - oxcf->ts_number_layers = cfg.ts_number_layers; + oxcf->ts_number_layers = cfg->ts_number_layers; if (oxcf->ts_number_layers > 1) { - memcpy(oxcf->ts_target_bitrate, cfg.ts_target_bitrate, - sizeof(cfg.ts_target_bitrate)); - memcpy(oxcf->ts_rate_decimator, cfg.ts_rate_decimator, - sizeof(cfg.ts_rate_decimator)); + vp9_copy(oxcf->ts_target_bitrate, cfg->ts_target_bitrate); + vp9_copy(oxcf->ts_rate_decimator, cfg->ts_rate_decimator); } else if (oxcf->ts_number_layers == 1) { oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; @@ -406,49 +424,44 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_config(vpx_codec_alg_priv_t *ctx, - const vpx_codec_enc_cfg_t *cfg) { +static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx, + const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; - if ((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) + if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) ERROR("Cannot change width or height after initialization"); - /* Prevent increasing lag_in_frames. This check is stricter than it needs - * to be -- the limit is not increasing past the first lag_in_frames - * value, but we don't track the initial config, only the last successful - * config. - */ - if ((cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames)) + // Prevent increasing lag_in_frames. 
This check is stricter than it needs + // to be -- the limit is not increasing past the first lag_in_frames + // value, but we don't track the initial config, only the last successful + // config. + if (cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames) ERROR("Cannot increase lag_in_frames"); - res = validate_config(ctx, cfg, &ctx->vp8_cfg); + res = validate_config(ctx, cfg, &ctx->extra_cfg); if (res == VPX_CODEC_OK) { ctx->cfg = *cfg; - set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } return res; } - -int vp9_reverse_trans(int q); - - -static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_get_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, + va_list args) { void *arg = va_arg(args, void *); #define MAP(id, var) case id: *(RECAST(id, arg)) = var; break - if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + if (arg == NULL) + return VPX_CODEC_INVALID_PARAM; switch (ctrl_id) { - MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); - MAP(VP8E_GET_LAST_QUANTIZER_64, - vp9_reverse_trans(vp9_get_quantizer(ctx->cpi))); + MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); + MAP(VP8E_GET_LAST_QUANTIZER_64, + vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi))); } return VPX_CODEC_OK; @@ -456,38 +469,39 @@ static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { - vpx_codec_err_t res = VPX_CODEC_OK; - struct vp9_extracfg xcfg = ctx->vp8_cfg; +static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, + va_list args) { + vpx_codec_err_t res = VPX_CODEC_OK; + struct vp9_extracfg extra_cfg = ctx->extra_cfg; #define MAP(id, var) case id: var = CAST(id, args); break; switch (ctrl_id) { - MAP(VP8E_SET_CPUUSED, xcfg.cpu_used); - MAP(VP8E_SET_ENABLEAUTOALTREF, 
xcfg.enable_auto_alt_ref); - MAP(VP8E_SET_NOISE_SENSITIVITY, xcfg.noise_sensitivity); - MAP(VP8E_SET_SHARPNESS, xcfg.sharpness); - MAP(VP8E_SET_STATIC_THRESHOLD, xcfg.static_thresh); - MAP(VP9E_SET_TILE_COLUMNS, xcfg.tile_columns); - MAP(VP9E_SET_TILE_ROWS, xcfg.tile_rows); - MAP(VP8E_SET_ARNR_MAXFRAMES, xcfg.arnr_max_frames); - MAP(VP8E_SET_ARNR_STRENGTH, xcfg.arnr_strength); - MAP(VP8E_SET_ARNR_TYPE, xcfg.arnr_type); - MAP(VP8E_SET_TUNING, xcfg.tuning); - MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); - MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct); - MAP(VP9E_SET_LOSSLESS, xcfg.lossless); - MAP(VP9E_SET_FRAME_PARALLEL_DECODING, xcfg.frame_parallel_decoding_mode); - MAP(VP9E_SET_AQ_MODE, xcfg.aq_mode); + MAP(VP8E_SET_CPUUSED, extra_cfg.cpu_used); + MAP(VP8E_SET_ENABLEAUTOALTREF, extra_cfg.enable_auto_alt_ref); + MAP(VP8E_SET_NOISE_SENSITIVITY, extra_cfg.noise_sensitivity); + MAP(VP8E_SET_SHARPNESS, extra_cfg.sharpness); + MAP(VP8E_SET_STATIC_THRESHOLD, extra_cfg.static_thresh); + MAP(VP9E_SET_TILE_COLUMNS, extra_cfg.tile_columns); + MAP(VP9E_SET_TILE_ROWS, extra_cfg.tile_rows); + MAP(VP8E_SET_ARNR_MAXFRAMES, extra_cfg.arnr_max_frames); + MAP(VP8E_SET_ARNR_STRENGTH, extra_cfg.arnr_strength); + MAP(VP8E_SET_ARNR_TYPE, extra_cfg.arnr_type); + MAP(VP8E_SET_TUNING, extra_cfg.tuning); + MAP(VP8E_SET_CQ_LEVEL, extra_cfg.cq_level); + MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, extra_cfg.rc_max_intra_bitrate_pct); + MAP(VP9E_SET_LOSSLESS, extra_cfg.lossless); + MAP(VP9E_SET_FRAME_PARALLEL_DECODING, + extra_cfg.frame_parallel_decoding_mode); + MAP(VP9E_SET_AQ_MODE, extra_cfg.aq_mode); + MAP(VP9E_SET_FRAME_PERIODIC_BOOST, extra_cfg.frame_periodic_boost); } - res = validate_config(ctx, &ctx->cfg, &xcfg); + res = validate_config(ctx, &ctx->cfg, &extra_cfg); if (res == VPX_CODEC_OK) { - ctx->vp8_cfg = xcfg; - set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + ctx->extra_cfg = extra_cfg; + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); 
vp9_change_config(ctx->cpi, &ctx->oxcf); } @@ -495,17 +509,13 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, #undef MAP } - -static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { - vpx_codec_err_t res = VPX_CODEC_OK; - struct vpx_codec_alg_priv *priv; - vpx_codec_enc_cfg_t *cfg; - unsigned int i; - - VP9_PTR optr; +static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { + vpx_codec_err_t res = VPX_CODEC_OK; if (ctx->priv == NULL) { - priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); + int i; + vpx_codec_enc_cfg_t *cfg; + struct vpx_codec_alg_priv *priv = calloc(1, sizeof(*priv)); if (priv == NULL) return VPX_CODEC_MEM_ERROR; @@ -517,49 +527,47 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { ctx->priv->enc.total_encoders = 1; if (ctx->config.enc) { - /* Update the reference to the config structure to an - * internal copy. - */ + // Update the reference to the config structure to an + // internal copy. ctx->priv->alg_priv->cfg = *ctx->config.enc; ctx->config.enc = &ctx->priv->alg_priv->cfg; } - cfg = &ctx->priv->alg_priv->cfg; + cfg = &ctx->priv->alg_priv->cfg; - /* Select the extra vp6 configuration table based on the current - * usage value. If the current usage value isn't found, use the - * values for usage case 0. - */ + // Select the extra vp6 configuration table based on the current + // usage value. If the current usage value isn't found, use the + // values for usage case 0. for (i = 0; extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; - i++) {} + ++i) {} - priv->vp8_cfg = extracfg_map[i].cfg; - priv->vp8_cfg.pkt_list = &priv->pkt_list.head; + priv->extra_cfg = extracfg_map[i].cfg; + priv->extra_cfg.pkt_list = &priv->pkt_list.head; // Maximum buffer size approximated based on having multiple ARF. 
priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 8; if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; - priv->cx_data = malloc(priv->cx_data_sz); - - if (priv->cx_data == NULL) return VPX_CODEC_MEM_ERROR; + priv->cx_data = (unsigned char *)malloc(priv->cx_data_sz); + if (priv->cx_data == NULL) + return VPX_CODEC_MEM_ERROR; vp9_initialize_enc(); - res = validate_config(priv, &priv->cfg, &priv->vp8_cfg); + res = validate_config(priv, &priv->cfg, &priv->extra_cfg); if (res == VPX_CODEC_OK) { - set_vp9e_config(&ctx->priv->alg_priv->oxcf, - ctx->priv->alg_priv->cfg, - ctx->priv->alg_priv->vp8_cfg); - optr = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); - - if (optr == NULL) + VP9_COMP *cpi; + set_encoder_config(&ctx->priv->alg_priv->oxcf, + &ctx->priv->alg_priv->cfg, + &ctx->priv->alg_priv->extra_cfg); + cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); + if (cpi == NULL) res = VPX_CODEC_MEM_ERROR; else - ctx->priv->alg_priv->cpi = optr; + ctx->priv->alg_priv->cpi = cpi; } } @@ -567,46 +575,40 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { } -static vpx_codec_err_t vp9e_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - return vp9e_common_init(ctx); +static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { + return encoder_common_init(ctx); } -static vpx_codec_err_t vp9e_destroy(vpx_codec_alg_priv_t *ctx) { +static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); - vp9_remove_compressor(&ctx->cpi); + vp9_remove_compressor(ctx->cpi); free(ctx); return VPX_CODEC_OK; } static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, - unsigned long duration, - unsigned long deadline) { - unsigned int new_qc; - - /* Use best quality mode if no deadline is given. */ - new_qc = MODE_BESTQUALITY; + unsigned long duration, + unsigned long deadline) { + // Use best quality mode if no deadline is given. 
+ MODE new_qc = MODE_BESTQUALITY; if (deadline) { - uint64_t duration_us; - - /* Convert duration parameter from stream timebase to microseconds */ - duration_us = (uint64_t)duration * 1000000 - * (uint64_t)ctx->cfg.g_timebase.num - / (uint64_t)ctx->cfg.g_timebase.den; - - /* If the deadline is more that the duration this frame is to be shown, - * use good quality mode. Otherwise use realtime mode. - */ - new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME; + // Convert duration parameter from stream timebase to microseconds + const uint64_t duration_us = (uint64_t)duration * 1000000 * + (uint64_t)ctx->cfg.g_timebase.num / + (uint64_t)ctx->cfg.g_timebase.den; + + // If the deadline is more that the duration this frame is to be shown, + // use good quality mode. Otherwise use realtime mode. + new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME; } if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS) new_qc = MODE_FIRSTPASS; else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS) - new_qc = (new_qc == MODE_BESTQUALITY) - ? MODE_SECONDPASS_BEST - : MODE_SECONDPASS; + new_qc = (new_qc == MODE_BESTQUALITY) ? 
MODE_SECONDPASS_BEST + : MODE_SECONDPASS; if (ctx->oxcf.mode != new_qc) { ctx->oxcf.mode = new_qc; @@ -623,10 +625,10 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { assert(ctx->pending_frame_count); assert(ctx->pending_frame_count <= 8); - /* Add the number of frames to the marker byte */ + // Add the number of frames to the marker byte marker |= ctx->pending_frame_count - 1; - /* Choose the magnitude */ + // Choose the magnitude for (mag = 0, mask = 0xff; mag < 4; mag++) { if (ctx->pending_frame_magnitude < mask) break; @@ -635,7 +637,7 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { } marker |= mag << 3; - /* Write the index */ + // Write the index index_sz = 2 + (mag + 1) * ctx->pending_frame_count; if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) { uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz; @@ -656,12 +658,12 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { return index_sz; } -static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline) { +static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, + const vpx_image_t *img, + vpx_codec_pts_t pts, + unsigned long duration, + vpx_enc_frame_flags_t flags, + unsigned long deadline) { vpx_codec_err_t res = VPX_CODEC_OK; if (img) @@ -670,15 +672,15 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, pick_quickcompress_mode(ctx, duration, deadline); vpx_codec_pkt_list_init(&ctx->pkt_list); - /* Handle Flags */ - if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) - || ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { + // Handle Flags + if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) || + ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { ctx->base.err_detail = "Conflicting flags."; return VPX_CODEC_INVALID_PARAM; } - 
if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF - | VP8_EFLAG_NO_REF_ARF)) { + if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF)) { int ref = 7; if (flags & VP8_EFLAG_NO_REF_LAST) @@ -693,9 +695,9 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, vp9_use_as_reference(ctx->cpi, ref); } - if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF - | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF - | VP8_EFLAG_FORCE_ARF)) { + if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | + VP8_EFLAG_FORCE_ARF)) { int upd = 7; if (flags & VP8_EFLAG_NO_UPD_LAST) @@ -714,16 +716,16 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, vp9_update_entropy(ctx->cpi, 0); } - /* Handle fixed keyframe intervals */ - if (ctx->cfg.kf_mode == VPX_KF_AUTO - && ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { + // Handle fixed keyframe intervals + if (ctx->cfg.kf_mode == VPX_KF_AUTO && + ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) { flags |= VPX_EFLAG_FORCE_KF; ctx->fixed_kf_cntr = 1; } } - /* Initialize the encoder instance on the first frame. */ + // Initialize the encoder instance on the first frame. if (res == VPX_CODEC_OK && ctx->cpi != NULL) { unsigned int lib_flags; YV12_BUFFER_CONFIG sd; @@ -731,15 +733,15 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, size_t size, cx_data_sz; unsigned char *cx_data; - /* Set up internal flags */ + // Set up internal flags if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1; - /* Convert API flags to internal codec lib flags */ + // Convert API flags to internal codec lib flags lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? 
FRAMEFLAGS_KEY : 0; - /* vp8 use 10,000,000 ticks/second as time stamp */ - dst_time_stamp = pts * 10000000 * ctx->cfg.g_timebase.num + /* vp9 use 10,000,000 ticks/second as time stamp */ + dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num) / ctx->cfg.g_timebase.den; dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den; @@ -779,11 +781,11 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, cx_data, &dst_time_stamp, &dst_end_time_stamp, !img)) { if (size) { - vpx_codec_pts_t round, delta; + vpx_codec_pts_t round, delta; vpx_codec_cx_pkt_t pkt; - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; + VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; - /* Pack invisible frames with the next visible frame */ + // Pack invisible frames with the next visible frame if (cpi->common.show_frame == 0) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; @@ -795,7 +797,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, continue; } - /* Add the frame packet to the list of returned packets. */ + // Add the frame packet to the list of returned packets. 
round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; @@ -826,48 +828,25 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, if (cpi->droppable) pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE; - /*if (cpi->output_partition) - { - int i; - const int num_partitions = 1; - - pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT; - - for (i = 0; i < num_partitions; ++i) - { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = cpi->partition_sz[i]; - pkt.data.frame.partition_id = i; - // don't set the fragment bit for the last partition - if (i == (num_partitions - 1)) - pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT; - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - cx_data += cpi->partition_sz[i]; - cx_data_sz -= cpi->partition_sz[i]; - } - } - else*/ - { - if (ctx->pending_cx_data) { - ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - ctx->pending_cx_data_sz += size; - size += write_superframe_index(ctx); - pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = ctx->pending_cx_data_sz; - ctx->pending_cx_data = NULL; - ctx->pending_cx_data_sz = 0; - ctx->pending_frame_count = 0; - ctx->pending_frame_magnitude = 0; - } else { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = size; - } - pkt.data.frame.partition_id = -1; - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - cx_data += size; - cx_data_sz -= size; + if (ctx->pending_cx_data) { + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; + ctx->pending_cx_data_sz += size; + size += write_superframe_index(ctx); + pkt.data.frame.buf = ctx->pending_cx_data; + pkt.data.frame.sz = ctx->pending_cx_data_sz; + ctx->pending_cx_data = NULL; + ctx->pending_cx_data_sz = 0; + ctx->pending_frame_count = 0; + ctx->pending_frame_magnitude = 0; + } else { + pkt.data.frame.buf = cx_data; + pkt.data.frame.sz = 
size; } + pkt.data.frame.partition_id = -1; + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); + cx_data += size; + cx_data_sz -= size; } } } @@ -876,15 +855,14 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, } -static const vpx_codec_cx_pkt_t *vp9e_get_cxdata(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { +static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); } -static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -898,10 +876,9 @@ static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -915,9 +892,8 @@ static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vp9_ref_frame_t *frame = va_arg(args, vp9_ref_frame_t *); if (frame != NULL) { @@ -931,9 +907,8 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list 
args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); (void)ctr_id; @@ -953,17 +928,17 @@ static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, } -static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { +static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags = {0}; if (ctx->preview_ppcfg.post_proc_flag) { - flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; - flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; - flags.noise_level = ctx->preview_ppcfg.noise_level; + flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; + flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; + flags.noise_level = ctx->preview_ppcfg.noise_level; } - if (0 == vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags)) { + if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; } else { @@ -971,100 +946,107 @@ static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { } } -static vpx_codec_err_t vp9e_update_entropy(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - int update = va_arg(args, int); +static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + const int update = va_arg(args, int); vp9_update_entropy(ctx->cpi, update); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_update_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - int update = va_arg(args, int); - vp9_update_reference(ctx->cpi, update); +static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + const int ref_frame_flags = va_arg(args, int); + vp9_update_reference(ctx->cpi, ref_frame_flags); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_use_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - int reference_flag = va_arg(args, int); +static 
vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + const int reference_flag = va_arg(args, int); vp9_use_as_reference(ctx->cpi, reference_flag); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_roi_map(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { // TODO(yaowu): Need to re-implement and test for VP9. return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - // TODO(yaowu): Need to re-implement and test for VP9. - return VPX_CODEC_INVALID_PARAM; +static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); + + if (map) { + if (!vp9_set_active_map(ctx->cpi, map->active_map, map->rows, map->cols)) + return VPX_CODEC_OK; + else + return VPX_CODEC_INVALID_PARAM; + } else { + return VPX_CODEC_INVALID_PARAM; + } } -static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_scaling_mode_t *scalemode = va_arg(args, vpx_scaling_mode_t *); +static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); - if (scalemode != NULL) { - int res; - res = vp9_set_internal_size(ctx->cpi, - (VPX_SCALING)scalemode->h_scaling_mode, - (VPX_SCALING)scalemode->v_scaling_mode); + if (mode) { + const int res = vp9_set_internal_size(ctx->cpi, + (VPX_SCALING)mode->h_scaling_mode, + (VPX_SCALING)mode->v_scaling_mode); return (res == 0) ? 
VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM; } else { return VPX_CODEC_INVALID_PARAM; } } -static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, +static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int data = va_arg(args, int); + const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; vp9_set_svc(ctx->cpi, data); - // CBR mode for SVC with both temporal and spatial layers not yet supported. + // CBR or two pass mode for SVC with both temporal and spatial layers + // not yet supported. if (data == 1 && - ctx->cfg.rc_end_usage == VPX_CBR && - ctx->cfg.ss_number_layers > 1 && - ctx->cfg.ts_number_layers > 1) { + (cfg->rc_end_usage == VPX_CBR || + cfg->g_pass == VPX_RC_FIRST_PASS || + cfg->g_pass == VPX_RC_LAST_PASS) && + cfg->ss_number_layers > 1 && + cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *); - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; - cpi->svc.spatial_layer_id = data->spatial_layer_id; - cpi->svc.temporal_layer_id = data->temporal_layer_id; + vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); + VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; + SVC *const svc = &cpi->svc; + svc->spatial_layer_id = data->spatial_layer_id; + svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. 
- if (cpi->svc.temporal_layer_id < 0 || - cpi->svc.temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { + if (svc->temporal_layer_id < 0 || + svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } - if (cpi->svc.spatial_layer_id < 0 || - cpi->svc.spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { + if (svc->spatial_layer_id < 0 || + svc->spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; - vpx_svc_parameters_t *params = va_arg(args, vpx_svc_parameters_t *); + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *); - if (params == NULL) return VPX_CODEC_INVALID_PARAM; + if (params == NULL) + return VPX_CODEC_INVALID_PARAM; cpi->svc.spatial_layer_id = params->spatial_layer; cpi->svc.temporal_layer_id = params->temporal_layer; @@ -1079,135 +1061,139 @@ static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, ctx->cfg.rc_max_quantizer = params->max_quantizer; ctx->cfg.rc_min_quantizer = params->min_quantizer; - set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); return VPX_CODEC_OK; } -static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { - {VP8_SET_REFERENCE, vp9e_set_reference}, - {VP8_COPY_REFERENCE, vp9e_copy_reference}, - {VP8_SET_POSTPROC, vp9e_set_previewpp}, - {VP8E_UPD_ENTROPY, vp9e_update_entropy}, - {VP8E_UPD_REFERENCE, vp9e_update_reference}, - {VP8E_USE_REFERENCE, vp9e_use_reference}, - {VP8E_SET_ROI_MAP, vp9e_set_roi_map}, - {VP8E_SET_ACTIVEMAP, vp9e_set_activemap}, - {VP8E_SET_SCALEMODE, vp9e_set_scalemode}, - {VP8E_SET_CPUUSED, set_param}, - {VP8E_SET_NOISE_SENSITIVITY, 
set_param}, - {VP8E_SET_ENABLEAUTOALTREF, set_param}, - {VP8E_SET_SHARPNESS, set_param}, - {VP8E_SET_STATIC_THRESHOLD, set_param}, - {VP9E_SET_TILE_COLUMNS, set_param}, - {VP9E_SET_TILE_ROWS, set_param}, - {VP8E_GET_LAST_QUANTIZER, get_param}, - {VP8E_GET_LAST_QUANTIZER_64, get_param}, - {VP8E_SET_ARNR_MAXFRAMES, set_param}, - {VP8E_SET_ARNR_STRENGTH, set_param}, - {VP8E_SET_ARNR_TYPE, set_param}, - {VP8E_SET_TUNING, set_param}, - {VP8E_SET_CQ_LEVEL, set_param}, - {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, - {VP9E_SET_LOSSLESS, set_param}, - {VP9E_SET_FRAME_PARALLEL_DECODING, set_param}, - {VP9E_SET_AQ_MODE, set_param}, - {VP9_GET_REFERENCE, get_reference}, - {VP9E_SET_SVC, vp9e_set_svc}, - {VP9E_SET_SVC_PARAMETERS, vp9e_set_svc_parameters}, - {VP9E_SET_SVC_LAYER_ID, vp9e_set_svc_layer_id}, +static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + {VP8E_UPD_ENTROPY, ctrl_update_entropy}, + {VP8E_UPD_REFERENCE, ctrl_update_reference}, + {VP8E_USE_REFERENCE, ctrl_use_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_previewpp}, + {VP8E_SET_ROI_MAP, ctrl_set_roi_map}, + {VP8E_SET_ACTIVEMAP, ctrl_set_active_map}, + {VP8E_SET_SCALEMODE, ctrl_set_scale_mode}, + {VP8E_SET_CPUUSED, ctrl_set_param}, + {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_param}, + {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_param}, + {VP8E_SET_SHARPNESS, ctrl_set_param}, + {VP8E_SET_STATIC_THRESHOLD, ctrl_set_param}, + {VP9E_SET_TILE_COLUMNS, ctrl_set_param}, + {VP9E_SET_TILE_ROWS, ctrl_set_param}, + {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param}, + {VP8E_SET_ARNR_STRENGTH, ctrl_set_param}, + {VP8E_SET_ARNR_TYPE, ctrl_set_param}, + {VP8E_SET_TUNING, ctrl_set_param}, + {VP8E_SET_CQ_LEVEL, ctrl_set_param}, + {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param}, + {VP9E_SET_LOSSLESS, ctrl_set_param}, + {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_param}, + {VP9E_SET_AQ_MODE, ctrl_set_param}, + {VP9E_SET_FRAME_PERIODIC_BOOST, 
ctrl_set_param}, + {VP9E_SET_SVC, ctrl_set_svc}, + {VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters}, + {VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, + + // Getters + {VP8E_GET_LAST_QUANTIZER, ctrl_get_param}, + {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_param}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + { -1, NULL}, }; -static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = { +static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { { 0, { // NOLINT - 0, /* g_usage */ - 0, /* g_threads */ - 0, /* g_profile */ + 0, // g_usage + 0, // g_threads + 0, // g_profile - 320, /* g_width */ - 240, /* g_height */ - {1, 30}, /* g_timebase */ + 320, // g_width + 240, // g_height + {1, 30}, // g_timebase - 0, /* g_error_resilient */ + 0, // g_error_resilient - VPX_RC_ONE_PASS, /* g_pass */ + VPX_RC_ONE_PASS, // g_pass - 25, /* g_lag_in_frames */ + 25, // g_lag_in_frames - 0, /* rc_dropframe_thresh */ - 0, /* rc_resize_allowed */ - 60, /* rc_resize_down_thresold */ - 30, /* rc_resize_up_thresold */ + 0, // rc_dropframe_thresh + 0, // rc_resize_allowed + 60, // rc_resize_down_thresold + 30, // rc_resize_up_thresold - VPX_VBR, /* rc_end_usage */ + VPX_VBR, // rc_end_usage #if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION) - {0}, /* rc_twopass_stats_in */ + {0}, // rc_twopass_stats_in #endif - 256, /* rc_target_bandwidth */ - 0, /* rc_min_quantizer */ - 63, /* rc_max_quantizer */ - 100, /* rc_undershoot_pct */ - 100, /* rc_overshoot_pct */ - - 6000, /* rc_max_buffer_size */ - 4000, /* rc_buffer_initial_size; */ - 5000, /* rc_buffer_optimal_size; */ - - 50, /* rc_two_pass_vbrbias */ - 0, /* rc_two_pass_vbrmin_section */ - 2000, /* rc_two_pass_vbrmax_section */ - - /* keyframing settings (kf) */ - VPX_KF_AUTO, /* g_kfmode*/ - 0, /* kf_min_dist */ - 9999, /* kf_max_dist */ - - VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ - {0}, /* ss_target_bitrate */ - 1, /* ts_number_layers */ - {0}, /* ts_target_bitrate */ - {0}, /* ts_rate_decimator */ - 0, /* ts_periodicity */ - {0}, /* 
ts_layer_id */ + 256, // rc_target_bandwidth + 0, // rc_min_quantizer + 63, // rc_max_quantizer + 100, // rc_undershoot_pct + 100, // rc_overshoot_pct + + 6000, // rc_max_buffer_size + 4000, // rc_buffer_initial_size + 5000, // rc_buffer_optimal_size + + 50, // rc_two_pass_vbrbias + 0, // rc_two_pass_vbrmin_section + 2000, // rc_two_pass_vbrmax_section + + // keyframing settings (kf) + VPX_KF_AUTO, // g_kfmode + 0, // kf_min_dist + 9999, // kf_max_dist + + VPX_SS_DEFAULT_LAYERS, // ss_number_layers + {0}, // ss_target_bitrate + 1, // ts_number_layers + {0}, // ts_target_bitrate + {0}, // ts_rate_decimator + 0, // ts_periodicity + {0}, // ts_layer_id #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION) - "vp8.fpf" /* first pass filename */ + "vp8.fpf" // first pass filename #endif } }, { -1, {NOT_IMPLEMENTED}} }; - #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp9_cx) = { "WebM Project VP9 Encoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR | - VPX_CODEC_CAP_OUTPUT_PARTITION, - /* vpx_codec_caps_t caps; */ - vp9e_init, /* vpx_codec_init_fn_t init; */ - vp9e_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp9e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */ - NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */ + VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, // vpx_codec_caps_t + encoder_init, // vpx_codec_init_fn_t + encoder_destroy, // vpx_codec_destroy_fn_t + encoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + NOT_IMPLEMENTED, // vpx_codec_get_mmap_fn_t + NOT_IMPLEMENTED, // vpx_codec_set_mmap_fn_t { // NOLINT - NOT_IMPLEMENTED, /* vpx_codec_peek_si_fn_t peek_si; */ - NOT_IMPLEMENTED, /* vpx_codec_get_si_fn_t get_si; */ - NOT_IMPLEMENTED, /* vpx_codec_decode_fn_t decode; */ - NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */ + NOT_IMPLEMENTED, // vpx_codec_peek_si_fn_t + NOT_IMPLEMENTED, // 
vpx_codec_get_si_fn_t + NOT_IMPLEMENTED, // vpx_codec_decode_fn_t + NOT_IMPLEMENTED, // vpx_codec_frame_get_fn_t }, { // NOLINT - vp9e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ - vp9e_encode, /* vpx_codec_encode_fn_t encode; */ - vp9e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ - vp9e_set_config, - NOT_IMPLEMENTED, - vp9e_get_preview, - } /* encoder functions */ + encoder_usage_cfg_map, // vpx_codec_enc_cfg_map_t + encoder_encode, // vpx_codec_encode_fn_t + encoder_get_cxdata, // vpx_codec_get_cx_data_fn_t + encoder_set_config, // vpx_codec_enc_config_set_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t + encoder_get_preview, // vpx_codec_get_preview_frame_fn_t + NOT_IMPLEMENTED , // vpx_codec_enc_mr_get_mem_loc_fn_t + } }; diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c index 83d64b8..5ed7484 100644 --- a/source/libvpx/vp9/vp9_dx_iface.c +++ b/source/libvpx/vp9/vp9_dx_iface.c @@ -8,45 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include <stdlib.h> #include <string.h> -#include "vpx/vpx_decoder.h" -#include "vpx/vp8dx.h" -#include "vpx/internal/vpx_codec_internal.h" + #include "./vpx_version.h" + +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + #include "vp9/common/vp9_frame_buffers.h" -#include "vp9/decoder/vp9_onyxd.h" -#include "vp9/decoder/vp9_onyxd_int.h" + +#include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_read_bit_buffer.h" + #include "vp9/vp9_iface_common.h" #define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? 
VPX_CODEC_CAP_POSTPROC : 0) -typedef vpx_codec_stream_info_t vp9_stream_info_t; -/* Structures for handling memory allocations */ -typedef enum { - VP9_SEG_ALG_PRIV = 256, - VP9_SEG_MAX -} mem_seg_id_t; -#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) - -static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si, - vpx_codec_flags_t flags); - -static const mem_req_t vp9_mem_req_segs[] = { - {VP9_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, priv_sz}, - {VP9_SEG_MAX, 0, 0, 0, NULL} -}; +typedef vpx_codec_stream_info_t vp9_stream_info_t; struct vpx_codec_alg_priv { vpx_codec_priv_t base; - vpx_codec_mmap_t mmaps[NELEMENTS(vp9_mem_req_segs) - 1]; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; - int defer_alloc; int decoder_init; - struct VP9Decompressor *pbi; + struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; #if CONFIG_POSTPROC_VISUALIZER @@ -67,86 +54,54 @@ struct vpx_codec_alg_priv { vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; }; -static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si, - vpx_codec_flags_t flags) { - /* Although this declaration is constant, we can't use it in the requested - * segments list because we want to define the requested segments list - * before defining the private type (so that the number of memory maps is - * known) - */ - (void)si; - return sizeof(vpx_codec_alg_priv_t); -} - -static void vp9_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap) { - int i; - - ctx->priv = mmap->base; - ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; - ctx->priv->alg_priv = mmap->base; - - for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) - ctx->priv->alg_priv->mmaps[i].id = vp9_mem_req_segs[i].id; - - ctx->priv->alg_priv->mmaps[0] = *mmap; - ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); - ctx->priv->init_flags = ctx->init_flags; - - if (ctx->config.dec) { - /* Update the reference to the config structure to an internal copy. 
*/ - ctx->priv->alg_priv->cfg = *ctx->config.dec; - ctx->config.dec = &ctx->priv->alg_priv->cfg; - } -} - -static void vp9_finalize_mmaps(vpx_codec_alg_priv_t *ctx) { - /* nothing to clean up */ -} - -static vpx_codec_err_t vp9_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - vpx_codec_err_t res = VPX_CODEC_OK; - +static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { // This function only allocates space for the vpx_codec_alg_priv_t // structure. More memory may be required at the time the stream // information becomes known. if (!ctx->priv) { - vpx_codec_mmap_t mmap; - - mmap.id = vp9_mem_req_segs[0].id; - mmap.sz = sizeof(vpx_codec_alg_priv_t); - mmap.align = vp9_mem_req_segs[0].align; - mmap.flags = vp9_mem_req_segs[0].flags; - - res = vpx_mmap_alloc(&mmap); - if (!res) { - vp9_init_ctx(ctx, &mmap); - - ctx->priv->alg_priv->defer_alloc = 1; + vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv)); + if (alg_priv == NULL) + return VPX_CODEC_MEM_ERROR; + + vp9_zero(*alg_priv); + + ctx->priv = (vpx_codec_priv_t *)alg_priv; + ctx->priv->sz = sizeof(*ctx->priv); + ctx->priv->iface = ctx->iface; + ctx->priv->alg_priv = alg_priv; + ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); + ctx->priv->init_flags = ctx->init_flags; + + if (ctx->config.dec) { + // Update the reference to the config structure to an internal copy. 
+ ctx->priv->alg_priv->cfg = *ctx->config.dec; + ctx->config.dec = &ctx->priv->alg_priv->cfg; } } - return res; + return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_destroy(vpx_codec_alg_priv_t *ctx) { - int i; - - vp9_remove_decompressor(ctx->pbi); - - for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--) { - if (ctx->mmaps[i].dtor) - ctx->mmaps[i].dtor(&ctx->mmaps[i]); +static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { + if (ctx->pbi) { + vp9_decoder_remove(ctx->pbi); + ctx->pbi = NULL; } + vpx_free(ctx); + return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, - vpx_codec_stream_info_t *si) { - if (data_sz <= 8) return VPX_CODEC_UNSUP_BITSTREAM; - if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; +static vpx_codec_err_t decoder_peek_si(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + if (data_sz <= 8) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (data + data_sz <= data) + return VPX_CODEC_INVALID_PARAM; si->is_kf = 0; si->w = si->h = 0; @@ -204,8 +159,8 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) { +static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si) { const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) ? 
sizeof(vp9_stream_info_t) : sizeof(vpx_codec_stream_info_t); @@ -215,7 +170,6 @@ static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } - static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { if (error->error_code) @@ -224,149 +178,122 @@ static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, return error->error_code; } -static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, - const uint8_t **data, unsigned int data_sz, - void *user_priv, int64_t deadline) { - vpx_codec_err_t res = VPX_CODEC_OK; +static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { + VP9_COMMON *const cm = &ctx->pbi->common; - ctx->img_avail = 0; + cm->new_fb_idx = -1; - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. - */ - if (!ctx->si.h) - res = ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si); + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + cm->get_fb_cb = ctx->get_ext_fb_cb; + cm->release_fb_cb = ctx->release_ext_fb_cb; + cm->cb_priv = ctx->ext_priv; + } else { + cm->get_fb_cb = vp9_get_frame_buffer; + cm->release_fb_cb = vp9_release_frame_buffer; + if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); - /* Perform deferred allocations, if required */ - if (!res && ctx->defer_alloc) { - int i; + cm->cb_priv = &cm->int_frame_buffers; + } +} - for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++) { - vpx_codec_dec_cfg_t cfg; +static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { + cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; + cfg->deblocking_level = 4; + cfg->noise_level = 0; +} - cfg.w = ctx->si.w; - cfg.h = ctx->si.h; - ctx->mmaps[i].id = vp9_mem_req_segs[i].id; - ctx->mmaps[i].sz = vp9_mem_req_segs[i].sz; - 
ctx->mmaps[i].align = vp9_mem_req_segs[i].align; - ctx->mmaps[i].flags = vp9_mem_req_segs[i].flags; +static void set_ppflags(const vpx_codec_alg_priv_t *ctx, + vp9_ppflags_t *flags) { + flags->post_proc_flag = +#if CONFIG_POSTPROC_VISUALIZER + (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | + (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | +#endif + ctx->postproc_cfg.post_proc_flag; - if (!ctx->mmaps[i].sz) - ctx->mmaps[i].sz = vp9_mem_req_segs[i].calc_sz(&cfg, - ctx->base.init_flags); + flags->deblocking_level = ctx->postproc_cfg.deblocking_level; + flags->noise_level = ctx->postproc_cfg.noise_level; +#if CONFIG_POSTPROC_VISUALIZER + flags->display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; + flags->display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; + flags->display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags->display_mv_flag = ctx->dbg_display_mv_flag; +#endif +} - res = vpx_mmap_alloc(&ctx->mmaps[i]); - } +static void init_decoder(vpx_codec_alg_priv_t *ctx) { + VP9D_CONFIG oxcf; + oxcf.width = ctx->si.w; + oxcf.height = ctx->si.h; + oxcf.version = 9; + oxcf.max_threads = ctx->cfg.threads; + oxcf.inv_tile_order = ctx->invert_tile_order; - if (!res) - vp9_finalize_mmaps(ctx); + ctx->pbi = vp9_decoder_create(&oxcf); + if (ctx->pbi == NULL) + return; - ctx->defer_alloc = 0; - } + vp9_initialize_dec(); - /* Initialize the decoder instance on the first frame*/ - if (!res && !ctx->decoder_init) { - res = vpx_validate_mmaps(&ctx->si, ctx->mmaps, - vp9_mem_req_segs, NELEMENTS(vp9_mem_req_segs), - ctx->base.init_flags); - - if (!res) { - VP9D_CONFIG oxcf; - struct VP9Decompressor *optr; - - vp9_initialize_dec(); - - oxcf.width = ctx->si.w; - oxcf.height = ctx->si.h; - oxcf.version = 9; - oxcf.postprocess = 0; - oxcf.max_threads = ctx->cfg.threads; - oxcf.inv_tile_order = ctx->invert_tile_order; - optr 
= vp9_create_decompressor(&oxcf); - - // If postprocessing was enabled by the application and a - // configuration has not been provided, default it. - if (!ctx->postproc_cfg_set && - (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { - ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; - ctx->postproc_cfg.deblocking_level = 4; - ctx->postproc_cfg.noise_level = 0; - } + // If postprocessing was enabled by the application and a + // configuration has not been provided, default it. + if (!ctx->postproc_cfg_set && + (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) + set_default_ppflags(&ctx->postproc_cfg); - if (!optr) { - res = VPX_CODEC_ERROR; - } else { - VP9D_COMP *const pbi = (VP9D_COMP*)optr; - VP9_COMMON *const cm = &pbi->common; + init_buffer_callbacks(ctx); +} - // Set index to not initialized. - cm->new_fb_idx = -1; +static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, + const uint8_t **data, unsigned int data_sz, + void *user_priv, int64_t deadline) { + YV12_BUFFER_CONFIG sd = { 0 }; + int64_t time_stamp = 0, time_end_stamp = 0; + vp9_ppflags_t flags = {0}; + VP9_COMMON *cm = NULL; - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - cm->get_fb_cb = ctx->get_ext_fb_cb; - cm->release_fb_cb = ctx->release_ext_fb_cb; - cm->cb_priv = ctx->ext_priv; - } else { - cm->get_fb_cb = vp9_get_frame_buffer; - cm->release_fb_cb = vp9_release_frame_buffer; + ctx->img_avail = 0; - if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); - cm->cb_priv = &cm->int_frame_buffers; - } + // Determine the stream parameters. Note that we rely on peek_si to + // validate that we have a buffer that does not wrap around the top + // of the heap. 
+ if (!ctx->si.h) { + const vpx_codec_err_t res = + ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si); + if (res != VPX_CODEC_OK) + return res; + } - ctx->pbi = optr; - } - } + // Initialize the decoder instance on the first frame + if (!ctx->decoder_init) { + init_decoder(ctx); + if (ctx->pbi == NULL) + return VPX_CODEC_ERROR; ctx->decoder_init = 1; } - if (!res && ctx->pbi) { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp9_ppflags_t flags = {0}; - - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { - flags.post_proc_flag = -#if CONFIG_POSTPROC_VISUALIZER - (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | - (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | -#endif - ctx->postproc_cfg.post_proc_flag; + cm = &ctx->pbi->common; - flags.deblocking_level = ctx->postproc_cfg.deblocking_level; - flags.noise_level = ctx->postproc_cfg.noise_level; -#if CONFIG_POSTPROC_VISUALIZER - flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; - flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags.display_mv_flag = ctx->dbg_display_mv_flag; -#endif - } + if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) + return update_error_state(ctx, &cm->error); - if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) { - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; - res = update_error_state(ctx, &pbi->common.error); - } + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); - if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp, - &time_end_stamp, &flags)) { - VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi; - VP9_COMMON *const cm = &pbi->common; - yuvconfig2image(&ctx->img, &sd, user_priv); + if (vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) + 
return update_error_state(ctx, &cm->error); - ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->img_avail = 1; - } - } + yuvconfig2image(&ctx->img, &sd, user_priv); + ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->img_avail = 1; - return res; + return VPX_CODEC_OK; } static void parse_superframe_index(const uint8_t *data, size_t data_sz, @@ -385,7 +312,7 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, if (data_sz >= index_sz && data[data_sz - index_sz] == marker) { // found a valid superframe index uint32_t i, j; - const uint8_t *x = data + data_sz - index_sz + 1; + const uint8_t *x = &data[data_sz - index_sz + 1]; for (i = 0; i < frames; i++) { uint32_t this_sz = 0; @@ -400,18 +327,17 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, } } -static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) { +static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, unsigned int data_sz, + void *user_priv, long deadline) { const uint8_t *data_start = data; const uint8_t *data_end = data + data_sz; - vpx_codec_err_t res = 0; + vpx_codec_err_t res = VPX_CODEC_OK; uint32_t sizes[8]; int frames_this_pts, frame_count = 0; - if (data == NULL || data_sz == 0) return VPX_CODEC_INVALID_PARAM; + if (data == NULL || data_sz == 0) + return VPX_CODEC_INVALID_PARAM; parse_superframe_index(data, data_sz, sizes, &frames_this_pts); @@ -450,27 +376,27 @@ static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, assert(data_start >= data); assert(data_start <= data_end); - /* Early exit if there was a decode error */ + // Early exit if there was a decode error if (res) break; - /* Account for suboptimal termination by the encoder. */ + // Account for suboptimal termination by the encoder. 
while (data_start < data_end && *data_start == 0) data_start++; data_sz = (unsigned int)(data_end - data_start); } while (data_start < data_end); + return res; } -static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { +static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; if (ctx->img_avail) { - /* iter acts as a flip flop, so an image is only returned on the first - * call to get_frame. - */ + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. if (!(*iter)) { img = &ctx->img; *iter = img; @@ -481,7 +407,7 @@ static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx, return img; } -static vpx_codec_err_t vp9_set_fb_fn( +static vpx_codec_err_t decoder_set_fb_fn( vpx_codec_alg_priv_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { @@ -499,93 +425,24 @@ static vpx_codec_err_t vp9_set_fb_fn( return VPX_CODEC_ERROR; } -static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, - vpx_codec_mmap_t *mmap, - vpx_codec_iter_t *iter) { - vpx_codec_err_t res; - const mem_req_t *seg_iter = *iter; - - /* Get address of next segment request */ - do { - if (!seg_iter) - seg_iter = vp9_mem_req_segs; - else if (seg_iter->id != VP9_SEG_MAX) - seg_iter++; - - *iter = (vpx_codec_iter_t)seg_iter; - - if (seg_iter->id != VP9_SEG_MAX) { - mmap->id = seg_iter->id; - mmap->sz = seg_iter->sz; - mmap->align = seg_iter->align; - mmap->flags = seg_iter->flags; - - if (!seg_iter->sz) - mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags); - - res = VPX_CODEC_OK; - } else { - res = VPX_CODEC_LIST_END; - } - } while (!mmap->sz && res != VPX_CODEC_LIST_END); - - return res; -} - -static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, - const vpx_codec_mmap_t *mmap) { - vpx_codec_err_t res = VPX_CODEC_MEM_ERROR; - int i, done; - - if (!ctx->priv) { - if (mmap->id == 
VP9_SEG_ALG_PRIV) { - if (!ctx->priv) { - vp9_init_ctx(ctx, mmap); - res = VPX_CODEC_OK; - } - } - } - - done = 1; - - if (!res && ctx->priv->alg_priv) { - for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) { - if (ctx->priv->alg_priv->mmaps[i].id == mmap->id) - if (!ctx->priv->alg_priv->mmaps[i].base) { - ctx->priv->alg_priv->mmaps[i] = *mmap; - res = VPX_CODEC_OK; - } - - done &= (ctx->priv->alg_priv->mmaps[i].base != NULL); - } - } - - if (done && !res) { - vp9_finalize_mmaps(ctx->priv->alg_priv); - res = ctx->iface->init(ctx, NULL); - } - - return res; -} - -static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(ctx->pbi, + return vp9_set_reference_dec(&ctx->pbi->common, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; } } -static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -601,8 +458,8 @@ static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); if (data) { @@ -616,8 +473,8 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, } } -static 
vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); @@ -633,8 +490,8 @@ static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, #endif } -static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { #if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC int data = va_arg(args, int); @@ -653,14 +510,15 @@ static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, #endif } -static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { - int *update_info = va_arg(args, int *); - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; +static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { + int *const update_info = va_arg(args, int *); if (update_info) { - *update_info = pbi->refresh_frame_flags; - + if (ctx->pbi) + *update_info = ctx->pbi->refresh_frame_flags; + else + return VPX_CODEC_ERROR; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -668,14 +526,13 @@ static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { +static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { int *corrupted = va_arg(args, int *); if (corrupted) { - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; - if (pbi) - *corrupted = pbi->common.frame_to_show->corrupted; + if (ctx->pbi) + *corrupted = ctx->pbi->common.frame_to_show->corrupted; else return VPX_CODEC_ERROR; return VPX_CODEC_OK; @@ -684,15 +541,15 @@ static vpx_codec_err_t 
get_frame_corrupted(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { +static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { int *const display_size = va_arg(args, int *); if (display_size) { - const VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi; - if (pbi) { - display_size[0] = pbi->common.display_width; - display_size[1] = pbi->common.display_height; + if (ctx->pbi) { + const VP9_COMMON *const cm = &ctx->pbi->common; + display_size[0] = cm->display_width; + display_size[1] = cm->display_height; } else { return VPX_CODEC_ERROR; } @@ -702,30 +559,33 @@ static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { ctx->invert_tile_order = va_arg(args, int); return VPX_CODEC_OK; } -static vpx_codec_ctrl_fn_map_t ctf_maps[] = { - {VP8_SET_REFERENCE, set_reference}, - {VP8_COPY_REFERENCE, copy_reference}, - {VP8_SET_POSTPROC, set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, set_dbg_options}, - {VP8_SET_DBG_COLOR_MB_MODES, set_dbg_options}, - {VP8_SET_DBG_COLOR_B_MODES, set_dbg_options}, - {VP8_SET_DBG_DISPLAY_MV, set_dbg_options}, - {VP8D_GET_LAST_REF_UPDATES, get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, get_frame_corrupted}, - {VP9_GET_REFERENCE, get_reference}, - {VP9D_GET_DISPLAY_SIZE, get_display_size}, - {VP9_INVERT_TILE_DECODE_ORDER, set_invert_tile_order}, +static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, + 
{VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, + {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, + + // Getters + {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size}, + { -1, NULL}, }; - #ifndef VERSION_STRING #define VERSION_STRING #endif @@ -733,22 +593,20 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = { "WebM Project VP9 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | - VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, - /* vpx_codec_caps_t caps; */ - vp9_init, /* vpx_codec_init_fn_t init; */ - vp9_destroy, /* vpx_codec_destroy_fn_t destroy; */ - ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - vp9_xma_get_mmap, /* vpx_codec_get_mmap_fn_t get_mmap; */ - vp9_xma_set_mmap, /* vpx_codec_set_mmap_fn_t set_mmap; */ + VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t + decoder_init, // vpx_codec_init_fn_t + decoder_destroy, // vpx_codec_destroy_fn_t + decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + NOT_IMPLEMENTED, // vpx_codec_get_mmap_fn_t + NOT_IMPLEMENTED, // vpx_codec_set_mmap_fn_t { // NOLINT - vp9_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ - vp9_get_si, /* vpx_codec_get_si_fn_t get_si; */ - vp9_decode, /* vpx_codec_decode_fn_t decode; */ - vp9_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ - vp9_set_fb_fn, /* vpx_codec_set_fb_fn_t set_fb_fn; */ + decoder_peek_si, // vpx_codec_peek_si_fn_t + decoder_get_si, // vpx_codec_get_si_fn_t + decoder_decode, // vpx_codec_decode_fn_t + decoder_get_frame, // vpx_codec_frame_get_fn_t + decoder_set_fb_fn, // vpx_codec_set_fb_fn_t }, { // NOLINT - /* encoder functions */ NOT_IMPLEMENTED, NOT_IMPLEMENTED, NOT_IMPLEMENTED, diff --git a/source/libvpx/vp9/vp9cx.mk b/source/libvpx/vp9/vp9cx.mk index 6679f89..da6c0f8 100644 --- a/source/libvpx/vp9/vp9cx.mk +++ b/source/libvpx/vp9/vp9cx.mk @@ -18,6 +18,8 @@ 
VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c VP9_CX_SRCS-yes += encoder/vp9_bitstream.c +VP9_CX_SRCS-yes += encoder/vp9_cost.h +VP9_CX_SRCS-yes += encoder/vp9_cost.c VP9_CX_SRCS-yes += encoder/vp9_dct.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h @@ -28,6 +30,7 @@ VP9_CX_SRCS-yes += encoder/vp9_firstpass.c VP9_CX_SRCS-yes += encoder/vp9_block.h VP9_CX_SRCS-yes += encoder/vp9_writer.h VP9_CX_SRCS-yes += encoder/vp9_writer.c +VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.c VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.h @@ -42,6 +45,7 @@ VP9_CX_SRCS-yes += encoder/vp9_quantize.h VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h VP9_CX_SRCS-yes += encoder/vp9_rdopt.h VP9_CX_SRCS-yes += encoder/vp9_pickmode.h +VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.h VP9_CX_SRCS-yes += encoder/vp9_treewriter.h VP9_CX_SRCS-yes += encoder/vp9_variance.h @@ -56,16 +60,24 @@ VP9_CX_SRCS-yes += encoder/vp9_pickmode.c VP9_CX_SRCS-yes += encoder/vp9_sad.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.h +VP9_CX_SRCS-yes += encoder/vp9_speed_features.c +VP9_CX_SRCS-yes += encoder/vp9_speed_features.h VP9_CX_SRCS-yes += encoder/vp9_subexp.c VP9_CX_SRCS-yes += encoder/vp9_subexp.h +VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_variance.c -VP9_CX_SRCS-yes += encoder/vp9_vaq.c -VP9_CX_SRCS-yes += encoder/vp9_vaq.h +VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c +VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h +VP9_CX_SRCS-yes += 
encoder/vp9_aq_cyclicrefresh.c +VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h +VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c +VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c @@ -83,6 +95,7 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm +VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm diff --git a/source/libvpx/vp9/vp9dx.mk b/source/libvpx/vp9/vp9dx.mk index de210f4..92ec6fd 100644 --- a/source/libvpx/vp9/vp9dx.mk +++ b/source/libvpx/vp9/vp9dx.mk @@ -25,14 +25,14 @@ VP9_DX_SRCS-yes += decoder/vp9_dthread.c VP9_DX_SRCS-yes += decoder/vp9_dthread.h VP9_DX_SRCS-yes += decoder/vp9_reader.h VP9_DX_SRCS-yes += decoder/vp9_reader.c +VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.c VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h VP9_DX_SRCS-yes += decoder/vp9_decodemv.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h -VP9_DX_SRCS-yes += decoder/vp9_onyxd.h -VP9_DX_SRCS-yes += decoder/vp9_onyxd_int.h +VP9_DX_SRCS-yes += decoder/vp9_decoder.c +VP9_DX_SRCS-yes += decoder/vp9_decoder.h VP9_DX_SRCS-yes += decoder/vp9_thread.c VP9_DX_SRCS-yes += decoder/vp9_thread.h -VP9_DX_SRCS-yes += decoder/vp9_onyxd_if.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h diff --git a/source/libvpx/vpx/exports_enc b/source/libvpx/vpx/exports_enc index 99b1bfa..155faf6 100644 --- a/source/libvpx/vpx/exports_enc +++ b/source/libvpx/vpx/exports_enc @@ -21,3 +21,5 @@ 
text vpx_svc_set_options text vpx_svc_set_quantizers text vpx_svc_set_scale_factors text vpx_svc_get_layer_resolution +text vpx_svc_get_rc_stats_buffer_size +text vpx_svc_get_rc_stats_buffer
\ No newline at end of file diff --git a/source/libvpx/vpx/src/svc_encodeframe.c b/source/libvpx/vpx/src/svc_encodeframe.c index c783724..76aacd2 100644 --- a/source/libvpx/vpx/src/svc_encodeframe.c +++ b/source/libvpx/vpx/src/svc_encodeframe.c @@ -13,6 +13,7 @@ * VP9 SVC encoding support via libvpx */ +#include <assert.h> #include <math.h> #include <stdarg.h> #include <stdio.h> @@ -81,6 +82,10 @@ typedef struct SvcInternal { size_t buffer_size; void *buffer; + char *rc_stats_buf; + size_t rc_stats_buf_size; + size_t rc_stats_buf_used; + char message_buffer[2048]; vpx_codec_ctx_t *codec_ctx; } SvcInternal; @@ -519,9 +524,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_ctx->spatial_layers); return VPX_CODEC_INVALID_PARAM; } - // use SvcInternal value for number of layers to enable forcing single layer - // for first frame - si->layers = svc_ctx->spatial_layers; res = parse_quantizer_values(svc_ctx, si->quantizers, 0); if (res != VPX_CODEC_OK) return res; @@ -533,10 +535,13 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, res = parse_scale_factors(svc_ctx, si->scale_factors); if (res != VPX_CODEC_OK) return res; - // parse aggregate command line options + // Parse aggregate command line options. Options must start with + // "layers=xx" then followed by other options res = parse_options(svc_ctx, si->options); if (res != VPX_CODEC_OK) return res; + si->layers = svc_ctx->spatial_layers; + // Assign target bitrate for each layer. We calculate the ratio // from the resolution for now. 
// TODO(Minghai): Optimize the mechanism of allocating bits after @@ -546,6 +551,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, float total = 0; float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; + assert(si->layers <= VPX_SS_MAX_LAYERS); for (i = 0; i < si->layers; ++i) { int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers; if (pos < VPX_SS_MAX_LAYERS && si->scaling_factor_den[pos] > 0) { @@ -569,7 +575,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->ss_number_layers = si->layers; enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. enc_cfg->kf_mode = VPX_KF_DISABLED; - enc_cfg->g_pass = VPX_RC_ONE_PASS; // Lag in frames not currently supported enc_cfg->g_lag_in_frames = 0; @@ -578,8 +583,12 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->rc_dropframe_thresh = 0; enc_cfg->rc_end_usage = VPX_CBR; enc_cfg->rc_resize_allowed = 0; - enc_cfg->rc_min_quantizer = 33; - enc_cfg->rc_max_quantizer = 33; + + if (enc_cfg->g_pass == VPX_RC_ONE_PASS) { + enc_cfg->rc_min_quantizer = 33; + enc_cfg->rc_max_quantizer = 33; + } + enc_cfg->rc_undershoot_pct = 100; enc_cfg->rc_overshoot_pct = 15; enc_cfg->rc_buf_initial_sz = 500; @@ -779,12 +788,17 @@ static void set_svc_parameters(SvcContext *svc_ctx, } layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; - if (vpx_svc_is_keyframe(svc_ctx)) { - svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; - svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) { + if (vpx_svc_is_keyframe(svc_ctx)) { + svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; + svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + } else { + svc_params.min_quantizer = si->quantizer[layer_index]; + svc_params.max_quantizer = si->quantizer[layer_index]; + } } else { - svc_params.min_quantizer = si->quantizer[layer_index]; - 
svc_params.max_quantizer = si->quantizer[layer_index]; + svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer; + svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer; } svc_params.distance_from_i_frame = si->frame_within_gop; @@ -845,12 +859,13 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, struct LayerData *layer_data; struct Superframe superframe; SvcInternal *const si = get_svc_internal(svc_ctx); - if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) { + if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } memset(&superframe, 0, sizeof(superframe)); svc_log_reset(svc_ctx); + si->rc_stats_buf_used = 0; si->layers = svc_ctx->spatial_layers; if (si->frame_within_gop >= si->kf_dist || @@ -860,9 +875,12 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->is_keyframe = (si->frame_within_gop == 0); si->frame_size = 0; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "vpx_svc_encode layers: %d, frame_count: %d, frame_within_gop: %d\n", - si->layers, si->encode_frame_count, si->frame_within_gop); + if (rawimg != NULL) { + svc_log(svc_ctx, SVC_LOG_DEBUG, + "vpx_svc_encode layers: %d, frame_count: %d, " + "frame_within_gop: %d\n", si->layers, si->encode_frame_count, + si->frame_within_gop); + } // encode each layer for (si->layer = 0; si->layer < si->layers; ++si->layer) { @@ -871,9 +889,11 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); continue; } - calculate_enc_frame_flags(svc_ctx); - set_svc_parameters(svc_ctx, codec_ctx); + if (rawimg != NULL) { + calculate_enc_frame_flags(svc_ctx); + set_svc_parameters(svc_ctx, codec_ctx); + } res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, si->enc_frame_flags, deadline); @@ -923,39 +943,63 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t 
*codec_ctx, } break; } + case VPX_CODEC_STATS_PKT: { + size_t new_size = si->rc_stats_buf_used + + cx_pkt->data.twopass_stats.sz; + + if (new_size > si->rc_stats_buf_size) { + char *p = (char*)realloc(si->rc_stats_buf, new_size); + if (p == NULL) { + svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n"); + break; + } + si->rc_stats_buf = p; + si->rc_stats_buf_size = new_size; + } + + memcpy(si->rc_stats_buf + si->rc_stats_buf_used, + cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); + si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; + break; + } default: { break; } } } + if (rawimg == NULL) { + break; + } } - // add superframe index to layer data list - sf_create_index(&superframe); - layer_data = ld_create(superframe.buffer, superframe.index_size); - ld_list_add(&cx_layer_list, layer_data); - - // get accumulated size of layer data - si->frame_size = ld_list_get_buffer_size(cx_layer_list); - if (si->frame_size == 0) return VPX_CODEC_ERROR; + if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) { + // add superframe index to layer data list + sf_create_index(&superframe); + layer_data = ld_create(superframe.buffer, superframe.index_size); + ld_list_add(&cx_layer_list, layer_data); + + // get accumulated size of layer data + si->frame_size = ld_list_get_buffer_size(cx_layer_list); + if (si->frame_size > 0) { + // all layers encoded, create single buffer with concatenated layers + if (si->frame_size > si->buffer_size) { + free(si->buffer); + si->buffer = malloc(si->frame_size); + if (si->buffer == NULL) { + ld_list_free(cx_layer_list); + return VPX_CODEC_MEM_ERROR; + } + si->buffer_size = si->frame_size; + } + // copy layer data into packet + ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - // all layers encoded, create single buffer with concatenated layers - if (si->frame_size > si->buffer_size) { - free(si->buffer); - si->buffer = malloc(si->frame_size); - if (si->buffer == NULL) { ld_list_free(cx_layer_list); - 
return VPX_CODEC_MEM_ERROR; + + svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " + "pts: %d\n", si->encode_frame_count, si->is_keyframe, + (int)si->frame_size, (int)pts); } - si->buffer_size = si->frame_size; } - // copy layer data into packet - ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - - ld_list_free(cx_layer_list); - - svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n", - si->encode_frame_count, si->is_keyframe, (int)si->frame_size, - (int)pts); ++si->frame_within_gop; ++si->encode_frame_count; @@ -1077,7 +1121,24 @@ void vpx_svc_release(SvcContext *svc_ctx) { si = (SvcInternal *)svc_ctx->internal; if (si != NULL) { free(si->buffer); + if (si->rc_stats_buf) { + free(si->rc_stats_buf); + } free(si); svc_ctx->internal = NULL; } } + +size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return 0; + return si->rc_stats_buf_used; +} + +char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx) { + const SvcInternal *const si = get_const_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return NULL; + return si->rc_stats_buf; +} + + diff --git a/source/libvpx/vpx/src/vpx_encoder.c b/source/libvpx/vpx/src/vpx_encoder.c index e69d96e..ece2d0b 100644 --- a/source/libvpx/vpx/src/vpx_encoder.c +++ b/source/libvpx/vpx/src/vpx_encoder.c @@ -394,7 +394,7 @@ const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list *iter = list->pkts; } - pkt = (const void *) * iter; + pkt = (const vpx_codec_cx_pkt_t *)*iter; if ((size_t)(pkt - list->pkts) < list->cnt) *iter = pkt + 1; diff --git a/source/libvpx/vpx/svc_context.h b/source/libvpx/vpx/svc_context.h index 98474ca..5d0fbbd 100644 --- a/source/libvpx/vpx/svc_context.h +++ b/source/libvpx/vpx/svc_context.h @@ -114,6 +114,17 @@ size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx); void *vpx_svc_get_buffer(const 
SvcContext *svc_ctx); /** + * return size of two pass rate control stats data to be returned by + * vpx_svc_get_rc_stats_buffer + */ +size_t vpx_svc_get_rc_stats_buffer_size(const SvcContext *svc_ctx); + +/** + * return buffer two pass of rate control stats data + */ +char *vpx_svc_get_rc_stats_buffer(const SvcContext *svc_ctx); + +/** * return spatial resolution of the specified layer */ vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, diff --git a/source/libvpx/vpx/vp8cx.h b/source/libvpx/vpx/vp8cx.h index 0b637d4..8944a26 100644 --- a/source/libvpx/vpx/vp8cx.h +++ b/source/libvpx/vpx/vp8cx.h @@ -192,6 +192,7 @@ enum vp8e_enc_control_id { VP9E_SET_TILE_ROWS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, + VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_SVC, VP9E_SET_SVC_PARAMETERS, @@ -364,6 +365,8 @@ VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_AQ_MODE, unsigned int) +VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int) + /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/source/libvpx/vpx/vpx_encoder.h b/source/libvpx/vpx/vpx_encoder.h index 851ff1a..2c882c1 100644 --- a/source/libvpx/vpx/vpx_encoder.h +++ b/source/libvpx/vpx/vpx_encoder.h @@ -49,7 +49,7 @@ extern "C" { #define VPX_SS_MAX_LAYERS 5 /*! Spatial Scalability: Default number of coding layers */ -#define VPX_SS_DEFAULT_LAYERS 3 +#define VPX_SS_DEFAULT_LAYERS 1 /*!\brief Current ABI version number * diff --git a/source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm b/source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm deleted file mode 100644 index d070a47..0000000 --- a/source/libvpx/vpx_scale/arm/neon/vp8_vpxyv12_copy_y_neon.asm +++ /dev/null @@ -1,123 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. 
An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - - EXPORT |vpx_yv12_copy_y_neon| - - ARM - REQUIRE8 - PRESERVE8 - - INCLUDE vpx_scale_asm_offsets.asm - - AREA ||.text||, CODE, READONLY, ALIGN=2 - -;void vpx_yv12_copy_y_neon(const YV12_BUFFER_CONFIG *src_ybc, -; YV12_BUFFER_CONFIG *dst_ybc) -|vpx_yv12_copy_y_neon| PROC - push {r4 - r11, lr} - vpush {d8-d15} - - ldr r4, [r0, #yv12_buffer_config_y_height] - ldr r5, [r0, #yv12_buffer_config_y_width] - ldr r6, [r0, #yv12_buffer_config_y_stride] - ldr r7, [r1, #yv12_buffer_config_y_stride] - ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 - ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1 - - ; copy two rows at one time - mov lr, r4, lsr #1 - -cp_src_to_dst_height_loop1 - mov r8, r2 - mov r9, r3 - add r10, r2, r6 - add r11, r3, r7 - movs r12, r5, lsr #7 - ble extra_copy_needed ; y_width < 128 - -cp_src_to_dst_width_loop1 - vld1.8 {q0, q1}, [r8]! - vld1.8 {q8, q9}, [r10]! - vld1.8 {q2, q3}, [r8]! - vld1.8 {q10, q11}, [r10]! - vld1.8 {q4, q5}, [r8]! - vld1.8 {q12, q13}, [r10]! - vld1.8 {q6, q7}, [r8]! - vld1.8 {q14, q15}, [r10]! - - subs r12, r12, #1 - - vst1.8 {q0, q1}, [r9]! - vst1.8 {q8, q9}, [r11]! - vst1.8 {q2, q3}, [r9]! - vst1.8 {q10, q11}, [r11]! - vst1.8 {q4, q5}, [r9]! - vst1.8 {q12, q13}, [r11]! - vst1.8 {q6, q7}, [r9]! - vst1.8 {q14, q15}, [r11]! 
- - bne cp_src_to_dst_width_loop1 - - subs lr, lr, #1 - add r2, r2, r6, lsl #1 - add r3, r3, r7, lsl #1 - - bne cp_src_to_dst_height_loop1 - -extra_copy_needed - ands r10, r5, #0x7f ;check to see if extra copy is needed - sub r11, r5, r10 - ldr r2, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 - ldr r3, [r1, #yv12_buffer_config_y_buffer] ;dstptr1 - bne extra_cp_src_to_dst_width1 -end_of_cp_src_to_dst1 - - vpop {d8 - d15} - pop {r4-r11, pc} - -;============================= -extra_cp_src_to_dst_width1 - add r2, r2, r11 - add r3, r3, r11 - add r0, r8, r6 - add r11, r9, r7 - - mov lr, r4, lsr #1 -extra_cp_src_to_dst_height_loop1 - mov r8, r2 - mov r9, r3 - add r0, r8, r6 - add r11, r9, r7 - - mov r12, r10 - -extra_cp_src_to_dst_width_loop1 - vld1.8 {q0}, [r8]! - vld1.8 {q1}, [r0]! - - subs r12, r12, #16 - - vst1.8 {q0}, [r9]! - vst1.8 {q1}, [r11]! - bne extra_cp_src_to_dst_width_loop1 - - subs lr, lr, #1 - - add r2, r2, r6, lsl #1 - add r3, r3, r7, lsl #1 - - bne extra_cp_src_to_dst_height_loop1 - - b end_of_cp_src_to_dst1 - - ENDP - - END diff --git a/source/libvpx/vpx_scale/vpx_scale.mk b/source/libvpx/vpx_scale/vpx_scale.mk index 50d3e9d..ded8e0b 100644 --- a/source/libvpx/vpx_scale/vpx_scale.mk +++ b/source/libvpx/vpx_scale/vpx_scale.mk @@ -7,11 +7,10 @@ SCALE_SRCS-yes += generic/yv12extend.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c SCALE_SRCS-yes += vpx_scale_asm_offsets.c SCALE_SRCS-yes += vpx_scale_rtcd.c -SCALE_SRCS-yes += vpx_scale_rtcd.sh +SCALE_SRCS-yes += vpx_scale_rtcd.pl #neon SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM) -SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copy_y_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c @@ -24,4 +23,4 @@ SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) $(eval $(call asm_offsets_template,\ 
vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c)) -$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.sh)) +$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl)) diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.pl b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl new file mode 100644 index 0000000..8c92570 --- /dev/null +++ b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl @@ -0,0 +1,34 @@ +sub vpx_scale_forward_decls() { +print <<EOF +struct yv12_buffer_config; +EOF +} +forward_decls qw/vpx_scale_forward_decls/; + +# Scaler functions +if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") { + add_proto qw/void vp8_horizontal_line_5_4_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_5_4_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_horizontal_line_5_3_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_5_3_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_horizontal_line_2_1_scale/, "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_2_1_scale/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; + add_proto qw/void vp8_vertical_band_2_1_scale_i/, "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width"; +} + +add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf"; +specialize qw/vp8_yv12_extend_frame_borders neon/; + +add_proto qw/void vp8_yv12_copy_frame/, "const struct 
yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; +specialize qw/vp8_yv12_copy_frame neon/; + +add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc"; + +if (vpx_config("CONFIG_VP9") eq "yes") { + add_proto qw/void vp9_extend_frame_borders/, "struct yv12_buffer_config *ybf"; + specialize qw/vp9_extend_frame_borders dspr2/; + + add_proto qw/void vp9_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf"; + specialize qw/vp9_extend_frame_inner_borders dspr2/; +} +1; diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.sh b/source/libvpx/vpx_scale/vpx_scale_rtcd.sh deleted file mode 100755 index c26208c..0000000 --- a/source/libvpx/vpx_scale/vpx_scale_rtcd.sh +++ /dev/null @@ -1,34 +0,0 @@ -vpx_scale_forward_decls() { -cat <<EOF -struct yv12_buffer_config; -EOF -} -forward_decls vpx_scale_forward_decls - -# Scaler functions -if [ "$CONFIG_SPATIAL_RESAMPLING" = "yes" ]; then - prototype void vp8_horizontal_line_5_4_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_4_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_5_3_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_5_3_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_horizontal_line_2_1_scale "const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale "unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" - prototype void vp8_vertical_band_2_1_scale_i "unsigned char *source, unsigned int 
src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width" -fi - -prototype void vp8_yv12_extend_frame_borders "struct yv12_buffer_config *ybf" -specialize vp8_yv12_extend_frame_borders neon - -prototype void vp8_yv12_copy_frame "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vp8_yv12_copy_frame neon - -prototype void vpx_yv12_copy_y "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc" -specialize vpx_yv12_copy_y neon - -if [ "$CONFIG_VP9" = "yes" ]; then - prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf" - specialize vp9_extend_frame_borders dspr2 - - prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf" - specialize vp9_extend_frame_inner_borders dspr2 -fi diff --git a/source/libvpx/vpxdec.c b/source/libvpx/vpxdec.c index b69e55e..4c37234 100644 --- a/source/libvpx/vpxdec.c +++ b/source/libvpx/vpxdec.c @@ -218,9 +218,11 @@ static int raw_read_frame(FILE *infile, uint8_t **buffer, static int read_frame(struct VpxDecInputContext *input, uint8_t **buf, size_t *bytes_in_buffer, size_t *buffer_size) { switch (input->vpx_input_ctx->file_type) { +#if CONFIG_WEBM_IO case FILE_TYPE_WEBM: return webm_read_frame(input->webm_ctx, buf, bytes_in_buffer, buffer_size); +#endif case FILE_TYPE_RAW: return raw_read_frame(input->vpx_input_ctx->file, buf, bytes_in_buffer, buffer_size); @@ -663,12 +665,17 @@ int main_loop(int argc, const char **argv_) { input.vpx_input_ctx->file = infile; if (file_is_ivf(input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_IVF; +#if CONFIG_WEBM_IO else if (file_is_webm(input.webm_ctx, input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_WEBM; +#endif else if (file_is_raw(input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_RAW; else { fprintf(stderr, "Unrecognized input file type.\n"); +#if !CONFIG_WEBM_IO + fprintf(stderr, "vpxdec was built without WebM container support.\n"); 
+#endif return EXIT_FAILURE; } @@ -691,6 +698,7 @@ int main_loop(int argc, const char **argv_) { return EXIT_FAILURE; } +#if CONFIG_WEBM_IO if (vpx_input_ctx.file_type == FILE_TYPE_WEBM) { if (webm_guess_framerate(input.webm_ctx, input.vpx_input_ctx)) { fprintf(stderr, "Failed to guess framerate -- error parsing " @@ -698,6 +706,7 @@ int main_loop(int argc, const char **argv_) { return EXIT_FAILURE; } } +#endif } fourcc_interface = get_vpx_decoder_by_fourcc(vpx_input_ctx.fourcc); @@ -941,9 +950,12 @@ fail: } } +#if CONFIG_WEBM_IO if (input.vpx_input_ctx->file_type == FILE_TYPE_WEBM) webm_free(input.webm_ctx); - else +#endif + + if (input.vpx_input_ctx->file_type != FILE_TYPE_WEBM) free(buf); if (scaled_img) vpx_img_free(scaled_img); diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c index c61d83e..00d3e3e 100644 --- a/source/libvpx/vpxenc.c +++ b/source/libvpx/vpxenc.c @@ -123,6 +123,7 @@ int fourcc_is_ivf(const char detect[4]) { return 0; } +#if CONFIG_WEBM_IO /* Murmur hash derived from public domain reference implementation at * http:// sites.google.com/site/murmurhash/ */ @@ -169,7 +170,7 @@ static unsigned int murmur(const void *key, int len, unsigned int seed) { return h; } - +#endif // CONFIG_WEBM_IO static const arg_def_t debugmode = ARG_DEF("D", "debug", 0, "Debug mode (makes output deterministic)"); @@ -218,7 +219,7 @@ static const arg_def_t recontest = ARG_DEF_ENUM(NULL, "test-decode", 1, static const arg_def_t framerate = ARG_DEF(NULL, "fps", 1, "Stream frame rate (rate/scale)"); static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0, - "Output IVF (default is WebM)"); + "Output IVF (default is WebM if WebM IO is enabled)"); static const arg_def_t out_part = ARG_DEF("P", "output-partitions", 0, "Makes encoder output partitions. 
Requires IVF output!"); static const arg_def_t q_hist_n = ARG_DEF(NULL, "q-hist", 1, @@ -399,13 +400,17 @@ static const arg_def_t frame_parallel_decoding = ARG_DEF( NULL, "frame-parallel", 1, "Enable frame parallel decodability features"); static const arg_def_t aq_mode = ARG_DEF( NULL, "aq-mode", 1, - "Adaptive q mode (0: off (by default), 1: variance 2: complexity)"); + "Adaptive q mode (0: off (by default), 1: variance 2: complexity, " + "3: cyclic refresh)"); +static const arg_def_t frame_periodic_boost = ARG_DEF( + NULL, "frame_boost", 1, + "Enable frame periodic boost (0: off (by default), 1: on)"); static const arg_def_t *vp9_args[] = { &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless, - &frame_parallel_decoding, &aq_mode, + &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, NULL }; static const int vp9_arg_ctrl_map[] = { @@ -415,6 +420,7 @@ static const int vp9_arg_ctrl_map[] = { VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, + VP9E_SET_FRAME_PERIODIC_BOOST, 0 }; #endif @@ -834,7 +840,9 @@ static struct stream_state *new_stream(struct VpxEncoderConfig *global, /* Initialize remaining stream parameters */ stream->config.stereo_fmt = STEREO_FORMAT_MONO; stream->config.write_webm = 1; +#if CONFIG_WEBM_IO stream->ebml.last_pts_ms = -1; +#endif /* Allows removal of the application version from the EBML tags */ stream->ebml.debug = global->debug; @@ -1143,13 +1151,17 @@ static void open_output_file(struct stream_state *stream, if (stream->config.write_webm && fseek(stream->file, 0, SEEK_CUR)) fatal("WebM output to pipes not supported."); +#if CONFIG_WEBM_IO if (stream->config.write_webm) { stream->ebml.stream = stream->file; 
write_webm_file_header(&stream->ebml, cfg, &global->framerate, stream->config.stereo_fmt, global->codec->fourcc); - } else { + } +#endif + + if (!stream->config.write_webm) { ivf_write_file_header(stream->file, cfg, global->codec->fourcc, 0); } } @@ -1162,11 +1174,15 @@ static void close_output_file(struct stream_state *stream, if (cfg->g_pass == VPX_RC_FIRST_PASS) return; +#if CONFIG_WEBM_IO if (stream->config.write_webm) { write_webm_file_footer(&stream->ebml, stream->hash); free(stream->ebml.cue_list); stream->ebml.cue_list = NULL; - } else { + } +#endif + + if (!stream->config.write_webm) { if (!fseek(stream->file, 0, SEEK_SET)) ivf_write_file_header(stream->file, &stream->config.cfg, fourcc, @@ -1316,6 +1332,7 @@ static void get_cx_data(struct stream_state *stream, fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz); update_rate_histogram(stream->rate_hist, cfg, pkt); +#if CONFIG_WEBM_IO if (stream->config.write_webm) { /* Update the hash */ if (!stream->ebml.debug) @@ -1324,7 +1341,9 @@ static void get_cx_data(struct stream_state *stream, stream->hash); write_webm_block(&stream->ebml, cfg, pkt); - } else { + } +#endif + if (!stream->config.write_webm) { if (pkt->data.frame.partition_id <= 0) { ivf_header_pos = ftello(stream->file); fsize = pkt->data.frame.sz; @@ -1484,7 +1503,7 @@ static void print_time(const char *label, int64_t etl) { etl -= mins * 60; secs = etl; - fprintf(stderr, "[%3s %2"PRId64":%02"PRId64": % 02"PRId64"] ", + fprintf(stderr, "[%3s %2"PRId64":%02"PRId64":%02"PRId64"] ", label, hours, mins, secs); } else { fprintf(stderr, "[%3s unknown] ", label); @@ -1594,6 +1613,14 @@ int main(int argc, const char **argv_) { " and --passes=2\n", stream->index, global.pass); }); +#if !CONFIG_WEBM_IO + FOREACH_STREAM({ + stream->config.write_webm = 0; + warn("vpxenc was compiled without WebM container support." + "Producing IVF output"); + }); +#endif + /* Use the frame rate from the file only if none was specified * on the command-line. 
*/ diff --git a/source/libvpx/y4minput.c b/source/libvpx/y4minput.c index 47f005a..90c5310a 100644 --- a/source/libvpx/y4minput.c +++ b/source/libvpx/y4minput.c @@ -10,10 +10,45 @@ * Based on code from the OggTheora software codec source code, * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors. */ +#include <errno.h> #include <stdlib.h> #include <string.h> + +#include "vpx/vpx_integer.h" #include "y4minput.h" +// Reads 'size' bytes from 'file' into 'buf' with some fault tolerance. +// Returns true on success. +static int file_read(void *buf, size_t size, FILE *file) { + const int kMaxRetries = 5; + int retry_count = 0; + int file_error; + size_t len = 0; + do { + const size_t n = fread((uint8_t*)buf + len, 1, size - len, file); + len += n; + file_error = ferror(file); + if (file_error) { + if (errno == EINTR || errno == EAGAIN) { + clearerr(file); + continue; + } else { + fprintf(stderr, "Error reading file: %u of %u bytes read, %d: %s\n", + (uint32_t)len, (uint32_t)size, errno, strerror(errno)); + return 0; + } + } + } while (!feof(file) && len < size && ++retry_count < kMaxRetries); + + if (!feof(file) && len != size) { + fprintf(stderr, "Error reading file: %u of %u bytes read," + " error: %d, retries: %d, %d: %s\n", + (uint32_t)len, (uint32_t)size, file_error, retry_count, + errno, strerror(errno)); + } + return len == size; +} + static int y4m_parse_tags(y4m_input *_y4m, char *_tags) { int got_w; int got_h; @@ -670,8 +705,7 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip, buffer[i] = *_skip++; _nskip--; } else { - ret = (int)fread(buffer + i, 1, 1, _fin); - if (ret < 1)return -1; + if (!file_read(buffer + i, 1, _fin)) return -1; } if (buffer[i] == '\n')break; } @@ -853,10 +887,8 @@ int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) { int c_w; int c_h; int c_sz; - int ret; /*Read and skip the frame header.*/ - ret = (int)fread(frame, 1, 6, _fin); - if (ret < 6)return 0; + if (!file_read(frame, 6, 
_fin)) return 0; if (memcmp(frame, "FRAME", 5)) { fprintf(stderr, "Loss of framing in Y4M input data\n"); return -1; @@ -864,19 +896,19 @@ int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) { if (frame[5] != '\n') { char c; int j; - for (j = 0; j < 79 && fread(&c, 1, 1, _fin) && c != '\n'; j++); + for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) {} if (j == 79) { fprintf(stderr, "Error parsing Y4M frame header\n"); return -1; } } /*Read the frame data that needs no conversion.*/ - if (fread(_y4m->dst_buf, 1, _y4m->dst_buf_read_sz, _fin) != _y4m->dst_buf_read_sz) { + if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) { fprintf(stderr, "Error reading Y4M frame data.\n"); return -1; } /*Read the frame data that does need conversion.*/ - if (fread(_y4m->aux_buf, 1, _y4m->aux_buf_read_sz, _fin) != _y4m->aux_buf_read_sz) { + if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) { fprintf(stderr, "Error reading Y4M frame data.\n"); return -1; } |