Merge third_party/libvpx from https://chromium.googlesource.com/chromium/deps/libvpx.git at db9ac6c76553d95d7eb35e2bcf84c16a7901c3c3

This commit was generated by merge_from_chromium.py. Change-Id: Idb617399bc04dbbe59bbf8e499a42d67308f92c3
author: Torne (Richard Coles) <torne@google.com> 2014-06-24 11:04:27 +0100
committer: Torne (Richard Coles) <torne@google.com> 2014-06-24 11:04:27 +0100
commit: c1633d58a5bb3344df388ccd1c12445a6dfd3098 (patch)
tree: 2ae99634b3c1b3d9d69eb6e531455175a81dda6c
parent: d77dc4514a925c51ea9a72901526e45e361f55c8 (diff)
parent: db9ac6c76553d95d7eb35e2bcf84c16a7901c3c3 (diff)
download: libvpx-c1633d58a5bb3344df388ccd1c12445a6dfd3098.tar.gz
165 files changed, 3963 insertions, 2637 deletions
diff --git a/README.chromium b/README.chromium
index 8f30d03..569b4b9 100644
--- a/README.chromium
+++ b/README.chromium
@@ -5,9 +5,9 @@ License: BSD
 License File: source/libvpx/LICENSE
 Security Critical: yes
 
-Date: Thursday May 8 2014
+Date: Friday May 16 2014
 Branch: master
-Commit: 91344f0a36f83d73af1f5325be792235eb021802
+Commit: ed83c2a94c8664a6d2e54b21771c0560b2bb90ac
 
 Description:
 Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/generate_gypi.sh b/generate_gypi.sh
index 6ca1b49..0a58086 100755
--- a/generate_gypi.sh
+++ b/generate_gypi.sh
@@ -227,9 +227,13 @@ function make_clean {
 # Lint a pair of vpx_config.h and vpx_config.asm to make sure they match.
 # $1 - Header file directory.
 function lint_config {
-  $BASE_DIR/lint_config.sh \
-    -h $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.h \
-    -a $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.asm
+  # mips does not contain any assembly so the header does not need to be
+  # compared to the asm.
+  if [[ "$1" != *mipsel ]]; then
+    $BASE_DIR/lint_config.sh \
+      -h $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.h \
+      -a $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_config.asm
+  fi
 }
 
 # Print the configuration.
@@ -330,9 +334,10 @@ echo "Generate Config Files"
 all_platforms="--enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2"
 gen_config_files linux/ia32 "--target=x86-linux-gcc --disable-ccache --enable-pic --enable-realtime-only ${all_platforms}"
 gen_config_files linux/x64 "--target=x86_64-linux-gcc --disable-ccache --enable-pic --enable-realtime-only ${all_platforms}"
-gen_config_files linux/arm "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs ${all_platforms}"
-gen_config_files linux/arm-neon "--target=armv7-linux-gcc --enable-pic --enable-realtime-only ${all_platforms}"
-gen_config_files linux/arm-neon-cpu-detect "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect ${all_platforms}"
+gen_config_files linux/arm "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --disable-edsp ${all_platforms}"
+gen_config_files linux/arm-neon "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --disable-edsp ${all_platforms}"
+gen_config_files linux/arm-neon-cpu-detect "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --disable-edsp ${all_platforms}"
+gen_config_files linux/arm64 "--force-target=armv8-linux-gcc --enable-pic --enable-realtime-only --disable-edsp ${all_platforms}"
 gen_config_files linux/mipsel "--target=mips32-linux-gcc --disable-fast-unaligned ${all_platforms}"
 gen_config_files linux/generic "--target=generic-gnu --enable-pic --enable-realtime-only ${all_platforms}"
 gen_config_files win/ia32 "--target=x86-win32-vs12 --enable-realtime-only ${all_platforms}"
@@ -351,6 +356,8 @@ lint_config linux/x64
 lint_config linux/arm
 lint_config linux/arm-neon
 lint_config linux/arm-neon-cpu-detect
+lint_config linux/arm64
+lint_config linux/mipsel
 lint_config linux/generic
 lint_config win/ia32
 lint_config win/x64
@@ -369,6 +376,7 @@ gen_rtcd_header linux/x64 x86_64
 gen_rtcd_header linux/arm armv6
 gen_rtcd_header linux/arm-neon armv7
 gen_rtcd_header linux/arm-neon-cpu-detect armv7
+gen_rtcd_header linux/arm64 armv8
 gen_rtcd_header linux/mipsel mipsel
 gen_rtcd_header linux/generic generic
 gen_rtcd_header win/ia32 x86
@@ -414,6 +422,12 @@ make_clean
 make libvpx_srcs.txt target=libs $config > /dev/null
 convert_srcs_to_gypi libvpx_srcs.txt libvpx_srcs_arm_neon_cpu_detect
 
+echo "Generate ARM64 source list."
+config=$(print_config linux/arm64)
+make_clean
+make libvpx_srcs.txt target=libs $config > /dev/null
+convert_srcs_to_gypi libvpx_srcs.txt libvpx_srcs_arm64
+
 echo "Generate MIPS source list."
 config=$(print_config_basic linux/mipsel)
 make_clean
diff --git a/libvpx.gyp b/libvpx.gyp
index 3e93783..8286777 100644
--- a/libvpx.gyp
+++ b/libvpx.gyp
@@ -29,7 +29,7 @@
             ],
           }],
           ['target_arch=="arm64"', {
-            'target_arch_full': 'generic',
+            'target_arch_full': 'arm64',
           }],
         ],
       }],
@@ -53,7 +53,7 @@
     'variables': {
       'conditions': [
         ['OS=="win" and buildtype=="Official"', {
-          # Do not set to 'size', as it results in an error on win64. 
+          # Do not set to 'size', as it results in an error on win64.
           'optimize' :'speed',
         }],
       ],
@@ -132,7 +132,7 @@
               ],
             }],
             ['target_arch=="arm64"', {
-              'includes': [ 'libvpx_srcs_generic.gypi', ],
+              'includes': [ 'libvpx_srcs_arm64.gypi', ],
             }],
             ['target_arch=="x64"', {
               'conditions': [
@@ -280,14 +280,6 @@
             'ads2gas_script_path': '<(libvpx_source)/build/make/<(ads2gas_script)',
             'ads2gas_script_include': '<(libvpx_source)/build/make/thumb.pm',
           },
-          # We need to explicitly tell the assembler to look for
-          # .include directive files from the place where they're
-          # generated to.
-          'cflags': [
-             '-Wa,-I,<!(pwd)/source/config/<(OS_CATEGORY)/<(target_arch_full)',
-             '-Wa,-I,<!(pwd)/source/config',
-             '-Wa,-I,<(shared_generated_dir)',
-          ],
           'xcode_settings': {
             'OTHER_CFLAGS': [
               '-I<!(pwd)/source/config/<(OS_CATEGORY)/<(target_arch_full)',
@@ -305,7 +297,21 @@
               '<(libvpx_source)',
             ],
           },
+          # We need to explicitly tell the assembler to look for
+          # .include directive files from the place where they're
+          # generated to.
+          'cflags': [
+             '-Wa,-I,<(shared_generated_dir)',
+          ],
           'conditions': [
+            # For Android WebView, the following paths are not required and not
+            # allowed, because they generate absolute paths.
+            ['android_webview_build!=1', {
+              'cflags': [
+                '-Wa,-I,<!(pwd)/source/config/<(OS_CATEGORY)/<(target_arch_full)',
+                '-Wa,-I,<!(pwd)/source/config',
+              ],
+            }],
             # Libvpx optimizations for ARMv6 or ARMv7 without NEON.
             ['arm_neon==0', {
               'conditions': [
@@ -457,7 +463,7 @@
           ['android_webview_build==1', {
             # pass the empty string for 3rd and 4th arguments of
             # intermediates-dir-for macro.
-            'lib_intermediate_name' : '$(realpath $(call intermediates-dir-for, STATIC_LIBRARIES, libvpx_asm_offsets_vp8,,, $(GYP_VAR_PREFIX)))/libvpx_asm_offsets_vp8.a',
+            'lib_intermediate_name' : '$(abspath $(call intermediates-dir-for,STATIC_LIBRARIES,libvpx_asm_offsets_vp8,,,$(gyp_var_prefix)))/libvpx_asm_offsets_vp8.a',
           }],
           ['(target_arch=="arm" or target_arch=="armv7")', {
             'output_format': 'gas',
@@ -530,7 +536,7 @@
           ['android_webview_build==1', {
             # pass the empty string for 3rd and 4th arguments of
             # intermediates-dir-for macro.
-            'lib_intermediate_name' : '<(android_src)/$(call intermediates-dir-for, STATIC_LIBRARIES, libvpx_asm_offsets_vpx_scale,,, $(GYP_VAR_PREFIX))/libvpx_asm_offsets_vpx_scale.a',
+            'lib_intermediate_name' : '$(abspath $(call intermediates-dir-for,STATIC_LIBRARIES,libvpx_asm_offsets_vpx_scale,,,$(gyp_var_prefix)))/libvpx_asm_offsets_vpx_scale.a',
           }],
           ['(target_arch=="arm" or target_arch=="armv7")', {
             'output_format': 'gas',
diff --git a/libvpx_srcs_arm.gypi b/libvpx_srcs_arm.gypi
index 904b9ed..2e6dd9b 100644
--- a/libvpx_srcs_arm.gypi
+++ b/libvpx_srcs_arm.gypi
@@ -33,7 +33,6 @@
     '<(libvpx_source)/vp8/common/arm/dequantize_arm.c',
     '<(libvpx_source)/vp8/common/arm/filter_arm.c',
     '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c',
-    '<(libvpx_source)/vp8/common/arm/reconintra_arm.c',
     '<(libvpx_source)/vp8/common/arm/variance_arm.c',
     '<(libvpx_source)/vp8/common/blockd.c',
     '<(libvpx_source)/vp8/common/blockd.h',
@@ -106,21 +105,17 @@
     '<(libvpx_source)/vp8/decoder/onyxd_int.h',
     '<(libvpx_source)/vp8/decoder/threading.c',
     '<(libvpx_source)/vp8/decoder/treereader.h',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm',
-    '<(libvpx_source)/vp8/encoder/arm/boolhuff_arm.c',
     '<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
     '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c',
     '<(libvpx_source)/vp8/encoder/bitstream.c',
     '<(libvpx_source)/vp8/encoder/bitstream.h',
     '<(libvpx_source)/vp8/encoder/block.h',
+    '<(libvpx_source)/vp8/encoder/boolhuff.c',
     '<(libvpx_source)/vp8/encoder/boolhuff.h',
     '<(libvpx_source)/vp8/encoder/dct.c',
     '<(libvpx_source)/vp8/encoder/dct_value_cost.h',
diff --git a/libvpx_srcs_arm64.gypi b/libvpx_srcs_arm64.gypi
new file mode 100644
index 0000000..46aeedb
--- /dev/null
+++ b/libvpx_srcs_arm64.gypi
@@ -0,0 +1,325 @@
+# This file is generated. Do not edit.
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+{
+  'sources': [
+    '<(libvpx_source)/vp8/common/alloccommon.c',
+    '<(libvpx_source)/vp8/common/alloccommon.h',
+    '<(libvpx_source)/vp8/common/arm/dequantize_arm.c',
+    '<(libvpx_source)/vp8/common/arm/filter_arm.c',
+    '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c',
+    '<(libvpx_source)/vp8/common/arm/neon/bilinearpredict_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/copymem_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/dc_only_idct_add_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/dequant_idct_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/dequantizeb_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/iwalsh_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/mbloopfilter_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/sad_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c',
+    '<(libvpx_source)/vp8/common/arm/variance_arm.c',
+    '<(libvpx_source)/vp8/common/blockd.c',
+    '<(libvpx_source)/vp8/common/blockd.h',
+    '<(libvpx_source)/vp8/common/coefupdateprobs.h',
+    '<(libvpx_source)/vp8/common/common.h',
+    '<(libvpx_source)/vp8/common/debugmodes.c',
+    '<(libvpx_source)/vp8/common/default_coef_probs.h',
+    '<(libvpx_source)/vp8/common/dequantize.c',
+    '<(libvpx_source)/vp8/common/entropy.c',
+    '<(libvpx_source)/vp8/common/entropy.h',
+    '<(libvpx_source)/vp8/common/entropymode.c',
+    '<(libvpx_source)/vp8/common/entropymode.h',
+    '<(libvpx_source)/vp8/common/entropymv.c',
+    '<(libvpx_source)/vp8/common/entropymv.h',
+    '<(libvpx_source)/vp8/common/extend.c',
+    '<(libvpx_source)/vp8/common/extend.h',
+    '<(libvpx_source)/vp8/common/filter.c',
+    '<(libvpx_source)/vp8/common/filter.h',
+    '<(libvpx_source)/vp8/common/findnearmv.c',
+    '<(libvpx_source)/vp8/common/findnearmv.h',
+    '<(libvpx_source)/vp8/common/generic/systemdependent.c',
+    '<(libvpx_source)/vp8/common/header.h',
+    '<(libvpx_source)/vp8/common/idct_blk.c',
+    '<(libvpx_source)/vp8/common/idctllm.c',
+    '<(libvpx_source)/vp8/common/invtrans.h',
+    '<(libvpx_source)/vp8/common/loopfilter.c',
+    '<(libvpx_source)/vp8/common/loopfilter.h',
+    '<(libvpx_source)/vp8/common/loopfilter_filters.c',
+    '<(libvpx_source)/vp8/common/mbpitch.c',
+    '<(libvpx_source)/vp8/common/mfqe.c',
+    '<(libvpx_source)/vp8/common/modecont.c',
+    '<(libvpx_source)/vp8/common/modecont.h',
+    '<(libvpx_source)/vp8/common/mv.h',
+    '<(libvpx_source)/vp8/common/onyx.h',
+    '<(libvpx_source)/vp8/common/onyxc_int.h',
+    '<(libvpx_source)/vp8/common/onyxd.h',
+    '<(libvpx_source)/vp8/common/postproc.c',
+    '<(libvpx_source)/vp8/common/postproc.h',
+    '<(libvpx_source)/vp8/common/ppflags.h',
+    '<(libvpx_source)/vp8/common/pragmas.h',
+    '<(libvpx_source)/vp8/common/quant_common.c',
+    '<(libvpx_source)/vp8/common/quant_common.h',
+    '<(libvpx_source)/vp8/common/reconinter.c',
+    '<(libvpx_source)/vp8/common/reconinter.h',
+    '<(libvpx_source)/vp8/common/reconintra.c',
+    '<(libvpx_source)/vp8/common/reconintra4x4.c',
+    '<(libvpx_source)/vp8/common/reconintra4x4.h',
+    '<(libvpx_source)/vp8/common/rtcd.c',
+    '<(libvpx_source)/vp8/common/sad_c.c',
+    '<(libvpx_source)/vp8/common/setupintrarecon.c',
+    '<(libvpx_source)/vp8/common/setupintrarecon.h',
+    '<(libvpx_source)/vp8/common/swapyv12buffer.c',
+    '<(libvpx_source)/vp8/common/swapyv12buffer.h',
+    '<(libvpx_source)/vp8/common/systemdependent.h',
+    '<(libvpx_source)/vp8/common/threading.h',
+    '<(libvpx_source)/vp8/common/treecoder.c',
+    '<(libvpx_source)/vp8/common/treecoder.h',
+    '<(libvpx_source)/vp8/common/variance.h',
+    '<(libvpx_source)/vp8/common/variance_c.c',
+    '<(libvpx_source)/vp8/common/vp8_entropymodedata.h',
+    '<(libvpx_source)/vp8/decoder/dboolhuff.c',
+    '<(libvpx_source)/vp8/decoder/dboolhuff.h',
+    '<(libvpx_source)/vp8/decoder/decodeframe.c',
+    '<(libvpx_source)/vp8/decoder/decodemv.c',
+    '<(libvpx_source)/vp8/decoder/decodemv.h',
+    '<(libvpx_source)/vp8/decoder/decoderthreading.h',
+    '<(libvpx_source)/vp8/decoder/detokenize.c',
+    '<(libvpx_source)/vp8/decoder/detokenize.h',
+    '<(libvpx_source)/vp8/decoder/onyxd_if.c',
+    '<(libvpx_source)/vp8/decoder/onyxd_int.h',
+    '<(libvpx_source)/vp8/decoder/threading.c',
+    '<(libvpx_source)/vp8/decoder/treereader.h',
+    '<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
+    '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c',
+    '<(libvpx_source)/vp8/encoder/arm/quantize_arm.c',
+    '<(libvpx_source)/vp8/encoder/bitstream.c',
+    '<(libvpx_source)/vp8/encoder/bitstream.h',
+    '<(libvpx_source)/vp8/encoder/block.h',
+    '<(libvpx_source)/vp8/encoder/boolhuff.c',
+    '<(libvpx_source)/vp8/encoder/boolhuff.h',
+    '<(libvpx_source)/vp8/encoder/dct.c',
+    '<(libvpx_source)/vp8/encoder/dct_value_cost.h',
+    '<(libvpx_source)/vp8/encoder/dct_value_tokens.h',
+    '<(libvpx_source)/vp8/encoder/defaultcoefcounts.h',
+    '<(libvpx_source)/vp8/encoder/denoising.c',
+    '<(libvpx_source)/vp8/encoder/denoising.h',
+    '<(libvpx_source)/vp8/encoder/encodeframe.c',
+    '<(libvpx_source)/vp8/encoder/encodeframe.h',
+    '<(libvpx_source)/vp8/encoder/encodeintra.c',
+    '<(libvpx_source)/vp8/encoder/encodeintra.h',
+    '<(libvpx_source)/vp8/encoder/encodemb.c',
+    '<(libvpx_source)/vp8/encoder/encodemb.h',
+    '<(libvpx_source)/vp8/encoder/encodemv.c',
+    '<(libvpx_source)/vp8/encoder/encodemv.h',
+    '<(libvpx_source)/vp8/encoder/ethreading.c',
+    '<(libvpx_source)/vp8/encoder/firstpass.h',
+    '<(libvpx_source)/vp8/encoder/lookahead.c',
+    '<(libvpx_source)/vp8/encoder/lookahead.h',
+    '<(libvpx_source)/vp8/encoder/mcomp.c',
+    '<(libvpx_source)/vp8/encoder/mcomp.h',
+    '<(libvpx_source)/vp8/encoder/modecosts.c',
+    '<(libvpx_source)/vp8/encoder/modecosts.h',
+    '<(libvpx_source)/vp8/encoder/mr_dissim.c',
+    '<(libvpx_source)/vp8/encoder/mr_dissim.h',
+    '<(libvpx_source)/vp8/encoder/onyx_if.c',
+    '<(libvpx_source)/vp8/encoder/onyx_int.h',
+    '<(libvpx_source)/vp8/encoder/pickinter.c',
+    '<(libvpx_source)/vp8/encoder/pickinter.h',
+    '<(libvpx_source)/vp8/encoder/picklpf.c',
+    '<(libvpx_source)/vp8/encoder/quantize.c',
+    '<(libvpx_source)/vp8/encoder/quantize.h',
+    '<(libvpx_source)/vp8/encoder/ratectrl.c',
+    '<(libvpx_source)/vp8/encoder/ratectrl.h',
+    '<(libvpx_source)/vp8/encoder/rdopt.c',
+    '<(libvpx_source)/vp8/encoder/rdopt.h',
+    '<(libvpx_source)/vp8/encoder/segmentation.c',
+    '<(libvpx_source)/vp8/encoder/segmentation.h',
+    '<(libvpx_source)/vp8/encoder/tokenize.c',
+    '<(libvpx_source)/vp8/encoder/tokenize.h',
+    '<(libvpx_source)/vp8/encoder/treewriter.c',
+    '<(libvpx_source)/vp8/encoder/treewriter.h',
+    '<(libvpx_source)/vp8/vp8_cx_iface.c',
+    '<(libvpx_source)/vp8/vp8_dx_iface.c',
+    '<(libvpx_source)/vp9/common/vp9_alloccommon.c',
+    '<(libvpx_source)/vp9/common/vp9_alloccommon.h',
+    '<(libvpx_source)/vp9/common/vp9_blockd.c',
+    '<(libvpx_source)/vp9/common/vp9_blockd.h',
+    '<(libvpx_source)/vp9/common/vp9_common.h',
+    '<(libvpx_source)/vp9/common/vp9_common_data.c',
+    '<(libvpx_source)/vp9/common/vp9_common_data.h',
+    '<(libvpx_source)/vp9/common/vp9_convolve.c',
+    '<(libvpx_source)/vp9/common/vp9_convolve.h',
+    '<(libvpx_source)/vp9/common/vp9_debugmodes.c',
+    '<(libvpx_source)/vp9/common/vp9_entropy.c',
+    '<(libvpx_source)/vp9/common/vp9_entropy.h',
+    '<(libvpx_source)/vp9/common/vp9_entropymode.c',
+    '<(libvpx_source)/vp9/common/vp9_entropymode.h',
+    '<(libvpx_source)/vp9/common/vp9_entropymv.c',
+    '<(libvpx_source)/vp9/common/vp9_entropymv.h',
+    '<(libvpx_source)/vp9/common/vp9_enums.h',
+    '<(libvpx_source)/vp9/common/vp9_filter.c',
+    '<(libvpx_source)/vp9/common/vp9_filter.h',
+    '<(libvpx_source)/vp9/common/vp9_frame_buffers.c',
+    '<(libvpx_source)/vp9/common/vp9_frame_buffers.h',
+    '<(libvpx_source)/vp9/common/vp9_idct.c',
+    '<(libvpx_source)/vp9/common/vp9_idct.h',
+    '<(libvpx_source)/vp9/common/vp9_loopfilter.c',
+    '<(libvpx_source)/vp9/common/vp9_loopfilter.h',
+    '<(libvpx_source)/vp9/common/vp9_loopfilter_filters.c',
+    '<(libvpx_source)/vp9/common/vp9_mv.h',
+    '<(libvpx_source)/vp9/common/vp9_mvref_common.c',
+    '<(libvpx_source)/vp9/common/vp9_mvref_common.h',
+    '<(libvpx_source)/vp9/common/vp9_onyxc_int.h',
+    '<(libvpx_source)/vp9/common/vp9_ppflags.h',
+    '<(libvpx_source)/vp9/common/vp9_pragmas.h',
+    '<(libvpx_source)/vp9/common/vp9_pred_common.c',
+    '<(libvpx_source)/vp9/common/vp9_pred_common.h',
+    '<(libvpx_source)/vp9/common/vp9_prob.c',
+    '<(libvpx_source)/vp9/common/vp9_prob.h',
+    '<(libvpx_source)/vp9/common/vp9_quant_common.c',
+    '<(libvpx_source)/vp9/common/vp9_quant_common.h',
+    '<(libvpx_source)/vp9/common/vp9_reconinter.c',
+    '<(libvpx_source)/vp9/common/vp9_reconinter.h',
+    '<(libvpx_source)/vp9/common/vp9_reconintra.c',
+    '<(libvpx_source)/vp9/common/vp9_reconintra.h',
+    '<(libvpx_source)/vp9/common/vp9_rtcd.c',
+    '<(libvpx_source)/vp9/common/vp9_scale.c',
+    '<(libvpx_source)/vp9/common/vp9_scale.h',
+    '<(libvpx_source)/vp9/common/vp9_scan.c',
+    '<(libvpx_source)/vp9/common/vp9_scan.h',
+    '<(libvpx_source)/vp9/common/vp9_seg_common.c',
+    '<(libvpx_source)/vp9/common/vp9_seg_common.h',
+    '<(libvpx_source)/vp9/common/vp9_systemdependent.h',
+    '<(libvpx_source)/vp9/common/vp9_textblit.h',
+    '<(libvpx_source)/vp9/common/vp9_tile_common.c',
+    '<(libvpx_source)/vp9/common/vp9_tile_common.h',
+    '<(libvpx_source)/vp9/decoder/vp9_decodeframe.c',
+    '<(libvpx_source)/vp9/decoder/vp9_decodeframe.h',
+    '<(libvpx_source)/vp9/decoder/vp9_decodemv.c',
+    '<(libvpx_source)/vp9/decoder/vp9_decodemv.h',
+    '<(libvpx_source)/vp9/decoder/vp9_decoder.c',
+    '<(libvpx_source)/vp9/decoder/vp9_decoder.h',
+    '<(libvpx_source)/vp9/decoder/vp9_detokenize.c',
+    '<(libvpx_source)/vp9/decoder/vp9_detokenize.h',
+    '<(libvpx_source)/vp9/decoder/vp9_dsubexp.c',
+    '<(libvpx_source)/vp9/decoder/vp9_dsubexp.h',
+    '<(libvpx_source)/vp9/decoder/vp9_dthread.c',
+    '<(libvpx_source)/vp9/decoder/vp9_dthread.h',
+    '<(libvpx_source)/vp9/decoder/vp9_read_bit_buffer.c',
+    '<(libvpx_source)/vp9/decoder/vp9_read_bit_buffer.h',
+    '<(libvpx_source)/vp9/decoder/vp9_reader.c',
+    '<(libvpx_source)/vp9/decoder/vp9_reader.h',
+    '<(libvpx_source)/vp9/decoder/vp9_thread.c',
+    '<(libvpx_source)/vp9/decoder/vp9_thread.h',
+    '<(libvpx_source)/vp9/encoder/vp9_aq_complexity.c',
+    '<(libvpx_source)/vp9/encoder/vp9_aq_complexity.h',
+    '<(libvpx_source)/vp9/encoder/vp9_aq_cyclicrefresh.c',
+    '<(libvpx_source)/vp9/encoder/vp9_aq_cyclicrefresh.h',
+    '<(libvpx_source)/vp9/encoder/vp9_aq_variance.c',
+    '<(libvpx_source)/vp9/encoder/vp9_aq_variance.h',
+    '<(libvpx_source)/vp9/encoder/vp9_bitstream.c',
+    '<(libvpx_source)/vp9/encoder/vp9_bitstream.h',
+    '<(libvpx_source)/vp9/encoder/vp9_block.h',
+    '<(libvpx_source)/vp9/encoder/vp9_context_tree.c',
+    '<(libvpx_source)/vp9/encoder/vp9_context_tree.h',
+    '<(libvpx_source)/vp9/encoder/vp9_cost.c',
+    '<(libvpx_source)/vp9/encoder/vp9_cost.h',
+    '<(libvpx_source)/vp9/encoder/vp9_dct.c',
+    '<(libvpx_source)/vp9/encoder/vp9_encodeframe.c',
+    '<(libvpx_source)/vp9/encoder/vp9_encodeframe.h',
+    '<(libvpx_source)/vp9/encoder/vp9_encodemb.c',
+    '<(libvpx_source)/vp9/encoder/vp9_encodemb.h',
+    '<(libvpx_source)/vp9/encoder/vp9_encodemv.c',
+    '<(libvpx_source)/vp9/encoder/vp9_encodemv.h',
+    '<(libvpx_source)/vp9/encoder/vp9_encoder.c',
+    '<(libvpx_source)/vp9/encoder/vp9_encoder.h',
+    '<(libvpx_source)/vp9/encoder/vp9_extend.c',
+    '<(libvpx_source)/vp9/encoder/vp9_extend.h',
+    '<(libvpx_source)/vp9/encoder/vp9_firstpass.c',
+    '<(libvpx_source)/vp9/encoder/vp9_firstpass.h',
+    '<(libvpx_source)/vp9/encoder/vp9_lookahead.c',
+    '<(libvpx_source)/vp9/encoder/vp9_lookahead.h',
+    '<(libvpx_source)/vp9/encoder/vp9_mbgraph.c',
+    '<(libvpx_source)/vp9/encoder/vp9_mbgraph.h',
+    '<(libvpx_source)/vp9/encoder/vp9_mcomp.c',
+    '<(libvpx_source)/vp9/encoder/vp9_mcomp.h',
+    '<(libvpx_source)/vp9/encoder/vp9_picklpf.c',
+    '<(libvpx_source)/vp9/encoder/vp9_picklpf.h',
+    '<(libvpx_source)/vp9/encoder/vp9_pickmode.c',
+    '<(libvpx_source)/vp9/encoder/vp9_pickmode.h',
+    '<(libvpx_source)/vp9/encoder/vp9_quantize.c',
+    '<(libvpx_source)/vp9/encoder/vp9_quantize.h',
+    '<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
+    '<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+    '<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
+    '<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
+    '<(libvpx_source)/vp9/encoder/vp9_resize.c',
+    '<(libvpx_source)/vp9/encoder/vp9_resize.h',
+    '<(libvpx_source)/vp9/encoder/vp9_sad.c',
+    '<(libvpx_source)/vp9/encoder/vp9_segmentation.c',
+    '<(libvpx_source)/vp9/encoder/vp9_segmentation.h',
+    '<(libvpx_source)/vp9/encoder/vp9_speed_features.c',
+    '<(libvpx_source)/vp9/encoder/vp9_speed_features.h',
+    '<(libvpx_source)/vp9/encoder/vp9_subexp.c',
+    '<(libvpx_source)/vp9/encoder/vp9_subexp.h',
+    '<(libvpx_source)/vp9/encoder/vp9_svc_layercontext.c',
+    '<(libvpx_source)/vp9/encoder/vp9_svc_layercontext.h',
+    '<(libvpx_source)/vp9/encoder/vp9_temporal_filter.c',
+    '<(libvpx_source)/vp9/encoder/vp9_temporal_filter.h',
+    '<(libvpx_source)/vp9/encoder/vp9_tokenize.c',
+    '<(libvpx_source)/vp9/encoder/vp9_tokenize.h',
+    '<(libvpx_source)/vp9/encoder/vp9_treewriter.c',
+    '<(libvpx_source)/vp9/encoder/vp9_treewriter.h',
+    '<(libvpx_source)/vp9/encoder/vp9_variance.c',
+    '<(libvpx_source)/vp9/encoder/vp9_variance.h',
+    '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.c',
+    '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h',
+    '<(libvpx_source)/vp9/encoder/vp9_writer.c',
+    '<(libvpx_source)/vp9/encoder/vp9_writer.h',
+    '<(libvpx_source)/vp9/vp9_cx_iface.c',
+    '<(libvpx_source)/vp9/vp9_dx_iface.c',
+    '<(libvpx_source)/vp9/vp9_iface_common.h',
+    '<(libvpx_source)/vpx/internal/vpx_codec_internal.h',
+    '<(libvpx_source)/vpx/internal/vpx_psnr.h',
+    '<(libvpx_source)/vpx/src/svc_encodeframe.c',
+    '<(libvpx_source)/vpx/src/vpx_codec.c',
+    '<(libvpx_source)/vpx/src/vpx_decoder.c',
+    '<(libvpx_source)/vpx/src/vpx_encoder.c',
+    '<(libvpx_source)/vpx/src/vpx_image.c',
+    '<(libvpx_source)/vpx/src/vpx_psnr.c',
+    '<(libvpx_source)/vpx/svc_context.h',
+    '<(libvpx_source)/vpx/vp8.h',
+    '<(libvpx_source)/vpx/vp8cx.h',
+    '<(libvpx_source)/vpx/vp8dx.h',
+    '<(libvpx_source)/vpx/vpx_codec.h',
+    '<(libvpx_source)/vpx/vpx_decoder.h',
+    '<(libvpx_source)/vpx/vpx_encoder.h',
+    '<(libvpx_source)/vpx/vpx_frame_buffer.h',
+    '<(libvpx_source)/vpx/vpx_image.h',
+    '<(libvpx_source)/vpx/vpx_integer.h',
+    '<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
+    '<(libvpx_source)/vpx_mem/vpx_mem.c',
+    '<(libvpx_source)/vpx_mem/vpx_mem.h',
+    '<(libvpx_source)/vpx_ports/arm.h',
+    '<(libvpx_source)/vpx_ports/arm_cpudetect.c',
+    '<(libvpx_source)/vpx_ports/asm_offsets.h',
+    '<(libvpx_source)/vpx_ports/emmintrin_compat.h',
+    '<(libvpx_source)/vpx_ports/mem.h',
+    '<(libvpx_source)/vpx_ports/mem_ops.h',
+    '<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+    '<(libvpx_source)/vpx_ports/vpx_once.h',
+    '<(libvpx_source)/vpx_ports/vpx_timer.h',
+    '<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
+    '<(libvpx_source)/vpx_scale/generic/vpx_scale.c',
+    '<(libvpx_source)/vpx_scale/generic/yv12config.c',
+    '<(libvpx_source)/vpx_scale/generic/yv12extend.c',
+    '<(libvpx_source)/vpx_scale/vpx_scale.h',
+    '<(libvpx_source)/vpx_scale/vpx_scale_rtcd.c',
+    '<(libvpx_source)/vpx_scale/yv12config.h',
+  ],
+}
diff --git a/libvpx_srcs_arm_neon.gypi b/libvpx_srcs_arm_neon.gypi
index 13735d3..01e9bf9 100644
--- a/libvpx_srcs_arm_neon.gypi
+++ b/libvpx_srcs_arm_neon.gypi
@@ -34,14 +34,13 @@
     '<(libvpx_source)/vp8/common/arm/filter_arm.c',
     '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c',
     '<(libvpx_source)/vp8/common/arm/neon/bilinearpredict_neon.c',
-    '<(libvpx_source)/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/copymem_neon.c',
     '<(libvpx_source)/vp8/common/arm/neon/dc_only_idct_add_neon.c',
     '<(libvpx_source)/vp8/common/arm/neon/dequant_idct_neon.c',
     '<(libvpx_source)/vp8/common/arm/neon/dequantizeb_neon.c',
     '<(libvpx_source)/vp8/common/arm/neon/idct_blk_neon.c',
-    '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.c',
-    '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.c',
+    '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm',
+    '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/iwalsh_neon.c',
     '<(libvpx_source)/vp8/common/arm/neon/loopfilter_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c',
@@ -54,7 +53,6 @@
     '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm',
-    '<(libvpx_source)/vp8/common/arm/reconintra_arm.c',
     '<(libvpx_source)/vp8/common/arm/variance_arm.c',
     '<(libvpx_source)/vp8/common/blockd.c',
     '<(libvpx_source)/vp8/common/blockd.h',
@@ -127,16 +125,11 @@
     '<(libvpx_source)/vp8/decoder/onyxd_int.h',
     '<(libvpx_source)/vp8/decoder/threading.c',
     '<(libvpx_source)/vp8/decoder/treereader.h',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm',
-    '<(libvpx_source)/vp8/encoder/arm/boolhuff_arm.c',
     '<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
     '<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c',
     '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.asm',
@@ -150,6 +143,7 @@
     '<(libvpx_source)/vp8/encoder/bitstream.c',
     '<(libvpx_source)/vp8/encoder/bitstream.h',
     '<(libvpx_source)/vp8/encoder/block.h',
+    '<(libvpx_source)/vp8/encoder/boolhuff.c',
     '<(libvpx_source)/vp8/encoder/boolhuff.h',
     '<(libvpx_source)/vp8/encoder/dct.c',
     '<(libvpx_source)/vp8/encoder/dct_value_cost.h',
diff --git a/libvpx_srcs_arm_neon_cpu_detect.gypi b/libvpx_srcs_arm_neon_cpu_detect.gypi
index a7945f6..3a43d66 100644
--- a/libvpx_srcs_arm_neon_cpu_detect.gypi
+++ b/libvpx_srcs_arm_neon_cpu_detect.gypi
@@ -33,13 +33,13 @@
     '<(libvpx_source)/vp8/common/arm/dequantize_arm.c',
     '<(libvpx_source)/vp8/common/arm/filter_arm.c',
     '<(libvpx_source)/vp8/common/arm/loopfilter_arm.c',
-    '<(libvpx_source)/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm',
+    '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm',
+    '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/loopfilter_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm',
-    '<(libvpx_source)/vp8/common/arm/reconintra_arm.c',
     '<(libvpx_source)/vp8/common/arm/variance_arm.c',
     '<(libvpx_source)/vp8/common/blockd.c',
     '<(libvpx_source)/vp8/common/blockd.h',
@@ -112,16 +112,11 @@
     '<(libvpx_source)/vp8/decoder/onyxd_int.h',
     '<(libvpx_source)/vp8/decoder/threading.c',
     '<(libvpx_source)/vp8/decoder/treereader.h',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm',
-    '<(libvpx_source)/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_fast_quantize_b_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_subtract_armv6.asm',
     '<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm',
-    '<(libvpx_source)/vp8/encoder/arm/boolhuff_arm.c',
     '<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
     '<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.asm',
     '<(libvpx_source)/vp8/encoder/arm/neon/picklpf_arm.c',
@@ -134,6 +129,7 @@
     '<(libvpx_source)/vp8/encoder/bitstream.c',
     '<(libvpx_source)/vp8/encoder/bitstream.h',
     '<(libvpx_source)/vp8/encoder/block.h',
+    '<(libvpx_source)/vp8/encoder/boolhuff.c',
     '<(libvpx_source)/vp8/encoder/boolhuff.h',
     '<(libvpx_source)/vp8/encoder/dct.c',
     '<(libvpx_source)/vp8/encoder/dct_value_cost.h',
diff --git a/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi b/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi
index 07c2c32..2359023 100644
--- a/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi
+++ b/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi
@@ -19,8 +19,6 @@
         '<(libvpx_source)/vp8/common/arm/neon/dequant_idct_neon.c',
         '<(libvpx_source)/vp8/common/arm/neon/dequantizeb_neon.c',
         '<(libvpx_source)/vp8/common/arm/neon/idct_blk_neon.c',
-        '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_0_2x_neon.c',
-        '<(libvpx_source)/vp8/common/arm/neon/idct_dequant_full_2x_neon.c',
         '<(libvpx_source)/vp8/common/arm/neon/iwalsh_neon.c',
         '<(libvpx_source)/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c',
         '<(libvpx_source)/vp8/common/arm/neon/mbloopfilter_neon.c',
diff --git a/libvpx_srcs_x86.gypi b/libvpx_srcs_x86.gypi
index b274e8a..06b411a 100644
--- a/libvpx_srcs_x86.gypi
+++ b/libvpx_srcs_x86.gypi
@@ -309,6 +309,7 @@
     '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h',
     '<(libvpx_source)/vp9/encoder/vp9_writer.c',
     '<(libvpx_source)/vp9/encoder/vp9_writer.h',
+    '<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_sad4d_sse2.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_sad_mmx.asm',
diff --git a/libvpx_srcs_x86_64.gypi b/libvpx_srcs_x86_64.gypi
index 2633756..f048f2c 100644
--- a/libvpx_srcs_x86_64.gypi
+++ b/libvpx_srcs_x86_64.gypi
@@ -312,6 +312,7 @@
     '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h',
     '<(libvpx_source)/vp9/encoder/vp9_writer.c',
     '<(libvpx_source)/vp9/encoder/vp9_writer.h',
+    '<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_dct_ssse3.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_quantize_ssse3.asm',
diff --git a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
index 77cc0b7..4ab0f6b 100644
--- a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
+++ b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
@@ -87,9 +87,9 @@ void vp8_dc_only_idct_add_v6(short input, unsigned char *pred, int pred_stride,
 void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_neon(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
index e15c480..6f28ce3 100644
--- a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
+++ b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
@@ -279,9 +279,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
-RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -745,8 +745,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_NEON) vp9_idct4x4_16_add = vp9_idct4x4_16_add_neon;
     vp9_idct4x4_1_add = vp9_idct4x4_1_add_c;
     if (flags & HAS_NEON) vp9_idct4x4_1_add = vp9_idct4x4_1_add_neon;
-    vp9_idct8x8_10_add = vp9_idct8x8_10_add_c;
-    if (flags & HAS_NEON) vp9_idct8x8_10_add = vp9_idct8x8_10_add_neon;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_c;
+    if (flags & HAS_NEON) vp9_idct8x8_12_add = vp9_idct8x8_12_add_neon;
     vp9_idct8x8_1_add = vp9_idct8x8_1_add_c;
     if (flags & HAS_NEON) vp9_idct8x8_1_add = vp9_idct8x8_1_add_neon;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_c;
diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
index b15c213..3fa8266 100644
--- a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
+++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
@@ -7,9 +7,10 @@
 .equ ARCH_X86_64 ,  0
 .equ ARCH_PPC32 ,  0
 .equ ARCH_PPC64 ,  0
-.equ HAVE_EDSP ,  1
+.equ HAVE_EDSP ,  0
 .equ HAVE_MEDIA ,  1
 .equ HAVE_NEON ,  1
+.equ HAVE_NEON_ASM ,  1
 .equ HAVE_MIPS32 ,  0
 .equ HAVE_DSPR2 ,  0
 .equ HAVE_MMX ,  0
diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.c b/source/config/linux/arm-neon-cpu-detect/vpx_config.c
index 95a39b3..3467569 100644
--- a/source/config/linux/arm-neon-cpu-detect/vpx_config.c
+++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.c
@@ -5,5 +5,5 @@
 /* tree. An additional intellectual property rights grant can be found */
 /* in the file PATENTS.  All contributing project authors may */
 /* be found in the AUTHORS file in the root of the source tree. */
-static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
+static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
 const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/source/config/linux/arm-neon-cpu-detect/vpx_config.h
index 89d030b..b3d8bf0 100644
--- a/source/config/linux/arm-neon-cpu-detect/vpx_config.h
+++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.h
@@ -16,9 +16,10 @@
 #define ARCH_X86_64 0
 #define ARCH_PPC32 0
 #define ARCH_PPC64 0
-#define HAVE_EDSP 1
+#define HAVE_EDSP 0
 #define HAVE_MEDIA 1
 #define HAVE_NEON 1
+#define HAVE_NEON_ASM 1
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 0
diff --git a/source/config/linux/arm-neon/vp8_rtcd.h b/source/config/linux/arm-neon/vp8_rtcd.h
index a52d575..184b486 100644
--- a/source/config/linux/arm-neon/vp8_rtcd.h
+++ b/source/config/linux/arm-neon/vp8_rtcd.h
@@ -87,8 +87,8 @@ void vp8_dc_only_idct_add_v6(short input, unsigned char *pred, int pred_stride,
 void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_neon
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_neon(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_neon
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/arm-neon/vp9_rtcd.h b/source/config/linux/arm-neon/vp9_rtcd.h
index a94d300..9e401dd 100644
--- a/source/config/linux/arm-neon/vp9_rtcd.h
+++ b/source/config/linux/arm-neon/vp9_rtcd.h
@@ -279,9 +279,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_neon
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_neon
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_12_add vp9_idct8x8_12_add_neon
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_neon(const int16_t *input, uint8_t *dest, int dest_stride);
diff --git a/source/config/linux/arm-neon/vpx_config.asm b/source/config/linux/arm-neon/vpx_config.asm
index 228a6ae..190cceb 100644
--- a/source/config/linux/arm-neon/vpx_config.asm
+++ b/source/config/linux/arm-neon/vpx_config.asm
@@ -7,9 +7,10 @@
 .equ ARCH_X86_64 ,  0
 .equ ARCH_PPC32 ,  0
 .equ ARCH_PPC64 ,  0
-.equ HAVE_EDSP ,  1
+.equ HAVE_EDSP ,  0
 .equ HAVE_MEDIA ,  1
 .equ HAVE_NEON ,  1
+.equ HAVE_NEON_ASM ,  1
 .equ HAVE_MIPS32 ,  0
 .equ HAVE_DSPR2 ,  0
 .equ HAVE_MMX ,  0
diff --git a/source/config/linux/arm-neon/vpx_config.c b/source/config/linux/arm-neon/vpx_config.c
index 793f12a..c61708d 100644
--- a/source/config/linux/arm-neon/vpx_config.c
+++ b/source/config/linux/arm-neon/vpx_config.c
@@ -5,5 +5,5 @@
 /* tree. An additional intellectual property rights grant can be found */
 /* in the file PATENTS.  All contributing project authors may */
 /* be found in the AUTHORS file in the root of the source tree. */
-static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
+static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
 const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/source/config/linux/arm-neon/vpx_config.h b/source/config/linux/arm-neon/vpx_config.h
index 2f5ddb1..97710cf 100644
--- a/source/config/linux/arm-neon/vpx_config.h
+++ b/source/config/linux/arm-neon/vpx_config.h
@@ -16,9 +16,10 @@
 #define ARCH_X86_64 0
 #define ARCH_PPC32 0
 #define ARCH_PPC64 0
-#define HAVE_EDSP 1
+#define HAVE_EDSP 0
 #define HAVE_MEDIA 1
 #define HAVE_NEON 1
+#define HAVE_NEON_ASM 1
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 0
diff --git a/source/config/linux/arm/vp8_rtcd.h b/source/config/linux/arm/vp8_rtcd.h
index ab5fa41..20208cc 100644
--- a/source/config/linux/arm/vp8_rtcd.h
+++ b/source/config/linux/arm/vp8_rtcd.h
@@ -79,7 +79,7 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_v6(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_v6
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_c
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/arm/vp9_rtcd.h b/source/config/linux/arm/vp9_rtcd.h
index 1c0f5f2..79faee9 100644
--- a/source/config/linux/arm/vp9_rtcd.h
+++ b/source/config/linux/arm/vp9_rtcd.h
@@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c
diff --git a/source/config/linux/arm/vpx_config.asm b/source/config/linux/arm/vpx_config.asm
index c99dea6..27b450b 100644
--- a/source/config/linux/arm/vpx_config.asm
+++ b/source/config/linux/arm/vpx_config.asm
@@ -7,9 +7,10 @@
 .equ ARCH_X86_64 ,  0
 .equ ARCH_PPC32 ,  0
 .equ ARCH_PPC64 ,  0
-.equ HAVE_EDSP ,  1
+.equ HAVE_EDSP ,  0
 .equ HAVE_MEDIA ,  1
 .equ HAVE_NEON ,  0
+.equ HAVE_NEON_ASM ,  0
 .equ HAVE_MIPS32 ,  0
 .equ HAVE_DSPR2 ,  0
 .equ HAVE_MMX ,  0
diff --git a/source/config/linux/arm/vpx_config.c b/source/config/linux/arm/vpx_config.c
index be1757a..703e1d3 100644
--- a/source/config/linux/arm/vpx_config.c
+++ b/source/config/linux/arm/vpx_config.c
@@ -5,5 +5,5 @@
 /* tree. An additional intellectual property rights grant can be found */
 /* in the file PATENTS.  All contributing project authors may */
 /* be found in the AUTHORS file in the root of the source tree. */
-static const char* const cfg = "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
+static const char* const cfg = "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
 const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/source/config/linux/arm/vpx_config.h b/source/config/linux/arm/vpx_config.h
index 50e0ea7..85aca7d 100644
--- a/source/config/linux/arm/vpx_config.h
+++ b/source/config/linux/arm/vpx_config.h
@@ -16,9 +16,10 @@
 #define ARCH_X86_64 0
 #define ARCH_PPC32 0
 #define ARCH_PPC64 0
-#define HAVE_EDSP 1
+#define HAVE_EDSP 0
 #define HAVE_MEDIA 1
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 0
diff --git a/source/config/linux/arm64/vp8_rtcd.h b/source/config/linux/arm64/vp8_rtcd.h
new file mode 100644
index 0000000..9601515
--- /dev/null
+++ b/source/config/linux/arm64/vp8_rtcd.h
@@ -0,0 +1,378 @@
+#ifndef VP8_RTCD_H_
+#define VP8_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * VP8
+ */
+
+struct blockd;
+struct macroblockd;
+struct loop_filter_info;
+
+/* Encoder forward decls */
+struct block;
+struct macroblock;
+struct variance_vtable;
+union int_mv;
+struct yv12_buffer_config;
+
+void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_neon
+
+void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_neon
+
+void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_neon
+
+void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_neon
+
+void vp8_blend_b_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride);
+#define vp8_blend_b vp8_blend_b_c
+
+void vp8_blend_mb_inner_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride);
+#define vp8_blend_mb_inner vp8_blend_mb_inner_c
+
+void vp8_blend_mb_outer_c(unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride);
+#define vp8_blend_mb_outer vp8_blend_mb_outer_c
+
+int vp8_block_error_c(short *coeff, short *dqcoeff);
+#define vp8_block_error vp8_block_error_c
+
+void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row,  unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
+#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c
+
+void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
+#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c
+
+void vp8_clear_system_state_c();
+#define vp8_clear_system_state vp8_clear_system_state_c
+
+void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+#define vp8_copy_mem16x16 vp8_copy_mem16x16_neon
+
+void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+#define vp8_copy_mem8x4 vp8_copy_mem8x4_neon
+
+void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+#define vp8_copy_mem8x8 vp8_copy_mem8x8_neon
+
+void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+#define vp8_dc_only_idct_add vp8_dc_only_idct_add_neon
+
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter vp8_denoiser_filter_neon
+
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
+void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
+#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
+
+void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
+
+void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
+
+void vp8_dequantize_b_c(struct blockd*, short *dqc);
+void vp8_dequantize_b_neon(struct blockd*, short *dqc);
+#define vp8_dequantize_b vp8_dequantize_b_neon
+
+int vp8_diamond_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
+#define vp8_diamond_search_sad vp8_diamond_search_sad_c
+
+void vp8_fast_quantize_b_c(struct block *, struct blockd *);
+#define vp8_fast_quantize_b vp8_fast_quantize_b_c
+
+void vp8_fast_quantize_b_pair_c(struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2);
+#define vp8_fast_quantize_b_pair vp8_fast_quantize_b_pair_c
+
+void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
+#define vp8_filter_by_weight16x16 vp8_filter_by_weight16x16_c
+
+void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
+#define vp8_filter_by_weight4x4 vp8_filter_by_weight4x4_c
+
+void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight);
+#define vp8_filter_by_weight8x8 vp8_filter_by_weight8x8_c
+
+int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
+#define vp8_full_search_sad vp8_full_search_sad_c
+
+unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride);
+#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c
+
+unsigned int vp8_get_mb_ss_c(const short *);
+#define vp8_get_mb_ss vp8_get_mb_ss_c
+
+void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
+#define vp8_intra4x4_predict vp8_intra4x4_predict_c
+
+void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_bh vp8_loop_filter_bh_c
+
+void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_bv vp8_loop_filter_bv_c
+
+void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_mbh vp8_loop_filter_mbh_neon
+
+void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_mbv vp8_loop_filter_mbv_neon
+
+void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_neon
+
+void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c
+
+void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_mbh vp8_loop_filter_mbhs_neon
+
+void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c
+
+int vp8_mbblock_error_c(struct macroblock *mb, int dc);
+#define vp8_mbblock_error vp8_mbblock_error_c
+
+void vp8_mbpost_proc_across_ip_c(unsigned char *dst, int pitch, int rows, int cols,int flimit);
+#define vp8_mbpost_proc_across_ip vp8_mbpost_proc_across_ip_c
+
+void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,int flimit);
+#define vp8_mbpost_proc_down vp8_mbpost_proc_down_c
+
+int vp8_mbuverror_c(struct macroblock *mb);
+#define vp8_mbuverror vp8_mbuverror_c
+
+unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_mse16x16 vp8_mse16x16_c
+
+void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
+#define vp8_plane_add_noise vp8_plane_add_noise_c
+
+void vp8_post_proc_down_and_across_mb_row_c(unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size);
+#define vp8_post_proc_down_and_across_mb_row vp8_post_proc_down_and_across_mb_row_c
+
+void vp8_quantize_mb_c(struct macroblock *);
+void vp8_quantize_mb_neon(struct macroblock *);
+#define vp8_quantize_mb vp8_quantize_mb_neon
+
+void vp8_quantize_mbuv_c(struct macroblock *);
+void vp8_quantize_mbuv_neon(struct macroblock *);
+#define vp8_quantize_mbuv vp8_quantize_mbuv_neon
+
+void vp8_quantize_mby_c(struct macroblock *);
+void vp8_quantize_mby_neon(struct macroblock *);
+#define vp8_quantize_mby vp8_quantize_mby_neon
+
+int vp8_refining_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
+#define vp8_refining_search_sad vp8_refining_search_sad_c
+
+void vp8_regular_quantize_b_c(struct block *, struct blockd *);
+#define vp8_regular_quantize_b vp8_regular_quantize_b_c
+
+void vp8_regular_quantize_b_pair_c(struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2);
+#define vp8_regular_quantize_b_pair vp8_regular_quantize_b_pair_c
+
+unsigned int vp8_sad16x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+unsigned int vp8_sad16x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp8_sad16x16 vp8_sad16x16_neon
+
+void vp8_sad16x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp8_sad16x16x3 vp8_sad16x16x3_c
+
+void vp8_sad16x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp8_sad16x16x4d vp8_sad16x16x4d_c
+
+void vp8_sad16x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array);
+#define vp8_sad16x16x8 vp8_sad16x16x8_c
+
+unsigned int vp8_sad16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+unsigned int vp8_sad16x8_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp8_sad16x8 vp8_sad16x8_neon
+
+void vp8_sad16x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp8_sad16x8x3 vp8_sad16x8x3_c
+
+void vp8_sad16x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp8_sad16x8x4d vp8_sad16x8x4d_c
+
+void vp8_sad16x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array);
+#define vp8_sad16x8x8 vp8_sad16x8x8_c
+
+unsigned int vp8_sad4x4_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+unsigned int vp8_sad4x4_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp8_sad4x4 vp8_sad4x4_neon
+
+void vp8_sad4x4x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp8_sad4x4x3 vp8_sad4x4x3_c
+
+void vp8_sad4x4x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp8_sad4x4x4d vp8_sad4x4x4d_c
+
+void vp8_sad4x4x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array);
+#define vp8_sad4x4x8 vp8_sad4x4x8_c
+
+unsigned int vp8_sad8x16_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+unsigned int vp8_sad8x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp8_sad8x16 vp8_sad8x16_neon
+
+void vp8_sad8x16x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp8_sad8x16x3 vp8_sad8x16x3_c
+
+void vp8_sad8x16x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp8_sad8x16x4d vp8_sad8x16x4d_c
+
+void vp8_sad8x16x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array);
+#define vp8_sad8x16x8 vp8_sad8x16x8_c
+
+unsigned int vp8_sad8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+unsigned int vp8_sad8x8_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp8_sad8x8 vp8_sad8x8_neon
+
+void vp8_sad8x8x3_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp8_sad8x8x3 vp8_sad8x8x3_c
+
+void vp8_sad8x8x4d_c(const unsigned char *src_ptr, int src_stride, const unsigned char * const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp8_sad8x8x4d vp8_sad8x8x4d_c
+
+void vp8_sad8x8x8_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned short *sad_array);
+#define vp8_sad8x8x8 vp8_sad8x8x8_c
+
+void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
+#define vp8_short_fdct4x4 vp8_short_fdct4x4_c
+
+void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
+#define vp8_short_fdct8x4 vp8_short_fdct8x4_c
+
+void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+#define vp8_short_idct4x4llm vp8_short_idct4x4llm_neon
+
+void vp8_short_inv_walsh4x4_c(short *input, short *output);
+void vp8_short_inv_walsh4x4_neon(short *input, short *output);
+#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_neon
+
+void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
+#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
+
+void vp8_short_walsh4x4_c(short *input, short *output, int pitch);
+#define vp8_short_walsh4x4 vp8_short_walsh4x4_c
+
+void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_neon
+
+void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_neon
+
+void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_neon
+
+void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_neon
+
+unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
+
+unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c
+
+unsigned int vp8_sub_pixel_variance16x8_c(const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_variance16x8 vp8_sub_pixel_variance16x8_c
+
+unsigned int vp8_sub_pixel_variance4x4_c(const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_variance4x4 vp8_sub_pixel_variance4x4_c
+
+unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_variance8x16 vp8_sub_pixel_variance8x16_c
+
+unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_c
+
+void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch);
+#define vp8_subtract_b vp8_subtract_b_c
+
+void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride);
+#define vp8_subtract_mbuv vp8_subtract_mbuv_c
+
+void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
+#define vp8_subtract_mby vp8_subtract_mby_c
+
+unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance16x16 vp8_variance16x16_neon
+
+unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance16x8 vp8_variance16x8_neon
+
+unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance4x4 vp8_variance4x4_c
+
+unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance8x16 vp8_variance8x16_neon
+
+unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance8x8 vp8_variance8x8_neon
+
+unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
+
+unsigned int vp8_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
+
+unsigned int vp8_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse);
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
+
+void vp8_yv12_copy_partial_frame_c(struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
+#define vp8_yv12_copy_partial_frame vp8_yv12_copy_partial_frame_c
+
+void vp8_rtcd(void);
+
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = arm_cpu_caps();  /* presumably a CPU-feature bitmask; confirm in vpx_ports/arm.h */
+    /* No implementation is selected from flags in this body: it only reads and discards them. */
+    (void)flags;  /* keeps the otherwise unused local referenced */
+
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/source/config/linux/arm64/vp9_rtcd.h b/source/config/linux/arm64/vp9_rtcd.h
new file mode 100644
index 0000000..79faee9
--- /dev/null
+++ b/source/config/linux/arm64/vp9_rtcd.h
@@ -0,0 +1,671 @@
+#ifndef VP9_RTCD_H_
+#define VP9_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * VP9
+ */
+
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_enums.h"
+
+struct macroblockd;
+
+/* Encoder forward decls */
+struct macroblock;
+struct vp9_variance_vtable;
+struct search_site_config;
+struct mv;
+union int_mv;
+struct yv12_buffer_config;
+
+void vp9_blend_b_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride);
+#define vp9_blend_b vp9_blend_b_c
+
+void vp9_blend_mb_inner_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride);
+#define vp9_blend_mb_inner vp9_blend_mb_inner_c
+
+void vp9_blend_mb_outer_c(uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride);
+#define vp9_blend_mb_outer vp9_blend_mb_outer_c
+
+int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz);
+#define vp9_block_error vp9_block_error_c
+
+void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve8 vp9_convolve8_c
+
+void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve8_avg vp9_convolve8_avg_c
+
+void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve8_avg_horiz vp9_convolve8_avg_horiz_c
+
+void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve8_avg_vert vp9_convolve8_avg_vert_c
+
+void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve8_horiz vp9_convolve8_horiz_c
+
+void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve8_vert vp9_convolve8_vert_c
+
+void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve_avg vp9_convolve_avg_c
+
+void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vp9_convolve_copy vp9_convolve_copy_c
+
+void vp9_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d117_predictor_16x16 vp9_d117_predictor_16x16_c
+
+void vp9_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d117_predictor_32x32 vp9_d117_predictor_32x32_c
+
+void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d117_predictor_4x4 vp9_d117_predictor_4x4_c
+
+void vp9_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d117_predictor_8x8 vp9_d117_predictor_8x8_c
+
+void vp9_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d135_predictor_16x16 vp9_d135_predictor_16x16_c
+
+void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c
+
+void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c
+
+void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c
+
+void vp9_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d153_predictor_16x16 vp9_d153_predictor_16x16_c
+
+void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+
+void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d153_predictor_4x4 vp9_d153_predictor_4x4_c
+
+void vp9_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d153_predictor_8x8 vp9_d153_predictor_8x8_c
+
+void vp9_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d207_predictor_16x16 vp9_d207_predictor_16x16_c
+
+void vp9_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d207_predictor_32x32 vp9_d207_predictor_32x32_c
+
+void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d207_predictor_4x4 vp9_d207_predictor_4x4_c
+
+void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c
+
+void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c
+
+void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c
+
+void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c
+
+void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c
+
+void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c
+
+void vp9_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d63_predictor_32x32 vp9_d63_predictor_32x32_c
+
+void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d63_predictor_4x4 vp9_d63_predictor_4x4_c
+
+void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c
+
+void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c
+
+void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c
+
+void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c
+
+void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c
+
+void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c
+
+void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c
+
+void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c
+
+void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c
+
+void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c
+
+void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c
+
+void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c
+
+void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c
+
+void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c
+
+void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c
+
+void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c
+
+void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
+
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+#define vp9_diamond_search_sad vp9_diamond_search_sad_c
+
+void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
+#define vp9_fdct16x16 vp9_fdct16x16_c
+
+void vp9_fdct32x32_c(const int16_t *input, int16_t *output, int stride);
+#define vp9_fdct32x32 vp9_fdct32x32_c
+
+void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *output, int stride);
+#define vp9_fdct32x32_rd vp9_fdct32x32_rd_c
+
+void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride);
+#define vp9_fdct4x4 vp9_fdct4x4_c
+
+void vp9_fdct8x8_c(const int16_t *input, int16_t *output, int stride);
+#define vp9_fdct8x8 vp9_fdct8x8_c
+
+void vp9_fht16x16_c(const int16_t *input, int16_t *output, int stride, int tx_type);
+#define vp9_fht16x16 vp9_fht16x16_c
+
+void vp9_fht4x4_c(const int16_t *input, int16_t *output, int stride, int tx_type);
+#define vp9_fht4x4 vp9_fht4x4_c
+
+void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
+#define vp9_fht8x8 vp9_fht8x8_c
+
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+#define vp9_full_range_search vp9_full_range_search_c
+
+int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
+#define vp9_full_search_sad vp9_full_search_sad_c
+
+void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride);
+#define vp9_fwht4x4 vp9_fwht4x4_c
+
+unsigned int vp9_get_mb_ss_c(const int16_t *);
+#define vp9_get_mb_ss vp9_get_mb_ss_c
+
+void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
+
+void vp9_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_h_predictor_32x32 vp9_h_predictor_32x32_c
+
+void vp9_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_h_predictor_4x4 vp9_h_predictor_4x4_c
+
+void vp9_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_h_predictor_8x8 vp9_h_predictor_8x8_c
+
+void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct16x16_10_add vp9_idct16x16_10_add_c
+
+void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct16x16_1_add vp9_idct16x16_1_add_c
+
+void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct16x16_256_add vp9_idct16x16_256_add_c
+
+void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct32x32_1024_add vp9_idct32x32_1024_add_c
+
+void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct32x32_1_add vp9_idct32x32_1_add_c
+
+void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct32x32_34_add vp9_idct32x32_34_add_c
+
+void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct4x4_16_add vp9_idct4x4_16_add_c
+
+void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct4x4_1_add vp9_idct4x4_1_add_c
+
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c
+
+void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_1_add vp9_idct8x8_1_add_c
+
+void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_64_add vp9_idct8x8_64_add_c
+
+void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *output, int pitch, int tx_type);
+#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
+
+void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
+#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c
+
+void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int dest_stride, int tx_type);
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
+
+void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_iwht4x4_16_add vp9_iwht4x4_16_add_c
+
+void vp9_iwht4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_iwht4x4_1_add vp9_iwht4x4_1_add_c
+
+void vp9_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+#define vp9_lpf_horizontal_16 vp9_lpf_horizontal_16_c
+
+void vp9_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+#define vp9_lpf_horizontal_4 vp9_lpf_horizontal_4_c
+
+void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vp9_lpf_horizontal_4_dual vp9_lpf_horizontal_4_dual_c
+
+void vp9_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+#define vp9_lpf_horizontal_8 vp9_lpf_horizontal_8_c
+
+void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vp9_lpf_horizontal_8_dual vp9_lpf_horizontal_8_dual_c
+
+void vp9_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vp9_lpf_vertical_16 vp9_lpf_vertical_16_c
+
+void vp9_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vp9_lpf_vertical_16_dual vp9_lpf_vertical_16_dual_c
+
+void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+#define vp9_lpf_vertical_4 vp9_lpf_vertical_4_c
+
+void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vp9_lpf_vertical_4_dual vp9_lpf_vertical_4_dual_c
+
+void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count);
+#define vp9_lpf_vertical_8 vp9_lpf_vertical_8_c
+
+void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vp9_lpf_vertical_8_dual vp9_lpf_vertical_8_dual_c
+
+unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+#define vp9_mse16x16 vp9_mse16x16_c
+
+unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+#define vp9_mse16x8 vp9_mse16x8_c
+
+unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+#define vp9_mse8x16 vp9_mse8x16_c
+
+unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse);
+#define vp9_mse8x8 vp9_mse8x8_c
+
+void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_b vp9_quantize_b_c
+
+void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+
+int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+#define vp9_refining_search_sad vp9_refining_search_sad_c
+
+unsigned int vp9_sad16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad16x16 vp9_sad16x16_c
+
+unsigned int vp9_sad16x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad16x16_avg vp9_sad16x16_avg_c
+
+void vp9_sad16x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad16x16x3 vp9_sad16x16x3_c
+
+void vp9_sad16x16x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad16x16x4d vp9_sad16x16x4d_c
+
+void vp9_sad16x16x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad16x16x8 vp9_sad16x16x8_c
+
+unsigned int vp9_sad16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp9_sad16x32 vp9_sad16x32_c
+
+unsigned int vp9_sad16x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad16x32_avg vp9_sad16x32_avg_c
+
+void vp9_sad16x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad16x32x4d vp9_sad16x32x4d_c
+
+unsigned int vp9_sad16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad16x8 vp9_sad16x8_c
+
+unsigned int vp9_sad16x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad16x8_avg vp9_sad16x8_avg_c
+
+void vp9_sad16x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad16x8x3 vp9_sad16x8x3_c
+
+void vp9_sad16x8x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad16x8x4d vp9_sad16x8x4d_c
+
+void vp9_sad16x8x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad16x8x8 vp9_sad16x8x8_c
+
+unsigned int vp9_sad32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp9_sad32x16 vp9_sad32x16_c
+
+unsigned int vp9_sad32x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad32x16_avg vp9_sad32x16_avg_c
+
+void vp9_sad32x16x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad32x16x4d vp9_sad32x16x4d_c
+
+unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad32x32 vp9_sad32x32_c
+
+unsigned int vp9_sad32x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad32x32_avg vp9_sad32x32_avg_c
+
+void vp9_sad32x32x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad32x32x3 vp9_sad32x32x3_c
+
+void vp9_sad32x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad32x32x4d vp9_sad32x32x4d_c
+
+void vp9_sad32x32x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad32x32x8 vp9_sad32x32x8_c
+
+unsigned int vp9_sad32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp9_sad32x64 vp9_sad32x64_c
+
+unsigned int vp9_sad32x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad32x64_avg vp9_sad32x64_avg_c
+
+void vp9_sad32x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad32x64x4d vp9_sad32x64x4d_c
+
+unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad4x4 vp9_sad4x4_c
+
+unsigned int vp9_sad4x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad4x4_avg vp9_sad4x4_avg_c
+
+void vp9_sad4x4x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad4x4x3 vp9_sad4x4x3_c
+
+void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad4x4x4d vp9_sad4x4x4d_c
+
+void vp9_sad4x4x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad4x4x8 vp9_sad4x4x8_c
+
+unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp9_sad4x8 vp9_sad4x8_c
+
+unsigned int vp9_sad4x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad4x8_avg vp9_sad4x8_avg_c
+
+void vp9_sad4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array);
+#define vp9_sad4x8x4d vp9_sad4x8x4d_c
+
+void vp9_sad4x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+#define vp9_sad4x8x8 vp9_sad4x8x8_c
+
+unsigned int vp9_sad64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp9_sad64x32 vp9_sad64x32_c
+
+unsigned int vp9_sad64x32_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad64x32_avg vp9_sad64x32_avg_c
+
+void vp9_sad64x32x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad64x32x4d vp9_sad64x32x4d_c
+
+unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad64x64 vp9_sad64x64_c
+
+unsigned int vp9_sad64x64_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad64x64_avg vp9_sad64x64_avg_c
+
+void vp9_sad64x64x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad64x64x3 vp9_sad64x64x3_c
+
+void vp9_sad64x64x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad64x64x4d vp9_sad64x64x4d_c
+
+void vp9_sad64x64x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad64x64x8 vp9_sad64x64x8_c
+
+unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad8x16 vp9_sad8x16_c
+
+unsigned int vp9_sad8x16_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad8x16_avg vp9_sad8x16_avg_c
+
+void vp9_sad8x16x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad8x16x3 vp9_sad8x16x3_c
+
+void vp9_sad8x16x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad8x16x4d vp9_sad8x16x4d_c
+
+void vp9_sad8x16x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad8x16x8 vp9_sad8x16x8_c
+
+unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad);
+#define vp9_sad8x4 vp9_sad8x4_c
+
+unsigned int vp9_sad8x4_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad8x4_avg vp9_sad8x4_avg_c
+
+void vp9_sad8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array);
+#define vp9_sad8x4x4d vp9_sad8x4x4d_c
+
+void vp9_sad8x4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+#define vp9_sad8x4x8 vp9_sad8x4x8_c
+
+unsigned int vp9_sad8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int max_sad);
+#define vp9_sad8x8 vp9_sad8x8_c
+
+unsigned int vp9_sad8x8_avg_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad);
+#define vp9_sad8x8_avg vp9_sad8x8_avg_c
+
+void vp9_sad8x8x3_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array);
+#define vp9_sad8x8x3 vp9_sad8x8x3_c
+
+void vp9_sad8x8x4d_c(const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array);
+#define vp9_sad8x8x4d vp9_sad8x8x4d_c
+
+void vp9_sad8x8x8_c(const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array);
+#define vp9_sad8x8x8 vp9_sad8x8x8_c
+
+unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance16x16 vp9_sub_pixel_avg_variance16x16_c
+
+unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance16x32 vp9_sub_pixel_avg_variance16x32_c
+
+unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance16x8 vp9_sub_pixel_avg_variance16x8_c
+
+unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance32x16 vp9_sub_pixel_avg_variance32x16_c
+
+unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance32x32 vp9_sub_pixel_avg_variance32x32_c
+
+unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance32x64 vp9_sub_pixel_avg_variance32x64_c
+
+unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance4x4 vp9_sub_pixel_avg_variance4x4_c
+
+unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance4x8 vp9_sub_pixel_avg_variance4x8_c
+
+unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance64x32 vp9_sub_pixel_avg_variance64x32_c
+
+unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance64x64 vp9_sub_pixel_avg_variance64x64_c
+
+unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance8x16 vp9_sub_pixel_avg_variance8x16_c
+
+unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance8x4 vp9_sub_pixel_avg_variance8x4_c
+
+unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
+#define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
+
+unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
+
+unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance16x32 vp9_sub_pixel_variance16x32_c
+
+unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance16x8 vp9_sub_pixel_variance16x8_c
+
+unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance32x16 vp9_sub_pixel_variance32x16_c
+
+unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance32x32 vp9_sub_pixel_variance32x32_c
+
+unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance32x64 vp9_sub_pixel_variance32x64_c
+
+unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance4x4 vp9_sub_pixel_variance4x4_c
+
+unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance4x8 vp9_sub_pixel_variance4x8_c
+
+unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance64x32 vp9_sub_pixel_variance64x32_c
+
+unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance64x64 vp9_sub_pixel_variance64x64_c
+
+unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance8x16 vp9_sub_pixel_variance8x16_c
+
+unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance8x4 vp9_sub_pixel_variance8x4_c
+
+unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_sub_pixel_variance8x8 vp9_sub_pixel_variance8x8_c
+
+void vp9_subtract_block_c(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride);
+#define vp9_subtract_block vp9_subtract_block_c
+
+void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count);
+#define vp9_temporal_filter_apply vp9_temporal_filter_apply_c
+
+void vp9_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_tm_predictor_16x16 vp9_tm_predictor_16x16_c
+
+void vp9_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_tm_predictor_32x32 vp9_tm_predictor_32x32_c
+
+void vp9_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_tm_predictor_4x4 vp9_tm_predictor_4x4_c
+
+void vp9_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_tm_predictor_8x8 vp9_tm_predictor_8x8_c
+
+void vp9_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_v_predictor_16x16 vp9_v_predictor_16x16_c
+
+void vp9_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_v_predictor_32x32 vp9_v_predictor_32x32_c
+
+void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_v_predictor_4x4 vp9_v_predictor_4x4_c
+
+void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
+
+unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance16x16 vp9_variance16x16_c
+
+unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance16x32 vp9_variance16x32_c
+
+unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance16x8 vp9_variance16x8_c
+
+unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance32x16 vp9_variance32x16_c
+
+unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance32x32 vp9_variance32x32_c
+
+unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance32x64 vp9_variance32x64_c
+
+unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance4x4 vp9_variance4x4_c
+
+unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance4x8 vp9_variance4x8_c
+
+unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance64x32 vp9_variance64x32_c
+
+unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance64x64 vp9_variance64x64_c
+
+unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance8x16 vp9_variance8x16_c
+
+unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance8x4 vp9_variance8x4_c
+
+unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vp9_variance8x8 vp9_variance8x8_c
+
+void vp9_rtcd(void);
+
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{  /* Setup hook compiled only under RTCD_C; this body assigns no function pointers. */
+    int flags = arm_cpu_caps();
+    /* flags is not consulted below; the cast to void is its only use. */
+    (void)flags;
+
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/source/config/linux/arm64/vpx_config.asm b/source/config/linux/arm64/vpx_config.asm
new file mode 100644
index 0000000..d4ae581
--- /dev/null
+++ b/source/config/linux/arm64/vpx_config.asm
@@ -0,0 +1,84 @@
+@ This file was created from a .asm file
+@  using the ads2gas.pl script.
+	.equ DO1STROUNDING, 0
+.equ ARCH_ARM ,  1
+.equ ARCH_MIPS ,  0
+.equ ARCH_X86 ,  0
+.equ ARCH_X86_64 ,  0
+.equ ARCH_PPC32 ,  0
+.equ ARCH_PPC64 ,  0
+.equ HAVE_EDSP ,  0
+.equ HAVE_MEDIA ,  0
+.equ HAVE_NEON ,  1
+.equ HAVE_NEON_ASM ,  0
+.equ HAVE_MIPS32 ,  0
+.equ HAVE_DSPR2 ,  0
+.equ HAVE_MMX ,  0
+.equ HAVE_SSE ,  0
+.equ HAVE_SSE2 ,  0
+.equ HAVE_SSE3 ,  0
+.equ HAVE_SSSE3 ,  0
+.equ HAVE_SSE4_1 ,  0
+.equ HAVE_AVX ,  0
+.equ HAVE_AVX2 ,  0
+.equ HAVE_ALTIVEC ,  0
+.equ HAVE_VPX_PORTS ,  1
+.equ HAVE_STDINT_H ,  1
+.equ HAVE_ALT_TREE_LAYOUT ,  0
+.equ HAVE_PTHREAD_H ,  1
+.equ HAVE_SYS_MMAN_H ,  1
+.equ HAVE_UNISTD_H ,  1
+.equ CONFIG_EXTERNAL_BUILD ,  1
+.equ CONFIG_INSTALL_DOCS ,  0
+.equ CONFIG_INSTALL_BINS ,  1
+.equ CONFIG_INSTALL_LIBS ,  1
+.equ CONFIG_INSTALL_SRCS ,  0
+.equ CONFIG_USE_X86INC ,  1
+.equ CONFIG_DEBUG ,  0
+.equ CONFIG_GPROF ,  0
+.equ CONFIG_GCOV ,  0
+.equ CONFIG_RVCT ,  0
+.equ CONFIG_GCC ,  1
+.equ CONFIG_MSVS ,  0
+.equ CONFIG_PIC ,  1
+.equ CONFIG_BIG_ENDIAN ,  0
+.equ CONFIG_CODEC_SRCS ,  0
+.equ CONFIG_DEBUG_LIBS ,  0
+.equ CONFIG_FAST_UNALIGNED ,  1
+.equ CONFIG_MEM_MANAGER ,  0
+.equ CONFIG_MEM_TRACKER ,  0
+.equ CONFIG_MEM_CHECKS ,  0
+.equ CONFIG_DEQUANT_TOKENS ,  0
+.equ CONFIG_DC_RECON ,  0
+.equ CONFIG_RUNTIME_CPU_DETECT ,  0
+.equ CONFIG_POSTPROC ,  1
+.equ CONFIG_VP9_POSTPROC ,  0
+.equ CONFIG_MULTITHREAD ,  1
+.equ CONFIG_INTERNAL_STATS ,  0
+.equ CONFIG_VP8_ENCODER ,  1
+.equ CONFIG_VP8_DECODER ,  1
+.equ CONFIG_VP9_ENCODER ,  1
+.equ CONFIG_VP9_DECODER ,  1
+.equ CONFIG_VP8 ,  1
+.equ CONFIG_VP9 ,  1
+.equ CONFIG_ENCODERS ,  1
+.equ CONFIG_DECODERS ,  1
+.equ CONFIG_STATIC_MSVCRT ,  0
+.equ CONFIG_SPATIAL_RESAMPLING ,  1
+.equ CONFIG_REALTIME_ONLY ,  1
+.equ CONFIG_ONTHEFLY_BITPACKING ,  0
+.equ CONFIG_ERROR_CONCEALMENT ,  0
+.equ CONFIG_SHARED ,  0
+.equ CONFIG_STATIC ,  1
+.equ CONFIG_SMALL ,  0
+.equ CONFIG_POSTPROC_VISUALIZER ,  0
+.equ CONFIG_OS_SUPPORT ,  1
+.equ CONFIG_UNIT_TESTS ,  0
+.equ CONFIG_WEBM_IO ,  1
+.equ CONFIG_DECODE_PERF_TESTS ,  0
+.equ CONFIG_MULTI_RES_ENCODING ,  1
+.equ CONFIG_TEMPORAL_DENOISING ,  1
+.equ CONFIG_EXPERIMENTAL ,  0
+.equ CONFIG_MULTIPLE_ARF ,  0
+.equ CONFIG_ALPHA ,  0
+	.section	.note.GNU-stack,"",%progbits
diff --git a/source/config/linux/arm64/vpx_config.c b/source/config/linux/arm64/vpx_config.c
new file mode 100644
index 0000000..9ef6c2a
--- /dev/null
+++ b/source/config/linux/arm64/vpx_config.c
@@ -0,0 +1,9 @@
+/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */
+/*  */
+/* Use of this source code is governed by a BSD-style license */
+/* that can be found in the LICENSE file in the root of the source */
+/* tree. An additional intellectual property rights grant can be found */
+/* in the file PATENTS.  All contributing project authors may */
+/* be found in the AUTHORS file in the root of the source tree. */
+static const char* const cfg = "--force-target=armv8-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --disable-avx2";
+const char *vpx_codec_build_config(void) {return cfg;}  /* Returns the static option string held in cfg. */
diff --git a/source/config/linux/arm64/vpx_config.h b/source/config/linux/arm64/vpx_config.h
new file mode 100644
index 0000000..8cb9cb0
--- /dev/null
+++ b/source/config/linux/arm64/vpx_config.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */
+/*  */
+/* Use of this source code is governed by a BSD-style license */
+/* that can be found in the LICENSE file in the root of the source */
+/* tree. An additional intellectual property rights grant can be found */
+/* in the file PATENTS.  All contributing project authors may */
+/* be found in the AUTHORS file in the root of the source tree. */
+/* This file automatically generated by configure. Do not edit! */
+#ifndef VPX_CONFIG_H
+#define VPX_CONFIG_H
+#define RESTRICT    
+#define INLINE      __inline__ __attribute__((always_inline))
+#define ARCH_ARM 1
+#define ARCH_MIPS 0
+#define ARCH_X86 0
+#define ARCH_X86_64 0
+#define ARCH_PPC32 0
+#define ARCH_PPC64 0
+#define HAVE_EDSP 0
+#define HAVE_MEDIA 0
+#define HAVE_NEON 1
+#define HAVE_NEON_ASM 0
+#define HAVE_MIPS32 0
+#define HAVE_DSPR2 0
+#define HAVE_MMX 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define HAVE_SSE3 0
+#define HAVE_SSSE3 0
+#define HAVE_SSE4_1 0
+#define HAVE_AVX 0
+#define HAVE_AVX2 0
+#define HAVE_ALTIVEC 0
+#define HAVE_VPX_PORTS 1
+#define HAVE_STDINT_H 1
+#define HAVE_ALT_TREE_LAYOUT 0
+#define HAVE_PTHREAD_H 1
+#define HAVE_SYS_MMAN_H 1
+#define HAVE_UNISTD_H 1
+#define CONFIG_EXTERNAL_BUILD 1
+#define CONFIG_INSTALL_DOCS 0
+#define CONFIG_INSTALL_BINS 1
+#define CONFIG_INSTALL_LIBS 1
+#define CONFIG_INSTALL_SRCS 0
+#define CONFIG_USE_X86INC 1
+#define CONFIG_DEBUG 0
+#define CONFIG_GPROF 0
+#define CONFIG_GCOV 0
+#define CONFIG_RVCT 0
+#define CONFIG_GCC 1
+#define CONFIG_MSVS 0
+#define CONFIG_PIC 1
+#define CONFIG_BIG_ENDIAN 0
+#define CONFIG_CODEC_SRCS 0
+#define CONFIG_DEBUG_LIBS 0
+#define CONFIG_FAST_UNALIGNED 1
+#define CONFIG_MEM_MANAGER 0
+#define CONFIG_MEM_TRACKER 0
+#define CONFIG_MEM_CHECKS 0
+#define CONFIG_DEQUANT_TOKENS 0
+#define CONFIG_DC_RECON 0
+#define CONFIG_RUNTIME_CPU_DETECT 0
+#define CONFIG_POSTPROC 1
+#define CONFIG_VP9_POSTPROC 0
+#define CONFIG_MULTITHREAD 1
+#define CONFIG_INTERNAL_STATS 0
+#define CONFIG_VP8_ENCODER 1
+#define CONFIG_VP8_DECODER 1
+#define CONFIG_VP9_ENCODER 1
+#define CONFIG_VP9_DECODER 1
+#define CONFIG_VP8 1
+#define CONFIG_VP9 1
+#define CONFIG_ENCODERS 1
+#define CONFIG_DECODERS 1
+#define CONFIG_STATIC_MSVCRT 0
+#define CONFIG_SPATIAL_RESAMPLING 1
+#define CONFIG_REALTIME_ONLY 1
+#define CONFIG_ONTHEFLY_BITPACKING 0
+#define CONFIG_ERROR_CONCEALMENT 0
+#define CONFIG_SHARED 0
+#define CONFIG_STATIC 1
+#define CONFIG_SMALL 0
+#define CONFIG_POSTPROC_VISUALIZER 0
+#define CONFIG_OS_SUPPORT 1
+#define CONFIG_UNIT_TESTS 0
+#define CONFIG_WEBM_IO 1
+#define CONFIG_DECODE_PERF_TESTS 0
+#define CONFIG_MULTI_RES_ENCODING 1
+#define CONFIG_TEMPORAL_DENOISING 1
+#define CONFIG_EXPERIMENTAL 0
+#define CONFIG_MULTIPLE_ARF 0
+#define CONFIG_ALPHA 0
+#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/arm64/vpx_scale_rtcd.h b/source/config/linux/arm64/vpx_scale_rtcd.h
new file mode 100644
index 0000000..0a6d790
--- /dev/null
+++ b/source/config/linux/arm64/vpx_scale_rtcd.h
@@ -0,0 +1,71 @@
+#ifndef VPX_SCALE_RTCD_H_
+#define VPX_SCALE_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN  /* RTCD_C defined: expands to nothing */
+#else
+#define RTCD_EXTERN extern  /* otherwise: expands to the extern storage class */
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct yv12_buffer_config;
+/* Each entry below is a _c prototype followed by a macro aliasing the unsuffixed name to it. */
+void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+#define vp8_horizontal_line_2_1_scale vp8_horizontal_line_2_1_scale_c
+
+void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+#define vp8_horizontal_line_5_3_scale vp8_horizontal_line_5_3_scale_c
+
+void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width);
+#define vp8_horizontal_line_5_4_scale vp8_horizontal_line_5_4_scale_c
+
+void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
+#define vp8_vertical_band_2_1_scale vp8_vertical_band_2_1_scale_c
+
+void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
+#define vp8_vertical_band_2_1_scale_i vp8_vertical_band_2_1_scale_i_c
+
+void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
+#define vp8_vertical_band_5_3_scale vp8_vertical_band_5_3_scale_c
+
+void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width);
+#define vp8_vertical_band_5_4_scale vp8_vertical_band_5_4_scale_c
+
+void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
+#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c
+
+void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf);
+#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c
+
+void vp9_extend_frame_borders_c(struct yv12_buffer_config *ybf);
+#define vp9_extend_frame_borders vp9_extend_frame_borders_c
+
+void vp9_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf);
+#define vp9_extend_frame_inner_borders vp9_extend_frame_inner_borders_c
+
+void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
+#define vpx_yv12_copy_y vpx_yv12_copy_y_c
+/* Only a prototype for vpx_scale_rtcd appears in this header; its definition is elsewhere. */
+void vpx_scale_rtcd(void);
+
+#include "vpx_config.h"
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{  /* Setup hook compiled only under RTCD_C; this body assigns no function pointers. */
+    int flags = arm_cpu_caps();
+    /* flags is not consulted below; the cast to void is its only use. */
+    (void)flags;
+
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/source/config/linux/generic/vp8_rtcd.h b/source/config/linux/generic/vp8_rtcd.h
index d6de728..ef9fa5f 100644
--- a/source/config/linux/generic/vp8_rtcd.h
+++ b/source/config/linux/generic/vp8_rtcd.h
@@ -71,7 +71,7 @@ void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, in
 void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_c
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/generic/vp9_rtcd.h b/source/config/linux/generic/vp9_rtcd.h
index c42a60b..fcb14ab 100644
--- a/source/config/linux/generic/vp9_rtcd.h
+++ b/source/config/linux/generic/vp9_rtcd.h
@@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c
diff --git a/source/config/linux/generic/vpx_config.asm b/source/config/linux/generic/vpx_config.asm
index 4e01f9c..b94e4f0 100644
--- a/source/config/linux/generic/vpx_config.asm
+++ b/source/config/linux/generic/vpx_config.asm
@@ -10,6 +10,7 @@
 .equ HAVE_EDSP ,  0
 .equ HAVE_MEDIA ,  0
 .equ HAVE_NEON ,  0
+.equ HAVE_NEON_ASM ,  0
 .equ HAVE_MIPS32 ,  0
 .equ HAVE_DSPR2 ,  0
 .equ HAVE_MMX ,  0
diff --git a/source/config/linux/generic/vpx_config.h b/source/config/linux/generic/vpx_config.h
index a0ffc4e..122c0e7 100644
--- a/source/config/linux/generic/vpx_config.h
+++ b/source/config/linux/generic/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 0
diff --git a/source/config/linux/ia32/vp8_rtcd.h b/source/config/linux/ia32/vp8_rtcd.h
index 7e90462..fc0f7a2 100644
--- a/source/config/linux/ia32/vp8_rtcd.h
+++ b/source/config/linux/ia32/vp8_rtcd.h
@@ -96,9 +96,9 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/ia32/vp9_rtcd.h b/source/config/linux/ia32/vp9_rtcd.h
index 40965d0..9482f75 100644
--- a/source/config/linux/ia32/vp9_rtcd.h
+++ b/source/config/linux/ia32/vp9_rtcd.h
@@ -263,7 +263,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride);
-#define vp9_fwht4x4 vp9_fwht4x4_c
+void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride);
+RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, int16_t *output, int stride);
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_mmx(const int16_t *);
@@ -318,9 +319,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
-RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -951,6 +952,8 @@ static void setup_rtcd_internal(void)
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
+    vp9_fwht4x4 = vp9_fwht4x4_c;
+    if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
     vp9_get_mb_ss = vp9_get_mb_ss_c;
     if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx;
     if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
@@ -978,8 +981,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2;
     vp9_idct4x4_1_add = vp9_idct4x4_1_add_c;
     if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2;
-    vp9_idct8x8_10_add = vp9_idct8x8_10_add_c;
-    if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_c;
+    if (flags & HAS_SSE2) vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
     vp9_idct8x8_1_add = vp9_idct8x8_1_add_c;
     if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_c;
diff --git a/source/config/linux/ia32/vpx_config.asm b/source/config/linux/ia32/vpx_config.asm
index c7b5491..f783cc5 100644
--- a/source/config/linux/ia32/vpx_config.asm
+++ b/source/config/linux/ia32/vpx_config.asm
@@ -7,6 +7,7 @@ ARCH_PPC64 equ 0
 HAVE_EDSP equ 0
 HAVE_MEDIA equ 0
 HAVE_NEON equ 0
+HAVE_NEON_ASM equ 0
 HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MMX equ 1
diff --git a/source/config/linux/ia32/vpx_config.h b/source/config/linux/ia32/vpx_config.h
index 3ebfb59..0cbf5de 100644
--- a/source/config/linux/ia32/vpx_config.h
+++ b/source/config/linux/ia32/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 1
diff --git a/source/config/linux/mipsel/vp8_rtcd.h b/source/config/linux/mipsel/vp8_rtcd.h
index 72a7d9e..bfb056b 100644
--- a/source/config/linux/mipsel/vp8_rtcd.h
+++ b/source/config/linux/mipsel/vp8_rtcd.h
@@ -71,7 +71,7 @@ void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, in
 void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_c
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/mipsel/vp9_rtcd.h b/source/config/linux/mipsel/vp9_rtcd.h
index c42a60b..fcb14ab 100644
--- a/source/config/linux/mipsel/vp9_rtcd.h
+++ b/source/config/linux/mipsel/vp9_rtcd.h
@@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c
diff --git a/source/config/linux/mipsel/vpx_config.h b/source/config/linux/mipsel/vpx_config.h
index 7b7fd99..a39eb99 100644
--- a/source/config/linux/mipsel/vpx_config.h
+++ b/source/config/linux/mipsel/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 1
 #define HAVE_DSPR2 0
 #define HAVE_MMX 0
diff --git a/source/config/linux/x64/vp8_rtcd.h b/source/config/linux/x64/vp8_rtcd.h
index 9653130..d2ebae0 100644
--- a/source/config/linux/x64/vp8_rtcd.h
+++ b/source/config/linux/x64/vp8_rtcd.h
@@ -96,8 +96,8 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_mmx
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_sse2
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/x64/vp9_rtcd.h b/source/config/linux/x64/vp9_rtcd.h
index b7056fa..beb342b 100644
--- a/source/config/linux/x64/vp9_rtcd.h
+++ b/source/config/linux/x64/vp9_rtcd.h
@@ -264,7 +264,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride);
-#define vp9_fwht4x4 vp9_fwht4x4_c
+void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride);
+#define vp9_fwht4x4 vp9_fwht4x4_mmx
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_mmx(const int16_t *);
@@ -319,9 +320,10 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_sse2
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_sse2
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_ssse3(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -930,6 +932,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3;
     vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
+    if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
     vp9_quantize_b = vp9_quantize_b_c;
diff --git a/source/config/linux/x64/vpx_config.asm b/source/config/linux/x64/vpx_config.asm
index 37939c6..9ebc29f 100644
--- a/source/config/linux/x64/vpx_config.asm
+++ b/source/config/linux/x64/vpx_config.asm
@@ -7,6 +7,7 @@ ARCH_PPC64 equ 0
 HAVE_EDSP equ 0
 HAVE_MEDIA equ 0
 HAVE_NEON equ 0
+HAVE_NEON_ASM equ 0
 HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MMX equ 1
diff --git a/source/config/linux/x64/vpx_config.h b/source/config/linux/x64/vpx_config.h
index be57865..169de86 100644
--- a/source/config/linux/x64/vpx_config.h
+++ b/source/config/linux/x64/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 1
diff --git a/source/config/mac/ia32/vp8_rtcd.h b/source/config/mac/ia32/vp8_rtcd.h
index 7e90462..fc0f7a2 100644
--- a/source/config/mac/ia32/vp8_rtcd.h
+++ b/source/config/mac/ia32/vp8_rtcd.h
@@ -96,9 +96,9 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/mac/ia32/vp9_rtcd.h b/source/config/mac/ia32/vp9_rtcd.h
index 45544eb..28ae79b 100644
--- a/source/config/mac/ia32/vp9_rtcd.h
+++ b/source/config/mac/ia32/vp9_rtcd.h
@@ -292,9 +292,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
-RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -800,8 +800,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2;
     vp9_idct4x4_1_add = vp9_idct4x4_1_add_c;
     if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2;
-    vp9_idct8x8_10_add = vp9_idct8x8_10_add_c;
-    if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_c;
+    if (flags & HAS_SSE2) vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
     vp9_idct8x8_1_add = vp9_idct8x8_1_add_c;
     if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_c;
diff --git a/source/config/mac/ia32/vpx_config.asm b/source/config/mac/ia32/vpx_config.asm
index 3903e04..5884bb1 100644
--- a/source/config/mac/ia32/vpx_config.asm
+++ b/source/config/mac/ia32/vpx_config.asm
@@ -7,6 +7,7 @@ ARCH_PPC64 equ 0
 HAVE_EDSP equ 0
 HAVE_MEDIA equ 0
 HAVE_NEON equ 0
+HAVE_NEON_ASM equ 0
 HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MMX equ 1
diff --git a/source/config/mac/ia32/vpx_config.h b/source/config/mac/ia32/vpx_config.h
index d38d8ca..c180d92 100644
--- a/source/config/mac/ia32/vpx_config.h
+++ b/source/config/mac/ia32/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 1
diff --git a/source/config/mac/x64/vp8_rtcd.h b/source/config/mac/x64/vp8_rtcd.h
index 9653130..d2ebae0 100644
--- a/source/config/mac/x64/vp8_rtcd.h
+++ b/source/config/mac/x64/vp8_rtcd.h
@@ -96,8 +96,8 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_mmx
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_sse2
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/mac/x64/vp9_rtcd.h b/source/config/mac/x64/vp9_rtcd.h
index b7056fa..beb342b 100644
--- a/source/config/mac/x64/vp9_rtcd.h
+++ b/source/config/mac/x64/vp9_rtcd.h
@@ -264,7 +264,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride);
-#define vp9_fwht4x4 vp9_fwht4x4_c
+void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride);
+#define vp9_fwht4x4 vp9_fwht4x4_mmx
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_mmx(const int16_t *);
@@ -319,9 +320,10 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_sse2
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_sse2
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_ssse3(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -930,6 +932,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3;
     vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
+    if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
     vp9_quantize_b = vp9_quantize_b_c;
diff --git a/source/config/mac/x64/vpx_config.asm b/source/config/mac/x64/vpx_config.asm
index 37939c6..9ebc29f 100644
--- a/source/config/mac/x64/vpx_config.asm
+++ b/source/config/mac/x64/vpx_config.asm
@@ -7,6 +7,7 @@ ARCH_PPC64 equ 0
 HAVE_EDSP equ 0
 HAVE_MEDIA equ 0
 HAVE_NEON equ 0
+HAVE_NEON_ASM equ 0
 HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MMX equ 1
diff --git a/source/config/mac/x64/vpx_config.h b/source/config/mac/x64/vpx_config.h
index be57865..169de86 100644
--- a/source/config/mac/x64/vpx_config.h
+++ b/source/config/mac/x64/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 1
diff --git a/source/config/nacl/vp8_rtcd.h b/source/config/nacl/vp8_rtcd.h
index d6de728..ef9fa5f 100644
--- a/source/config/nacl/vp8_rtcd.h
+++ b/source/config/nacl/vp8_rtcd.h
@@ -71,7 +71,7 @@ void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, in
 void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_c
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/nacl/vp9_rtcd.h b/source/config/nacl/vp9_rtcd.h
index c42a60b..fcb14ab 100644
--- a/source/config/nacl/vp9_rtcd.h
+++ b/source/config/nacl/vp9_rtcd.h
@@ -259,8 +259,8 @@ void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_c
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_c
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+#define vp9_idct8x8_12_add vp9_idct8x8_12_add_c
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct8x8_1_add vp9_idct8x8_1_add_c
diff --git a/source/config/nacl/vpx_config.asm b/source/config/nacl/vpx_config.asm
index 4e01f9c..b94e4f0 100644
--- a/source/config/nacl/vpx_config.asm
+++ b/source/config/nacl/vpx_config.asm
@@ -10,6 +10,7 @@
 .equ HAVE_EDSP ,  0
 .equ HAVE_MEDIA ,  0
 .equ HAVE_NEON ,  0
+.equ HAVE_NEON_ASM ,  0
 .equ HAVE_MIPS32 ,  0
 .equ HAVE_DSPR2 ,  0
 .equ HAVE_MMX ,  0
diff --git a/source/config/nacl/vpx_config.h b/source/config/nacl/vpx_config.h
index a0ffc4e..122c0e7 100644
--- a/source/config/nacl/vpx_config.h
+++ b/source/config/nacl/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 0
diff --git a/source/config/win/ia32/vp8_rtcd.h b/source/config/win/ia32/vp8_rtcd.h
index 7e90462..fc0f7a2 100644
--- a/source/config/win/ia32/vp8_rtcd.h
+++ b/source/config/win/ia32/vp8_rtcd.h
@@ -96,9 +96,9 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-RTCD_EXTERN int (*vp8_denoiser_filter)(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/win/ia32/vp9_rtcd.h b/source/config/win/ia32/vp9_rtcd.h
index 40965d0..9482f75 100644
--- a/source/config/win/ia32/vp9_rtcd.h
+++ b/source/config/win/ia32/vp9_rtcd.h
@@ -263,7 +263,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride);
-#define vp9_fwht4x4 vp9_fwht4x4_c
+void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride);
+RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, int16_t *output, int stride);
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_mmx(const int16_t *);
@@ -318,9 +319,9 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vp9_idct4x4_1_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
-RTCD_EXTERN void (*vp9_idct8x8_10_add)(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -951,6 +952,8 @@ static void setup_rtcd_internal(void)
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
+    vp9_fwht4x4 = vp9_fwht4x4_c;
+    if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
     vp9_get_mb_ss = vp9_get_mb_ss_c;
     if (flags & HAS_MMX) vp9_get_mb_ss = vp9_get_mb_ss_mmx;
     if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
@@ -978,8 +981,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSE2) vp9_idct4x4_16_add = vp9_idct4x4_16_add_sse2;
     vp9_idct4x4_1_add = vp9_idct4x4_1_add_c;
     if (flags & HAS_SSE2) vp9_idct4x4_1_add = vp9_idct4x4_1_add_sse2;
-    vp9_idct8x8_10_add = vp9_idct8x8_10_add_c;
-    if (flags & HAS_SSE2) vp9_idct8x8_10_add = vp9_idct8x8_10_add_sse2;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_c;
+    if (flags & HAS_SSE2) vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
     vp9_idct8x8_1_add = vp9_idct8x8_1_add_c;
     if (flags & HAS_SSE2) vp9_idct8x8_1_add = vp9_idct8x8_1_add_sse2;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_c;
diff --git a/source/config/win/ia32/vpx_config.asm b/source/config/win/ia32/vpx_config.asm
index d5677dd..3b1a8de 100644
--- a/source/config/win/ia32/vpx_config.asm
+++ b/source/config/win/ia32/vpx_config.asm
@@ -7,6 +7,7 @@ ARCH_PPC64 equ 0
 HAVE_EDSP equ 0
 HAVE_MEDIA equ 0
 HAVE_NEON equ 0
+HAVE_NEON_ASM equ 0
 HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MMX equ 1
diff --git a/source/config/win/ia32/vpx_config.h b/source/config/win/ia32/vpx_config.h
index fb663d0..255ce65 100644
--- a/source/config/win/ia32/vpx_config.h
+++ b/source/config/win/ia32/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 1
diff --git a/source/config/win/x64/vp8_rtcd.h b/source/config/win/x64/vp8_rtcd.h
index 9653130..d2ebae0 100644
--- a/source/config/win/x64/vp8_rtcd.h
+++ b/source/config/win/x64/vp8_rtcd.h
@@ -96,8 +96,8 @@ void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, u
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_mmx
 
-int vp8_denoiser_filter_c(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
-int vp8_denoiser_filter_sse2(struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset);
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
 #define vp8_denoiser_filter vp8_denoiser_filter_sse2
 
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h
index b7056fa..beb342b 100644
--- a/source/config/win/x64/vp9_rtcd.h
+++ b/source/config/win/x64/vp9_rtcd.h
@@ -264,7 +264,8 @@ int vp9_full_search_sadx8(const struct macroblock *x, const struct mv *ref_mv, i
 RTCD_EXTERN int (*vp9_full_search_sad)(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
 
 void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride);
-#define vp9_fwht4x4 vp9_fwht4x4_c
+void vp9_fwht4x4_mmx(const int16_t *input, int16_t *output, int stride);
+#define vp9_fwht4x4 vp9_fwht4x4_mmx
 
 unsigned int vp9_get_mb_ss_c(const int16_t *);
 unsigned int vp9_get_mb_ss_mmx(const int16_t *);
@@ -319,9 +320,10 @@ void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
 #define vp9_idct4x4_1_add vp9_idct4x4_1_add_sse2
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
-#define vp9_idct8x8_10_add vp9_idct8x8_10_add_sse2
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
+void vp9_idct8x8_12_add_ssse3(const int16_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vp9_idct8x8_12_add)(const int16_t *input, uint8_t *dest, int dest_stride);
 
 void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride);
 void vp9_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int dest_stride);
@@ -930,6 +932,8 @@ static void setup_rtcd_internal(void)
     if (flags & HAS_SSSE3) vp9_h_predictor_4x4 = vp9_h_predictor_4x4_ssse3;
     vp9_h_predictor_8x8 = vp9_h_predictor_8x8_c;
     if (flags & HAS_SSSE3) vp9_h_predictor_8x8 = vp9_h_predictor_8x8_ssse3;
+    vp9_idct8x8_12_add = vp9_idct8x8_12_add_sse2;
+    if (flags & HAS_SSSE3) vp9_idct8x8_12_add = vp9_idct8x8_12_add_ssse3;
     vp9_idct8x8_64_add = vp9_idct8x8_64_add_sse2;
     if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
     vp9_quantize_b = vp9_quantize_b_c;
diff --git a/source/config/win/x64/vpx_config.asm b/source/config/win/x64/vpx_config.asm
index 6617125..b9288b7 100644
--- a/source/config/win/x64/vpx_config.asm
+++ b/source/config/win/x64/vpx_config.asm
@@ -7,6 +7,7 @@ ARCH_PPC64 equ 0
 HAVE_EDSP equ 0
 HAVE_MEDIA equ 0
 HAVE_NEON equ 0
+HAVE_NEON_ASM equ 0
 HAVE_MIPS32 equ 0
 HAVE_DSPR2 equ 0
 HAVE_MMX equ 1
diff --git a/source/config/win/x64/vpx_config.h b/source/config/win/x64/vpx_config.h
index 4de3b21..5181177 100644
--- a/source/config/win/x64/vpx_config.h
+++ b/source/config/win/x64/vpx_config.h
@@ -19,6 +19,7 @@
 #define HAVE_EDSP 0
 #define HAVE_MEDIA 0
 #define HAVE_NEON 0
+#define HAVE_NEON_ASM 0
 #define HAVE_MIPS32 0
 #define HAVE_DSPR2 0
 #define HAVE_MMX 1
diff --git a/source/libvpx/README b/source/libvpx/README
index ce9c1c6..6d7d5ec 100644
--- a/source/libvpx/README
+++ b/source/libvpx/README
@@ -12,22 +12,20 @@ COMPILING THE APPLICATIONS/LIBRARIES:
 
     * All x86 targets require the Yasm[1] assembler be installed.
     * All Windows builds require that Cygwin[2] be installed.
-    * Building the documentation requires PHP[3] and Doxygen[4]. If you do not
-      have these packages, you must pass --disable-install-docs to the
-      configure script.
-    * Downloading the data for the unit tests requires curl[5] and sha1sum.
+    * Building the documentation requires Doxygen[3]. If you do not
+      have this package, the install-docs option will be disabled.
+    * Downloading the data for the unit tests requires curl[4] and sha1sum.
       sha1sum is provided via the GNU coreutils, installed by default on
       many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
       available, a compatible version of sha1sum can be built from
-      source[6]. These requirements are optional if not running the unit
+      source[5]. These requirements are optional if not running the unit
       tests.
 
     [1]: http://www.tortall.net/projects/yasm
     [2]: http://www.cygwin.com
-    [3]: http://php.net
-    [4]: http://www.doxygen.org
-    [5]: http://curl.haxx.se
-    [6]: http://www.microbrew.org/tools/md5sha1sum/
+    [3]: http://www.doxygen.org
+    [4]: http://curl.haxx.se
+    [5]: http://www.microbrew.org/tools/md5sha1sum/
 
   2. Out-of-tree builds
   Out of tree builds are a supported method of building the application. For
diff --git a/source/libvpx/build/make/Android.mk b/source/libvpx/build/make/Android.mk
index 369c2a5..816334e 100644
--- a/source/libvpx/build/make/Android.mk
+++ b/source/libvpx/build/make/Android.mk
@@ -38,8 +38,9 @@
 # For this we import the 'cpufeatures' module from the NDK sources.
 # libvpx can also be configured without this runtime detection method.
 # Configuring with --disable-runtime-cpu-detect will assume presence of NEON.
-# Configuring with --disable-runtime-cpu-detect --disable-neon will remove any
-# NEON dependency.
+# Configuring with --disable-runtime-cpu-detect --disable-neon \
+#     --disable-neon-asm
+# will remove any NEON dependency.
 
 # To change to building armeabi, run ./libvpx/configure again, but with
 # --target=arm5te-android-gcc and modify the Application.mk file to
@@ -61,6 +62,9 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 else ifeq  ($(TARGET_ARCH_ABI),armeabi)
   include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
   LOCAL_ARM_MODE := arm
+else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
+  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
+  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
   include $(CONFIG_DIR)libs-x86-android-gcc.mk
 else ifeq ($(TARGET_ARCH_ABI),mips)
@@ -126,7 +130,7 @@ endef
 ifeq ($(CONFIG_VP8_ENCODER), yes)
   ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
 endif
-ifeq ($(HAVE_NEON), yes)
+ifeq ($(HAVE_NEON_ASM), yes)
   ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
 endif
 
@@ -153,7 +157,11 @@ LOCAL_NEON_SRCS_C = $(filter %_neon.c, $(CODEC_SRCS_C))
 LOCAL_CODEC_SRCS_C = $(filter-out vpx_config.c %_neon.c, $(CODEC_SRCS_C))
 
 LOCAL_SRC_FILES += $(foreach file, $(LOCAL_CODEC_SRCS_C), libvpx/$(file))
-LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon)
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+  LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon)
+else # If there are neon sources then we are building for arm64 and do not need to specify .neon
+  LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file))
+endif
 
 # Pull out assembly files, splitting NEON from the rest.  This is
 # done to specify that the NEON assembly files use NEON assembler flags.
diff --git a/source/libvpx/build/make/configure.sh b/source/libvpx/build/make/configure.sh
index 4c3b05f..c07b049 100755
--- a/source/libvpx/build/make/configure.sh
+++ b/source/libvpx/build/make/configure.sh
@@ -518,7 +518,7 @@ process_common_cmdline() {
         --enable-?*|--disable-?*)
         eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
         if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
-            [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}${opt} "
+            [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} "
         elif [ $action = "disable" ] && ! disabled $option ; then
           echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
             die_unknown $opt
@@ -792,8 +792,12 @@ process_common_toolchain() {
     arm*)
         # on arm, isa versions are supersets
         case ${tgt_isa} in
+        armv8)
+            soft_enable neon
+            ;;
         armv7)
             soft_enable neon
+            soft_enable neon_asm
             soft_enable media
             soft_enable edsp
             soft_enable fast_unaligned
@@ -831,7 +835,7 @@ EOF
                 check_add_cflags  -march=armv7-a -mfloat-abi=${float_abi}
                 check_add_asflags -march=armv7-a -mfloat-abi=${float_abi}
 
-                if enabled neon
+                if enabled neon || enabled neon_asm
                 then
                     check_add_cflags -mfpu=neon #-ftree-vectorize
                     check_add_asflags -mfpu=neon
@@ -878,7 +882,7 @@ EOF
             tune_asflags="--cpu="
             if [ -z "${tune_cpu}" ]; then
                 if [ ${tgt_isa} = "armv7" ]; then
-                    if enabled neon
+                    if enabled neon || enabled neon_asm
                     then
                         check_add_cflags --fpu=softvfp+vfpv3
                         check_add_asflags --fpu=softvfp+vfpv3
diff --git a/source/libvpx/build/make/rtcd.pl b/source/libvpx/build/make/rtcd.pl
index 18ee80d..f5f59b1 100755
--- a/source/libvpx/build/make/rtcd.pl
+++ b/source/libvpx/build/make/rtcd.pl
@@ -272,6 +272,9 @@ sub arm() {
   # Assign the helper variable for each enabled extension
   foreach my $opt (@ALL_ARCHS) {
     my $opt_uc = uc $opt;
+    # Enable neon assembly based on HAVE_NEON logic instead of adding new
+    # HAVE_NEON_ASM logic
+    if ($opt eq 'neon_asm') { $opt_uc = 'NEON' }
     eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
   }
 
@@ -381,7 +384,10 @@ if ($opts{arch} eq 'x86') {
   @ALL_ARCHS = filter(qw/edsp media/);
   arm;
 } elsif ($opts{arch} eq 'armv7') {
-  @ALL_ARCHS = filter(qw/edsp media neon/);
+  @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
+  arm;
+} elsif ($opts{arch} eq 'armv8') {
+  @ALL_ARCHS = filter(qw/neon/);
   arm;
 } else {
   unoptimized;
diff --git a/source/libvpx/configure b/source/libvpx/configure
index 690ac48..bd95056 100755
--- a/source/libvpx/configure
+++ b/source/libvpx/configure
@@ -189,7 +189,7 @@ fi
 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
 # case.
-enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
+enabled doxygen && enable_feature install_docs
 enable_feature install_bins
 enable_feature install_libs
 
@@ -239,6 +239,7 @@ ARCH_EXT_LIST="
     edsp
     media
     neon
+    neon_asm
 
     mips32
     dspr2
diff --git a/source/libvpx/docs.mk b/source/libvpx/docs.mk
index 797b466..889d182 100644
--- a/source/libvpx/docs.mk
+++ b/source/libvpx/docs.mk
@@ -23,12 +23,6 @@ CODEC_DOX :=    mainpage.dox \
 # Other doxy files sourced in Markdown
 TXT_DOX = $(call enabled,TXT_DOX)
 
-%.dox: %.txt
-	@echo "    [DOXY] $@"
-	@$(SRC_PATH_BARE)/examples/gen_example_doxy.php \
-             $(@:.dox=)  "$($@.DESC)" > $@ < $<
-
-
 EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc
 EXAMPLE_PATH += $(SRC_PATH_BARE)/examples
 
diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk
index f6e7c00..28ab33a 100644
--- a/source/libvpx/examples.mk
+++ b/source/libvpx/examples.mk
@@ -67,32 +67,27 @@ ifeq ($(CONFIG_WEBM_IO),yes)
 endif
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 vpxenc.DESCRIPTION           = Full featured encoder
-EXAMPLES-$(CONFIG_VP9_ENCODER)    += vp9_spatial_scalable_encoder.c
-vp9_spatial_scalable_encoder.SRCS += args.c args.h
-vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h
-vp9_spatial_scalable_encoder.SRCS += tools_common.c tools_common.h
-vp9_spatial_scalable_encoder.SRCS += video_common.h
-vp9_spatial_scalable_encoder.SRCS += video_writer.h video_writer.c
-vp9_spatial_scalable_encoder.SRCS += vpxstats.c vpxstats.h
-vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
-vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
+EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
+vp9_spatial_svc_encoder.SRCS        += args.c args.h
+vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
+vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
+vp9_spatial_svc_encoder.SRCS        += video_common.h
+vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
+vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
+vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
+vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
 
 ifneq ($(CONFIG_SHARED),yes)
 EXAMPLES-$(CONFIG_VP9_ENCODER)    += resize_util.c
 endif
 
-# XMA example disabled for now, not used in VP8
-#UTILS-$(CONFIG_DECODERS)    += example_xma.c
-#example_xma.GUID             = A955FC4A-73F1-44F7-135E-30D84D32F022
-#example_xma.DESCRIPTION      = External Memory Allocation mode usage
-
-EXAMPLES-$(CONFIG_ENCODERS)         += vpx_temporal_scalable_patterns.c
-vpx_temporal_scalable_patterns.SRCS += ivfenc.c ivfenc.h
-vpx_temporal_scalable_patterns.SRCS += tools_common.c tools_common.h
-vpx_temporal_scalable_patterns.SRCS += video_common.h
-vpx_temporal_scalable_patterns.SRCS += video_writer.h video_writer.c
-vpx_temporal_scalable_patterns.GUID  = B18C08F2-A439-4502-A78E-849BE3D60947
-vpx_temporal_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
+EXAMPLES-$(CONFIG_ENCODERS)          += vpx_temporal_svc_encoder.c
+vpx_temporal_svc_encoder.SRCS        += ivfenc.c ivfenc.h
+vpx_temporal_svc_encoder.SRCS        += tools_common.c tools_common.h
+vpx_temporal_svc_encoder.SRCS        += video_common.h
+vpx_temporal_svc_encoder.SRCS        += video_writer.h video_writer.c
+vpx_temporal_svc_encoder.GUID        = B18C08F2-A439-4502-A78E-849BE3D60947
+vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
 EXAMPLES-$(CONFIG_VP8_DECODER)     += simple_decoder.c
 simple_decoder.GUID                 = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
 simple_decoder.SRCS                += ivfdec.h ivfdec.c
@@ -146,11 +141,6 @@ decode_with_drops.SRCS          += vpx_ports/mem_ops_aligned.h
 endif
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
-ifeq ($(CONFIG_VP8_DECODER),yes)
-EXAMPLES-$(CONFIG_ERROR_CONCEALMENT)    += decode_with_partial_drops.c
-endif
-decode_with_partial_drops.GUID           = 61C2D026-5754-46AC-916F-1343ECC5537E
-decode_with_partial_drops.DESCRIPTION    = Drops parts of frames while decoding
 EXAMPLES-$(CONFIG_ENCODERS)        += set_maps.c
 set_maps.SRCS                      += ivfenc.h ivfenc.c
 set_maps.SRCS                      += tools_common.h tools_common.c
diff --git a/source/libvpx/examples/decode_with_partial_drops.c b/source/libvpx/examples/decode_with_partial_drops.c
deleted file mode 100644
index d7132de..0000000
--- a/source/libvpx/examples/decode_with_partial_drops.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-// Decode With Partial Drops Example
-// =========================
-//
-// This is an example utility which drops a series of frames (or parts of
-// frames), as specified on the command line. This is useful for observing the
-// error recovery features of the codec.
-//
-// Usage
-// -----
-// This example adds a single argument to the `simple_decoder` example,
-// which specifies the range or pattern of frames to drop. The parameter is
-// parsed as follows.
-//
-// Dropping A Range Of Frames
-// --------------------------
-// To drop a range of frames, specify the starting frame and the ending
-// frame to drop, separated by a dash. The following command will drop
-// frames 5 through 10 (base 1).
-//
-//  $ ./decode_with_partial_drops in.ivf out.i420 5-10
-//
-//
-// Dropping A Pattern Of Frames
-// ----------------------------
-// To drop a pattern of frames, specify the number of frames to drop and
-// the number of frames after which to repeat the pattern, separated by
-// a forward-slash. The following command will drop 3 of 7 frames.
-// Specifically, it will decode 4 frames, then drop 3 frames, and then
-// repeat.
-//
-//  $ ./decode_with_partial_drops in.ivf out.i420 3/7
-//
-// Dropping Random Parts Of Frames
-// -------------------------------
-// A third argument tuple is available to split the frame into 1500 bytes pieces
-// and randomly drop pieces rather than frames. The frame will be split at
-// partition boundaries where possible. The following example will seed the RNG
-// with the seed 123 and drop approximately 5% of the pieces. Pieces which
-// are depending on an already dropped piece will also be dropped.
-//
-//  $ ./decode_with_partial_drops in.ivf out.i420 5,123
-//
-// Extra Variables
-// ---------------
-// This example maintains the pattern passed on the command line in the
-// `n`, `m`, and `is_range` variables:
-//
-// Making The Drop Decision
-// ------------------------
-// The example decides whether to drop the frame based on the current
-// frame number, immediately before decoding the frame.
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "./vpx_config.h"
-#include "vpx/vp8dx.h"
-#include "vpx/vpx_decoder.h"
-#define interface (vpx_codec_vp8_dx())
-#include <time.h>
-
-
-#define IVF_FILE_HDR_SZ  (32)
-#define IVF_FRAME_HDR_SZ (12)
-
-static unsigned int mem_get_le32(const unsigned char *mem) {
-    return (mem[3] << 24)|(mem[2] << 16)|(mem[1] << 8)|(mem[0]);
-}
-
-static void die(const char *fmt, ...) {
-    va_list ap;
-
-    va_start(ap, fmt);
-    vprintf(fmt, ap);
-    if(fmt[strlen(fmt)-1] != '\n')
-        printf("\n");
-    exit(EXIT_FAILURE);
-}
-
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
-    const char *detail = vpx_codec_error_detail(ctx);
-
-    printf("%s: %s\n", s, vpx_codec_error(ctx));
-    if(detail)
-        printf("    %s\n",detail);
-    exit(EXIT_FAILURE);
-}
-
-struct parsed_header
-{
-    char key_frame;
-    int version;
-    char show_frame;
-    int first_part_size;
-};
-
-int next_packet(struct parsed_header* hdr, int pos, int length, int mtu)
-{
-    int size = 0;
-    int remaining = length - pos;
-    /* Uncompressed part is 3 bytes for P frames and 10 bytes for I frames */
-    int uncomp_part_size = (hdr->key_frame ? 10 : 3);
-    /* number of bytes yet to send from header and the first partition */
-    int remainFirst = uncomp_part_size + hdr->first_part_size - pos;
-    if (remainFirst > 0)
-    {
-        if (remainFirst <= mtu)
-        {
-            size = remainFirst;
-        }
-        else
-        {
-            size = mtu;
-        }
-
-        return size;
-    }
-
-    /* second partition; just slot it up according to MTU */
-    if (remaining <= mtu)
-    {
-        size = remaining;
-        return size;
-    }
-    return mtu;
-}
-
-void throw_packets(unsigned char* frame, int* size, int loss_rate,
-                   int* thrown, int* kept)
-{
-    unsigned char loss_frame[256*1024];
-    int pkg_size = 1;
-    int pos = 0;
-    int loss_pos = 0;
-    struct parsed_header hdr;
-    unsigned int tmp;
-    int mtu = 1500;
-
-    if (*size < 3)
-    {
-        return;
-    }
-    putc('|', stdout);
-    /* parse uncompressed 3 bytes */
-    tmp = (frame[2] << 16) | (frame[1] << 8) | frame[0];
-    hdr.key_frame = !(tmp & 0x1); /* inverse logic */
-    hdr.version = (tmp >> 1) & 0x7;
-    hdr.show_frame = (tmp >> 4) & 0x1;
-    hdr.first_part_size = (tmp >> 5) & 0x7FFFF;
-
-    /* don't drop key frames */
-    if (hdr.key_frame)
-    {
-        int i;
-        *kept = *size/mtu + ((*size % mtu > 0) ? 1 : 0); /* approximate */
-        for (i=0; i < *kept; i++)
-            putc('.', stdout);
-        return;
-    }
-
-    while ((pkg_size = next_packet(&hdr, pos, *size, mtu)) > 0)
-    {
-        int loss_event = ((rand() + 1.0)/(RAND_MAX + 1.0) < loss_rate/100.0);
-        if (*thrown == 0 && !loss_event)
-        {
-            memcpy(loss_frame + loss_pos, frame + pos, pkg_size);
-            loss_pos += pkg_size;
-            (*kept)++;
-            putc('.', stdout);
-        }
-        else
-        {
-            (*thrown)++;
-            putc('X', stdout);
-        }
-        pos += pkg_size;
-    }
-    memcpy(frame, loss_frame, loss_pos);
-    memset(frame + loss_pos, 0, *size - loss_pos);
-    *size = loss_pos;
-}
-
-int main(int argc, char **argv) {
-    FILE            *infile, *outfile;
-    vpx_codec_ctx_t  codec;
-    int              flags = 0, frame_cnt = 0;
-    unsigned char    file_hdr[IVF_FILE_HDR_SZ];
-    unsigned char    frame_hdr[IVF_FRAME_HDR_SZ];
-    unsigned char    frame[256*1024];
-    vpx_codec_err_t  res;
-    int              n, m, mode;
-    unsigned int     seed;
-    int              thrown=0, kept=0;
-    int              thrown_frame=0, kept_frame=0;
-    vpx_codec_dec_cfg_t  dec_cfg = {0};
-
-    (void)res;
-    /* Open files */
-    if(argc < 4 || argc > 6)
-        die("Usage: %s <infile> <outfile> [-t <num threads>] <N-M|N/M|L,S>\n",
-            argv[0]);
-    {
-        char *nptr;
-        int arg_num = 3;
-        if (argc == 6 && strncmp(argv[arg_num++], "-t", 2) == 0)
-            dec_cfg.threads = strtol(argv[arg_num++], NULL, 0);
-        n = strtol(argv[arg_num], &nptr, 0);
-        mode = (*nptr == '\0' || *nptr == ',') ? 2 : (*nptr == '-') ? 1 : 0;
-
-        m = strtol(nptr+1, NULL, 0);
-        if((!n && !m) || (*nptr != '-' && *nptr != '/' &&
-            *nptr != '\0' && *nptr != ','))
-            die("Couldn't parse pattern %s\n", argv[3]);
-    }
-    seed = (m > 0) ? m : (unsigned int)time(NULL);
-    srand(seed);thrown_frame = 0;
-    printf("Seed: %u\n", seed);
-    printf("Threads: %d\n", dec_cfg.threads);
-    if(!(infile = fopen(argv[1], "rb")))
-        die("Failed to open %s for reading", argv[1]);
-    if(!(outfile = fopen(argv[2], "wb")))
-        die("Failed to open %s for writing", argv[2]);
-
-    /* Read file header */
-    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
-         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
-         && file_hdr[3]=='F'))
-        die("%s is not an IVF file.", argv[1]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-    /* Initialize codec */
-    flags = VPX_CODEC_USE_ERROR_CONCEALMENT;
-    res = vpx_codec_dec_init(&codec, interface, &dec_cfg, flags);
-    if(res)
-        die_codec(&codec, "Failed to initialize decoder");
-
-
-    /* Read each frame */
-    while(fread(frame_hdr, 1, IVF_FRAME_HDR_SZ, infile) == IVF_FRAME_HDR_SZ) {
-        int               frame_sz = mem_get_le32(frame_hdr);
-        vpx_codec_iter_t  iter = NULL;
-        vpx_image_t      *img;
-
-
-        frame_cnt++;
-        if(frame_sz > sizeof(frame))
-            die("Frame %d data too big for example code buffer", frame_sz);
-        if(fread(frame, 1, frame_sz, infile) != frame_sz)
-            die("Frame %d failed to read complete frame", frame_cnt);
-
-        /* Decide whether to throw parts of the frame or the whole frame
-           depending on the drop mode */
-        thrown_frame = 0;
-        kept_frame = 0;
-        switch (mode)
-        {
-        case 0:
-            if (m - (frame_cnt-1)%m <= n)
-            {
-                frame_sz = 0;
-            }
-            break;
-        case 1:
-            if (frame_cnt >= n && frame_cnt <= m)
-            {
-                frame_sz = 0;
-            }
-            break;
-        case 2:
-            throw_packets(frame, &frame_sz, n, &thrown_frame, &kept_frame);
-            break;
-        default: break;
-        }
-        if (mode < 2)
-        {
-            if (frame_sz == 0)
-            {
-                putc('X', stdout);
-                thrown_frame++;
-            }
-            else
-            {
-                putc('.', stdout);
-                kept_frame++;
-            }
-        }
-        thrown += thrown_frame;
-        kept += kept_frame;
-        fflush(stdout);
-        /* Decode the frame */
-        if(vpx_codec_decode(&codec, frame, frame_sz, NULL, 0))
-            die_codec(&codec, "Failed to decode frame");
-
-        /* Write decoded data to disk */
-        while((img = vpx_codec_get_frame(&codec, &iter))) {
-            unsigned int plane, y;
-
-            for(plane=0; plane < 3; plane++) {
-                unsigned char *buf =img->planes[plane];
-            
-                for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
-                    (void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
-                                  outfile);
-                    buf += img->stride[plane];
-                }
-            }
-        }
-    }
-    printf("Processed %d frames.\n",frame_cnt);
-    if(vpx_codec_destroy(&codec))
-        die_codec(&codec, "Failed to destroy codec");
-
-    fclose(outfile);
-    fclose(infile);
-    return EXIT_SUCCESS;
-}
diff --git a/source/libvpx/examples/example_xma.c b/source/libvpx/examples/example_xma.c
deleted file mode 100644
index c960c28..0000000
--- a/source/libvpx/examples/example_xma.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This is a simple program showing how to initialize the decoder in XMA mode */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_config.h"
-#include "vpx/vpx_decoder.h"
-#include "vpx/vpx_integer.h"
-#if CONFIG_VP9_DECODER
-#include "vpx/vp8dx.h"
-#endif
-
-static char *exec_name;
-static int   verbose = 0;
-
-static const struct {
-  const char *name;
-  vpx_codec_iface_t *iface;
-} ifaces[] = {
-#if CONFIG_VP9_DECODER
-  {"vp9",  &vpx_codec_vp8_dx_algo},
-#endif
-};
-
-static void usage_exit(void) {
-  int i;
-
-  printf("Usage: %s <options>\n\n"
-         "Options:\n"
-         "\t--codec <name>\tCodec to use (default=%s)\n"
-         "\t-h <height>\tHeight of the simulated video frame, in pixels\n"
-         "\t-w <width> \tWidth of the simulated video frame, in pixels\n"
-         "\t-v         \tVerbose mode (show individual segment sizes)\n"
-         "\t--help     \tShow this message\n"
-         "\n"
-         "Included decoders:\n"
-         "\n",
-         exec_name,
-         ifaces[0].name);
-
-  for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-    printf("    %-6s - %s\n",
-           ifaces[i].name,
-           vpx_codec_iface_name(ifaces[i].iface));
-
-  exit(EXIT_FAILURE);
-}
-
-static void usage_error(const char *fmt, ...) {
-  va_list ap;
-  va_start(ap, fmt);
-  vprintf(fmt, ap);
-  printf("\n");
-  usage_exit();
-}
-
-void my_mem_dtor(vpx_codec_mmap_t *mmap) {
-  if (verbose)
-    printf("freeing segment %d\n", mmap->id);
-
-  free(mmap->priv);
-}
-
-int main(int argc, char **argv) {
-  vpx_codec_ctx_t           decoder;
-  vpx_codec_iface_t        *iface = ifaces[0].iface;
-  vpx_codec_iter_t          iter;
-  vpx_codec_dec_cfg_t       cfg;
-  vpx_codec_err_t           res = VPX_CODEC_OK;
-  unsigned int            alloc_sz = 0;
-  unsigned int            w = 352;
-  unsigned int            h = 288;
-  int                     i;
-
-  exec_name = argv[0];
-
-  for (i = 1; i < argc; i++) {
-    if (!strcmp(argv[i], "--codec")) {
-      if (i + 1 < argc) {
-        int j, k = -1;
-
-        i++;
-
-        for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
-          if (!strcmp(ifaces[j].name, argv[i]))
-            k = j;
-
-        if (k >= 0)
-          iface = ifaces[k].iface;
-        else
-          usage_error("Error: Unrecognized argument (%s) to --codec\n",
-                      argv[i]);
-      } else
-        usage_error("Error: Option --codec requires argument.\n");
-    } else if (!strcmp(argv[i], "-v"))
-      verbose = 1;
-    else if (!strcmp(argv[i], "-h"))
-      if (i + 1 < argc) {
-        h = atoi(argv[++i]);
-      } else
-        usage_error("Error: Option -h requires argument.\n");
-    else if (!strcmp(argv[i], "-w"))
-      if (i + 1 < argc) {
-        w = atoi(argv[++i]);
-      } else
-        usage_error("Error: Option -w requires argument.\n");
-    else if (!strcmp(argv[i], "--help"))
-      usage_exit();
-    else
-      usage_error("Error: Unrecognized option %s\n\n", argv[i]);
-  }
-
-  if (argc == 1)
-    printf("Using built-in defaults. For options, rerun with --help\n\n");
-
-  /* XMA mode is not supported on all decoders! */
-  if (!(vpx_codec_get_caps(iface) & VPX_CODEC_CAP_XMA)) {
-    printf("%s does not support XMA mode!\n", vpx_codec_iface_name(iface));
-    return EXIT_FAILURE;
-  }
-
-  /* The codec knows how much memory to allocate based on the size of the
-   * encoded frames. This data can be parsed from the bitstream with
-   * vpx_codec_peek_stream_info() if a bitstream is available. Otherwise,
-   * a fixed size can be used that will be the upper limit on the frame
-   * size the decoder can decode.
-   */
-  cfg.w = w;
-  cfg.h = h;
-
-  /* Initialize the decoder in XMA mode. */
-  if (vpx_codec_dec_init(&decoder, iface, &cfg, VPX_CODEC_USE_XMA)) {
-    printf("Failed to initialize decoder in XMA mode: %s\n",
-           vpx_codec_error(&decoder));
-    return EXIT_FAILURE;
-  }
-
-  /* Iterate through the list of memory maps, allocating them with the
-   * requested alignment.
-   */
-  iter = NULL;
-
-  do {
-    vpx_codec_mmap_t  mmap;
-    unsigned int    align;
-
-    res = vpx_codec_get_mem_map(&decoder, &mmap, &iter);
-    align = mmap.align ? mmap.align - 1 : 0;
-
-    if (!res) {
-      if (verbose)
-        printf("Allocating segment %u, size %lu, align %u %s\n",
-               mmap.id, mmap.sz, mmap.align,
-               mmap.flags & VPX_CODEC_MEM_ZERO ? "(ZEROED)" : "");
-
-      if (mmap.flags & VPX_CODEC_MEM_ZERO)
-        mmap.priv = calloc(1, mmap.sz + align);
-      else
-        mmap.priv = malloc(mmap.sz + align);
-
-      mmap.base = (void *)((((uintptr_t)mmap.priv) + align) &
-                  ~(uintptr_t)align);
-      mmap.dtor = my_mem_dtor;
-      alloc_sz += mmap.sz + align;
-
-      if (vpx_codec_set_mem_map(&decoder, &mmap, 1)) {
-        printf("Failed to set mmap: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-      }
-    } else if (res != VPX_CODEC_LIST_END) {
-      printf("Failed to get mmap: %s\n", vpx_codec_error(&decoder));
-      return EXIT_FAILURE;
-    }
-  } while (res != VPX_CODEC_LIST_END);
-
-  printf("%s\n    %d bytes external memory required for %dx%d.\n",
-         decoder.name, alloc_sz, cfg.w, cfg.h);
-  vpx_codec_destroy(&decoder);
-  return EXIT_SUCCESS;
-
-}
diff --git a/source/libvpx/examples/set_maps.c b/source/libvpx/examples/set_maps.c
index 4343832..4ba38ee 100644
--- a/source/libvpx/examples/set_maps.c
+++ b/source/libvpx/examples/set_maps.c
@@ -64,7 +64,8 @@ void usage_exit() {
 static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
                         vpx_codec_ctx_t *codec) {
   unsigned int i;
-  vpx_roi_map_t roi = {0};
+  vpx_roi_map_t roi;
+  memset(&roi, 0, sizeof(roi));
 
   roi.rows = (cfg->g_h + 15) / 16;
   roi.cols = (cfg->g_w + 15) / 16;
@@ -97,7 +98,7 @@ static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
 static void set_active_map(const vpx_codec_enc_cfg_t *cfg,
                            vpx_codec_ctx_t *codec) {
   unsigned int i;
-  vpx_active_map_t map = {0};
+  vpx_active_map_t map = {0, 0, 0};
 
   map.rows = (cfg->g_h + 15) / 16;
   map.cols = (cfg->g_w + 15) / 16;
@@ -114,7 +115,7 @@ static void set_active_map(const vpx_codec_enc_cfg_t *cfg,
 
 static void unset_active_map(const vpx_codec_enc_cfg_t *cfg,
                              vpx_codec_ctx_t *codec) {
-  vpx_active_map_t map = {0};
+  vpx_active_map_t map = {0, 0, 0};
 
   map.rows = (cfg->g_h + 15) / 16;
   map.cols = (cfg->g_w + 15) / 16;
@@ -153,22 +154,23 @@ static void encode_frame(vpx_codec_ctx_t *codec,
 
 int main(int argc, char **argv) {
   FILE *infile = NULL;
-  vpx_codec_ctx_t codec = {0};
-  vpx_codec_enc_cfg_t cfg = {0};
+  vpx_codec_ctx_t codec;
+  vpx_codec_enc_cfg_t cfg;
   int frame_count = 0;
-  vpx_image_t raw = {0};
+  vpx_image_t raw;
   vpx_codec_err_t res;
-  VpxVideoInfo info = {0};
+  VpxVideoInfo info;
   VpxVideoWriter *writer = NULL;
   const VpxInterface *encoder = NULL;
   const int fps = 2;        // TODO(dkovalev) add command line argument
   const double bits_per_pixel_per_frame = 0.067;
 
   exec_name = argv[0];
-
   if (argc != 6)
     die("Invalid number of arguments");
 
+  memset(&info, 0, sizeof(info));
+
   encoder = get_vpx_encoder_by_name(argv[1]);
   if (!encoder)
     die("Unsupported codec.");
diff --git a/source/libvpx/examples/vp9_spatial_scalable_encoder.c b/source/libvpx/examples/vp9_spatial_svc_encoder.c
index 983f52d..983f52d 100644
--- a/source/libvpx/examples/vp9_spatial_scalable_encoder.c
+++ b/source/libvpx/examples/vp9_spatial_svc_encoder.c
diff --git a/source/libvpx/examples/vpx_temporal_scalable_patterns.c b/source/libvpx/examples/vpx_temporal_svc_encoder.c
index 07dd318..e45b50c 100644
--- a/source/libvpx/examples/vpx_temporal_scalable_patterns.c
+++ b/source/libvpx/examples/vpx_temporal_svc_encoder.c
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-//  This is an example demonstrating how to implement a multi-layer VP9
+//  This is an example demonstrating how to implement a multi-layer VPx
 //  encoding scheme based on temporal scalability for video applications
 //  that benefit from a scalable bitstream.
 
diff --git a/source/libvpx/test/convolve_test.cc b/source/libvpx/test/convolve_test.cc
index 37ee0ef..cbb4036 100644
--- a/source/libvpx/test/convolve_test.cc
+++ b/source/libvpx/test/convolve_test.cc
@@ -634,7 +634,7 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
     make_tuple(64, 64, &convolve8_ssse3)));
 #endif
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
     vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
     vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
diff --git a/source/libvpx/test/dct16x16_test.cc b/source/libvpx/test/dct16x16_test.cc
index cb5562e..143a267 100644
--- a/source/libvpx/test/dct16x16_test.cc
+++ b/source/libvpx/test/dct16x16_test.cc
@@ -512,7 +512,7 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
         make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans16x16DCT,
     ::testing::Values(
diff --git a/source/libvpx/test/dct32x32_test.cc b/source/libvpx/test/dct32x32_test.cc
index 013f451..72c0bd6 100644
--- a/source/libvpx/test/dct32x32_test.cc
+++ b/source/libvpx/test/dct32x32_test.cc
@@ -248,7 +248,7 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),
         make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans32x32Test,
     ::testing::Values(
diff --git a/source/libvpx/test/fdct4x4_test.cc b/source/libvpx/test/fdct4x4_test.cc
index 02458db..030665e 100644
--- a/source/libvpx/test/fdct4x4_test.cc
+++ b/source/libvpx/test/fdct4x4_test.cc
@@ -338,7 +338,7 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::Values(
         make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0)));
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
     NEON, Trans4x4DCT,
     ::testing::Values(
@@ -353,6 +353,13 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
 #endif
 
+#if CONFIG_USE_X86INC && HAVE_MMX
+INSTANTIATE_TEST_CASE_P(
+    MMX, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0)));
+#endif
+
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, Trans4x4DCT,
diff --git a/source/libvpx/test/fdct8x8_test.cc b/source/libvpx/test/fdct8x8_test.cc
index 6f2d7d1..c7cf164 100644
--- a/source/libvpx/test/fdct8x8_test.cc
+++ b/source/libvpx/test/fdct8x8_test.cc
@@ -313,7 +313,7 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
     NEON, FwdTrans8x8DCT,
     ::testing::Values(
@@ -340,4 +340,11 @@ INSTANTIATE_TEST_CASE_P(
         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
         make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
 #endif
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0)));
+#endif
 }  // namespace
diff --git a/source/libvpx/test/partial_idct_test.cc b/source/libvpx/test/partial_idct_test.cc
index 8849ce6..79ef521 100644
--- a/source/libvpx/test/partial_idct_test.cc
+++ b/source/libvpx/test/partial_idct_test.cc
@@ -132,15 +132,15 @@ INSTANTIATE_TEST_CASE_P(
                    &vp9_idct16x16_1_add_c,
                    TX_16X16, 1),
         make_tuple(&vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_10_add_c,
-                   TX_8X8, 10),
+                   &vp9_idct8x8_12_add_c,
+                   TX_8X8, 12),
         make_tuple(&vp9_idct8x8_64_add_c,
                    &vp9_idct8x8_1_add_c,
                    TX_8X8, 1),
         make_tuple(&vp9_idct4x4_16_add_c,
                    &vp9_idct4x4_1_add_c,
                    TX_4X4, 1)));
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
     NEON, PartialIDctTest,
     ::testing::Values(
@@ -154,8 +154,8 @@ INSTANTIATE_TEST_CASE_P(
                    &vp9_idct16x16_1_add_neon,
                    TX_16X16, 1),
         make_tuple(&vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_10_add_neon,
-                   TX_8X8, 10),
+                   &vp9_idct8x8_12_add_neon,
+                   TX_8X8, 12),
         make_tuple(&vp9_idct8x8_64_add_c,
                    &vp9_idct8x8_1_add_neon,
                    TX_8X8, 1),
@@ -181,8 +181,8 @@ INSTANTIATE_TEST_CASE_P(
                    &vp9_idct16x16_1_add_sse2,
                    TX_16X16, 1),
         make_tuple(&vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_10_add_sse2,
-                   TX_8X8, 10),
+                   &vp9_idct8x8_12_add_sse2,
+                   TX_8X8, 12),
         make_tuple(&vp9_idct8x8_64_add_c,
                    &vp9_idct8x8_1_add_sse2,
                    TX_8X8, 1),
@@ -190,4 +190,13 @@ INSTANTIATE_TEST_CASE_P(
                    &vp9_idct4x4_1_add_sse2,
                    TX_4X4, 1)));
 #endif
+
+#if HAVE_SSSE3 && ARCH_X86_64
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, PartialIDctTest,
+    ::testing::Values(
+        make_tuple(&vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_12_add_ssse3,
+                   TX_8X8, 12)));
+#endif
 }  // namespace
diff --git a/source/libvpx/test/register_state_check.h b/source/libvpx/test/register_state_check.h
index 5987fe3..1ee149b 100644
--- a/source/libvpx/test/register_state_check.h
+++ b/source/libvpx/test/register_state_check.h
@@ -82,8 +82,8 @@ class RegisterStateCheck {
 
 }  // namespace libvpx_test
 
-#elif defined(CONFIG_SHARED) && defined(HAVE_NEON) && defined(CONFIG_VP9) \
-      && !CONFIG_SHARED && HAVE_NEON && CONFIG_VP9
+#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && defined(CONFIG_VP9) \
+      && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9
 
 #include "vpx/vpx_integer.h"
 
diff --git a/source/libvpx/test/resize_util.sh b/source/libvpx/test/resize_util.sh
new file mode 100755
index 0000000..2a8e3fb
--- /dev/null
+++ b/source/libvpx/test/resize_util.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx resize_util example code. To add new tests to
+##  this file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to resize_util_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: succeeds only if the path in $YUV_RAW_INPUT exists.
+resize_util_verify_environment() {
+  [ -e "${YUV_RAW_INPUT}" ] && return 0
+  # The raw input is missing: report it and fail the check with status 1.
+  echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+  return 1
+}
+
+# Resizes $YUV_RAW_INPUT using the resize_util example. $1 is the output
+# dimensions (WxH) passed to resize_util. Returns 1 if the tool fails.
+resize_util() {
+  local resizer="${LIBVPX_BIN_PATH}/resize_util${VPX_TEST_EXE_SUFFIX}"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/resize_util.raw"
+  local frames_to_resize="10"
+  local target_dimensions="$1"
+
+  # resize_util is available only when CONFIG_SHARED is disabled.
+  if [ -z "$(vpx_config_option_enabled CONFIG_SHARED)" ]; then
+    [ -x "${resizer}" ] || return 1
+    # Check the exit status: a stale output file must not mask a failed run.
+    eval "${resizer}" "${YUV_RAW_INPUT}" \
+        "${YUV_RAW_INPUT_WIDTH}x${YUV_RAW_INPUT_HEIGHT}" \
+        "${target_dimensions}" "${output_file}" ${frames_to_resize} \
+        ${devnull} || return 1
+
+    [ -e "${output_file}" ] || return 1
+  fi
+}
+
+# Runs resize_util() with a target of half the input width and height.
+resize_down() {
+  local half_width=$((${YUV_RAW_INPUT_WIDTH} / 2))
+  local half_height=$((${YUV_RAW_INPUT_HEIGHT} / 2))
+  local target="${half_width}x${half_height}"
+  resize_util "${target}"
+}
+
+# Runs resize_util() with a target of twice the input width and height.
+resize_up() {
+  local double_width=$((${YUV_RAW_INPUT_WIDTH} * 2))
+  local double_height=$((${YUV_RAW_INPUT_HEIGHT} * 2))
+  local target="${double_width}x${double_height}"
+  resize_util "${target}"
+}
+
+resize_util_tests="resize_down
+                   resize_up"
+
+run_tests resize_util_verify_environment "${resize_util_tests}"
diff --git a/source/libvpx/test/subtract_test.cc b/source/libvpx/test/subtract_test.cc
index 3efb955..63e999d 100644
--- a/source/libvpx/test/subtract_test.cc
+++ b/source/libvpx/test/subtract_test.cc
@@ -105,7 +105,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
 INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
                         ::testing::Values(vp8_subtract_b_c));
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
                         ::testing::Values(vp8_subtract_b_neon));
 #endif
diff --git a/source/libvpx/test/test-data.sha1 b/source/libvpx/test/test-data.sha1
index cf2ad1e..9c23929 100644
--- a/source/libvpx/test/test-data.sha1
+++ b/source/libvpx/test/test-data.sha1
@@ -635,3 +635,8 @@ be0fe64a1a4933696ff92d93f9bdecdbd886dc13  vp90-2-14-resize-fp-tiles-16-8.webm.md
 1765315acccfe6cd12230e731369fcb15325ebfa  vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5
 4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8  vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
 1ef480392112b3509cb190afbb96f9a38dd9fbac  vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5
+e615575ded499ea1d992f3b38e3baa434509cdcd  vp90-2-15-segkey.webm
+e3ab35d4316c5e81325c50f5236ceca4bc0d35df  vp90-2-15-segkey.webm.md5
+9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d  vp90-2-15-segkey_adpq.webm
+8f46ba5f785d0c2170591a153e0d0d146a7c8090  vp90-2-15-segkey_adpq.webm.md5
+
diff --git a/source/libvpx/test/test.mk b/source/libvpx/test/test.mk
index 0dcb6c8..44d2f9c 100644
--- a/source/libvpx/test/test.mk
+++ b/source/libvpx/test/test.mk
@@ -748,6 +748,10 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.w
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
 
 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # BBB VP9 streams
diff --git a/source/libvpx/test/test_vectors.cc b/source/libvpx/test/test_vectors.cc
index ff3c389..fd8c4c3 100644
--- a/source/libvpx/test/test_vectors.cc
+++ b/source/libvpx/test/test_vectors.cc
@@ -177,7 +177,8 @@ const char *const kVP9TestVectors[] = {
   "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm",
   "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm",
   "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
-  "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm"
+  "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
+  "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm"
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER
diff --git a/source/libvpx/test/tools_common.sh b/source/libvpx/test/tools_common.sh
index 30f0fae..9c10d48 100755
--- a/source/libvpx/test/tools_common.sh
+++ b/source/libvpx/test/tools_common.sh
@@ -18,7 +18,9 @@ set -e
 devnull='> /dev/null 2>&1'
 
 vlog() {
-  [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo "$@"
+  if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
+    echo "$@"
+  fi
 }
 
 # Sets $VPX_TOOL_TEST to the name specified by positional parameter one.
diff --git a/source/libvpx/test/twopass_encoder.sh b/source/libvpx/test/twopass_encoder.sh
new file mode 100755
index 0000000..fe3cbbb
--- /dev/null
+++ b/source/libvpx/test/twopass_encoder.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx twopass_encoder example. To add new tests to this
+##  file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to twopass_encoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: succeeds only if the path in $YUV_RAW_INPUT exists.
+twopass_encoder_verify_environment() {
+  [ -e "${YUV_RAW_INPUT}" ] && return 0
+  # The raw input is missing: report it and fail the check with status 1.
+  echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+  return 1
+}
+
+# Runs twopass_encoder using the codec specified by $1. Returns 1 on failure.
+twopass_encoder() {
+  local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf"
+
+  [ -x "${encoder}" ] || return 1
+  # Under "twopass_encoder x || return 1" set -e is ignored, so check status.
+  eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
+      ${devnull} || return 1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+twopass_encoder_vp8() {
+  # Succeed without running anything unless vp8_encode_available says "yes".
+  [ "$(vp8_encode_available)" = "yes" ] || return 0
+  twopass_encoder vp8 || return 1
+}
+
+# TODO(tomfinegan): Add a frame limit param to twopass_encoder and enable this
+# test. VP9 is just too slow right now: This test takes 31m16s+ on a fast
+# machine.
+DISABLED_twopass_encoder_vp9() {
+  # Succeed without running anything unless vp9_encode_available says "yes".
+  [ "$(vp9_encode_available)" = "yes" ] || return 0
+  twopass_encoder vp9 || return 1
+}
+
+twopass_encoder_tests="twopass_encoder_vp8
+                       DISABLED_twopass_encoder_vp9"
+
+run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
diff --git a/source/libvpx/test/variance_test.cc b/source/libvpx/test/variance_test.cc
index 817ba14..c9bf13a 100644
--- a/source/libvpx/test/variance_test.cc
+++ b/source/libvpx/test/variance_test.cc
@@ -294,41 +294,60 @@ TEST_P(VP8VarianceTest, Zero) { ZeroTest(); }
 TEST_P(VP8VarianceTest, Ref) { RefTest(); }
 TEST_P(VP8VarianceTest, OneQuarter) { OneQuarterTest(); }
 
+const vp8_variance_fn_t variance4x4_c = vp8_variance4x4_c;
+const vp8_variance_fn_t variance8x8_c = vp8_variance8x8_c;
+const vp8_variance_fn_t variance8x16_c = vp8_variance8x16_c;
+const vp8_variance_fn_t variance16x8_c = vp8_variance16x8_c;
+const vp8_variance_fn_t variance16x16_c = vp8_variance16x16_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP8VarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp8_variance4x4_c),
-                      make_tuple(3, 3, vp8_variance8x8_c),
-                      make_tuple(3, 4, vp8_variance8x16_c),
-                      make_tuple(4, 3, vp8_variance16x8_c),
-                      make_tuple(4, 4, vp8_variance16x16_c)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_c),
+                      make_tuple(3, 3, variance8x8_c),
+                      make_tuple(3, 4, variance8x16_c),
+                      make_tuple(4, 3, variance16x8_c),
+                      make_tuple(4, 4, variance16x16_c)));
 
 #if HAVE_NEON
+const vp8_variance_fn_t variance8x8_neon = vp8_variance8x8_neon;
+const vp8_variance_fn_t variance8x16_neon = vp8_variance8x16_neon;
+const vp8_variance_fn_t variance16x8_neon = vp8_variance16x8_neon;
+const vp8_variance_fn_t variance16x16_neon = vp8_variance16x16_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP8VarianceTest,
-    ::testing::Values(make_tuple(3, 3, vp8_variance8x8_neon),
-                      make_tuple(3, 4, vp8_variance8x16_neon),
-                      make_tuple(4, 3, vp8_variance16x8_neon),
-                      make_tuple(4, 4, vp8_variance16x16_neon)));
+    ::testing::Values(make_tuple(3, 3, variance8x8_neon),
+                      make_tuple(3, 4, variance8x16_neon),
+                      make_tuple(4, 3, variance16x8_neon),
+                      make_tuple(4, 4, variance16x16_neon)));
 #endif
 
 #if HAVE_MMX
+const vp8_variance_fn_t variance4x4_mmx = vp8_variance4x4_mmx;
+const vp8_variance_fn_t variance8x8_mmx = vp8_variance8x8_mmx;
+const vp8_variance_fn_t variance8x16_mmx = vp8_variance8x16_mmx;
+const vp8_variance_fn_t variance16x8_mmx = vp8_variance16x8_mmx;
+const vp8_variance_fn_t variance16x16_mmx = vp8_variance16x16_mmx;
 INSTANTIATE_TEST_CASE_P(
     MMX, VP8VarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp8_variance4x4_mmx),
-                      make_tuple(3, 3, vp8_variance8x8_mmx),
-                      make_tuple(3, 4, vp8_variance8x16_mmx),
-                      make_tuple(4, 3, vp8_variance16x8_mmx),
-                      make_tuple(4, 4, vp8_variance16x16_mmx)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_mmx),
+                      make_tuple(3, 3, variance8x8_mmx),
+                      make_tuple(3, 4, variance8x16_mmx),
+                      make_tuple(4, 3, variance16x8_mmx),
+                      make_tuple(4, 4, variance16x16_mmx)));
 #endif
 
 #if HAVE_SSE2
+const vp8_variance_fn_t variance4x4_wmt = vp8_variance4x4_wmt;
+const vp8_variance_fn_t variance8x8_wmt = vp8_variance8x8_wmt;
+const vp8_variance_fn_t variance8x16_wmt = vp8_variance8x16_wmt;
+const vp8_variance_fn_t variance16x8_wmt = vp8_variance16x8_wmt;
+const vp8_variance_fn_t variance16x16_wmt = vp8_variance16x16_wmt;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP8VarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp8_variance4x4_wmt),
-                      make_tuple(3, 3, vp8_variance8x8_wmt),
-                      make_tuple(3, 4, vp8_variance8x16_wmt),
-                      make_tuple(4, 3, vp8_variance16x8_wmt),
-                      make_tuple(4, 4, vp8_variance16x16_wmt)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_wmt),
+                      make_tuple(3, 3, variance8x8_wmt),
+                      make_tuple(3, 4, variance8x16_wmt),
+                      make_tuple(4, 3, variance16x8_wmt),
+                      make_tuple(4, 4, variance16x16_wmt)));
 #endif
 #endif  // CONFIG_VP8_ENCODER
 
@@ -350,150 +369,337 @@ TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
 TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
 TEST_P(VP9VarianceTest, OneQuarter) { OneQuarterTest(); }
 
+const vp9_variance_fn_t variance4x4_c = vp9_variance4x4_c;
+const vp9_variance_fn_t variance4x8_c = vp9_variance4x8_c;
+const vp9_variance_fn_t variance8x4_c = vp9_variance8x4_c;
+const vp9_variance_fn_t variance8x8_c = vp9_variance8x8_c;
+const vp9_variance_fn_t variance8x16_c = vp9_variance8x16_c;
+const vp9_variance_fn_t variance16x8_c = vp9_variance16x8_c;
+const vp9_variance_fn_t variance16x16_c = vp9_variance16x16_c;
+const vp9_variance_fn_t variance16x32_c = vp9_variance16x32_c;
+const vp9_variance_fn_t variance32x16_c = vp9_variance32x16_c;
+const vp9_variance_fn_t variance32x32_c = vp9_variance32x32_c;
+const vp9_variance_fn_t variance32x64_c = vp9_variance32x64_c;
+const vp9_variance_fn_t variance64x32_c = vp9_variance64x32_c;
+const vp9_variance_fn_t variance64x64_c = vp9_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9VarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_variance4x4_c),
-                      make_tuple(2, 3, vp9_variance4x8_c),
-                      make_tuple(3, 2, vp9_variance8x4_c),
-                      make_tuple(3, 3, vp9_variance8x8_c),
-                      make_tuple(3, 4, vp9_variance8x16_c),
-                      make_tuple(4, 3, vp9_variance16x8_c),
-                      make_tuple(4, 4, vp9_variance16x16_c),
-                      make_tuple(4, 5, vp9_variance16x32_c),
-                      make_tuple(5, 4, vp9_variance32x16_c),
-                      make_tuple(5, 5, vp9_variance32x32_c),
-                      make_tuple(5, 6, vp9_variance32x64_c),
-                      make_tuple(6, 5, vp9_variance64x32_c),
-                      make_tuple(6, 6, vp9_variance64x64_c)));
-
+    ::testing::Values(make_tuple(2, 2, variance4x4_c),
+                      make_tuple(2, 3, variance4x8_c),
+                      make_tuple(3, 2, variance8x4_c),
+                      make_tuple(3, 3, variance8x8_c),
+                      make_tuple(3, 4, variance8x16_c),
+                      make_tuple(4, 3, variance16x8_c),
+                      make_tuple(4, 4, variance16x16_c),
+                      make_tuple(4, 5, variance16x32_c),
+                      make_tuple(5, 4, variance32x16_c),
+                      make_tuple(5, 5, variance32x32_c),
+                      make_tuple(5, 6, variance32x64_c),
+                      make_tuple(6, 5, variance64x32_c),
+                      make_tuple(6, 6, variance64x64_c)));
+
+const vp9_subpixvariance_fn_t subpel_variance4x4_c =
+    vp9_sub_pixel_variance4x4_c;
+const vp9_subpixvariance_fn_t subpel_variance4x8_c =
+    vp9_sub_pixel_variance4x8_c;
+const vp9_subpixvariance_fn_t subpel_variance8x4_c =
+    vp9_sub_pixel_variance8x4_c;
+const vp9_subpixvariance_fn_t subpel_variance8x8_c =
+    vp9_sub_pixel_variance8x8_c;
+const vp9_subpixvariance_fn_t subpel_variance8x16_c =
+    vp9_sub_pixel_variance8x16_c;
+const vp9_subpixvariance_fn_t subpel_variance16x8_c =
+    vp9_sub_pixel_variance16x8_c;
+const vp9_subpixvariance_fn_t subpel_variance16x16_c =
+    vp9_sub_pixel_variance16x16_c;
+const vp9_subpixvariance_fn_t subpel_variance16x32_c =
+    vp9_sub_pixel_variance16x32_c;
+const vp9_subpixvariance_fn_t subpel_variance32x16_c =
+    vp9_sub_pixel_variance32x16_c;
+const vp9_subpixvariance_fn_t subpel_variance32x32_c =
+    vp9_sub_pixel_variance32x32_c;
+const vp9_subpixvariance_fn_t subpel_variance32x64_c =
+    vp9_sub_pixel_variance32x64_c;
+const vp9_subpixvariance_fn_t subpel_variance64x32_c =
+    vp9_sub_pixel_variance64x32_c;
+const vp9_subpixvariance_fn_t subpel_variance64x64_c =
+    vp9_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_variance4x4_c),
-                      make_tuple(2, 3, vp9_sub_pixel_variance4x8_c),
-                      make_tuple(3, 2, vp9_sub_pixel_variance8x4_c),
-                      make_tuple(3, 3, vp9_sub_pixel_variance8x8_c),
-                      make_tuple(3, 4, vp9_sub_pixel_variance8x16_c),
-                      make_tuple(4, 3, vp9_sub_pixel_variance16x8_c),
-                      make_tuple(4, 4, vp9_sub_pixel_variance16x16_c),
-                      make_tuple(4, 5, vp9_sub_pixel_variance16x32_c),
-                      make_tuple(5, 4, vp9_sub_pixel_variance32x16_c),
-                      make_tuple(5, 5, vp9_sub_pixel_variance32x32_c),
-                      make_tuple(5, 6, vp9_sub_pixel_variance32x64_c),
-                      make_tuple(6, 5, vp9_sub_pixel_variance64x32_c),
-                      make_tuple(6, 6, vp9_sub_pixel_variance64x64_c)));
-
+    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c),
+                      make_tuple(2, 3, subpel_variance4x8_c),
+                      make_tuple(3, 2, subpel_variance8x4_c),
+                      make_tuple(3, 3, subpel_variance8x8_c),
+                      make_tuple(3, 4, subpel_variance8x16_c),
+                      make_tuple(4, 3, subpel_variance16x8_c),
+                      make_tuple(4, 4, subpel_variance16x16_c),
+                      make_tuple(4, 5, subpel_variance16x32_c),
+                      make_tuple(5, 4, subpel_variance32x16_c),
+                      make_tuple(5, 5, subpel_variance32x32_c),
+                      make_tuple(5, 6, subpel_variance32x64_c),
+                      make_tuple(6, 5, subpel_variance64x32_c),
+                      make_tuple(6, 6, subpel_variance64x64_c)));
+
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
+    vp9_sub_pixel_avg_variance4x4_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
+    vp9_sub_pixel_avg_variance4x8_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_c =
+    vp9_sub_pixel_avg_variance8x4_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_c =
+    vp9_sub_pixel_avg_variance8x8_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_c =
+    vp9_sub_pixel_avg_variance8x16_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_c =
+    vp9_sub_pixel_avg_variance16x8_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_c =
+    vp9_sub_pixel_avg_variance16x16_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_c =
+    vp9_sub_pixel_avg_variance16x32_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_c =
+    vp9_sub_pixel_avg_variance32x16_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_c =
+    vp9_sub_pixel_avg_variance32x32_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_c =
+    vp9_sub_pixel_avg_variance32x64_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_c =
+    vp9_sub_pixel_avg_variance64x32_c;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_c =
+    vp9_sub_pixel_avg_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_avg_variance4x4_c),
-                      make_tuple(2, 3, vp9_sub_pixel_avg_variance4x8_c),
-                      make_tuple(3, 2, vp9_sub_pixel_avg_variance8x4_c),
-                      make_tuple(3, 3, vp9_sub_pixel_avg_variance8x8_c),
-                      make_tuple(3, 4, vp9_sub_pixel_avg_variance8x16_c),
-                      make_tuple(4, 3, vp9_sub_pixel_avg_variance16x8_c),
-                      make_tuple(4, 4, vp9_sub_pixel_avg_variance16x16_c),
-                      make_tuple(4, 5, vp9_sub_pixel_avg_variance16x32_c),
-                      make_tuple(5, 4, vp9_sub_pixel_avg_variance32x16_c),
-                      make_tuple(5, 5, vp9_sub_pixel_avg_variance32x32_c),
-                      make_tuple(5, 6, vp9_sub_pixel_avg_variance32x64_c),
-                      make_tuple(6, 5, vp9_sub_pixel_avg_variance64x32_c),
-                      make_tuple(6, 6, vp9_sub_pixel_avg_variance64x64_c)));
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_c),
+                      make_tuple(2, 3, subpel_avg_variance4x8_c),
+                      make_tuple(3, 2, subpel_avg_variance8x4_c),
+                      make_tuple(3, 3, subpel_avg_variance8x8_c),
+                      make_tuple(3, 4, subpel_avg_variance8x16_c),
+                      make_tuple(4, 3, subpel_avg_variance16x8_c),
+                      make_tuple(4, 4, subpel_avg_variance16x16_c),
+                      make_tuple(4, 5, subpel_avg_variance16x32_c),
+                      make_tuple(5, 4, subpel_avg_variance32x16_c),
+                      make_tuple(5, 5, subpel_avg_variance32x32_c),
+                      make_tuple(5, 6, subpel_avg_variance32x64_c),
+                      make_tuple(6, 5, subpel_avg_variance64x32_c),
+                      make_tuple(6, 6, subpel_avg_variance64x64_c)));
 
 #if HAVE_MMX
+const vp9_variance_fn_t variance4x4_mmx = vp9_variance4x4_mmx;
+const vp9_variance_fn_t variance8x8_mmx = vp9_variance8x8_mmx;
+const vp9_variance_fn_t variance8x16_mmx = vp9_variance8x16_mmx;
+const vp9_variance_fn_t variance16x8_mmx = vp9_variance16x8_mmx;
+const vp9_variance_fn_t variance16x16_mmx = vp9_variance16x16_mmx;
 INSTANTIATE_TEST_CASE_P(
     MMX, VP9VarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_variance4x4_mmx),
-                      make_tuple(3, 3, vp9_variance8x8_mmx),
-                      make_tuple(3, 4, vp9_variance8x16_mmx),
-                      make_tuple(4, 3, vp9_variance16x8_mmx),
-                      make_tuple(4, 4, vp9_variance16x16_mmx)));
+    ::testing::Values(make_tuple(2, 2, variance4x4_mmx),
+                      make_tuple(3, 3, variance8x8_mmx),
+                      make_tuple(3, 4, variance8x16_mmx),
+                      make_tuple(4, 3, variance16x8_mmx),
+                      make_tuple(4, 4, variance16x16_mmx)));
 #endif
 
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
+const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
+const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
+const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
+const vp9_variance_fn_t variance8x8_sse2 = vp9_variance8x8_sse2;
+const vp9_variance_fn_t variance8x16_sse2 = vp9_variance8x16_sse2;
+const vp9_variance_fn_t variance16x8_sse2 = vp9_variance16x8_sse2;
+const vp9_variance_fn_t variance16x16_sse2 = vp9_variance16x16_sse2;
+const vp9_variance_fn_t variance16x32_sse2 = vp9_variance16x32_sse2;
+const vp9_variance_fn_t variance32x16_sse2 = vp9_variance32x16_sse2;
+const vp9_variance_fn_t variance32x32_sse2 = vp9_variance32x32_sse2;
+const vp9_variance_fn_t variance32x64_sse2 = vp9_variance32x64_sse2;
+const vp9_variance_fn_t variance64x32_sse2 = vp9_variance64x32_sse2;
+const vp9_variance_fn_t variance64x64_sse2 = vp9_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9VarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_variance4x4_sse2),
-                      make_tuple(2, 3, vp9_variance4x8_sse2),
-                      make_tuple(3, 2, vp9_variance8x4_sse2),
-                      make_tuple(3, 3, vp9_variance8x8_sse2),
-                      make_tuple(3, 4, vp9_variance8x16_sse2),
-                      make_tuple(4, 3, vp9_variance16x8_sse2),
-                      make_tuple(4, 4, vp9_variance16x16_sse2),
-                      make_tuple(4, 5, vp9_variance16x32_sse2),
-                      make_tuple(5, 4, vp9_variance32x16_sse2),
-                      make_tuple(5, 5, vp9_variance32x32_sse2),
-                      make_tuple(5, 6, vp9_variance32x64_sse2),
-                      make_tuple(6, 5, vp9_variance64x32_sse2),
-                      make_tuple(6, 6, vp9_variance64x64_sse2)));
-
+    ::testing::Values(make_tuple(2, 2, variance4x4_sse2),
+                      make_tuple(2, 3, variance4x8_sse2),
+                      make_tuple(3, 2, variance8x4_sse2),
+                      make_tuple(3, 3, variance8x8_sse2),
+                      make_tuple(3, 4, variance8x16_sse2),
+                      make_tuple(4, 3, variance16x8_sse2),
+                      make_tuple(4, 4, variance16x16_sse2),
+                      make_tuple(4, 5, variance16x32_sse2),
+                      make_tuple(5, 4, variance32x16_sse2),
+                      make_tuple(5, 5, variance32x32_sse2),
+                      make_tuple(5, 6, variance32x64_sse2),
+                      make_tuple(6, 5, variance64x32_sse2),
+                      make_tuple(6, 6, variance64x64_sse2)));
+
+const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
+    vp9_sub_pixel_variance4x4_sse;
+const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
+    vp9_sub_pixel_variance4x8_sse;
+const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
+    vp9_sub_pixel_variance8x4_sse2;
+const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
+    vp9_sub_pixel_variance8x8_sse2;
+const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
+    vp9_sub_pixel_variance8x16_sse2;
+const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
+    vp9_sub_pixel_variance16x8_sse2;
+const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
+    vp9_sub_pixel_variance16x16_sse2;
+const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
+    vp9_sub_pixel_variance16x32_sse2;
+const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
+    vp9_sub_pixel_variance32x16_sse2;
+const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
+    vp9_sub_pixel_variance32x32_sse2;
+const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
+    vp9_sub_pixel_variance32x64_sse2;
+const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
+    vp9_sub_pixel_variance64x32_sse2;
+const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
+    vp9_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_variance4x4_sse),
-                      make_tuple(2, 3, vp9_sub_pixel_variance4x8_sse),
-                      make_tuple(3, 2, vp9_sub_pixel_variance8x4_sse2),
-                      make_tuple(3, 3, vp9_sub_pixel_variance8x8_sse2),
-                      make_tuple(3, 4, vp9_sub_pixel_variance8x16_sse2),
-                      make_tuple(4, 3, vp9_sub_pixel_variance16x8_sse2),
-                      make_tuple(4, 4, vp9_sub_pixel_variance16x16_sse2),
-                      make_tuple(4, 5, vp9_sub_pixel_variance16x32_sse2),
-                      make_tuple(5, 4, vp9_sub_pixel_variance32x16_sse2),
-                      make_tuple(5, 5, vp9_sub_pixel_variance32x32_sse2),
-                      make_tuple(5, 6, vp9_sub_pixel_variance32x64_sse2),
-                      make_tuple(6, 5, vp9_sub_pixel_variance64x32_sse2),
-                      make_tuple(6, 6, vp9_sub_pixel_variance64x64_sse2)));
-
+    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_sse),
+                      make_tuple(2, 3, subpel_variance4x8_sse),
+                      make_tuple(3, 2, subpel_variance8x4_sse2),
+                      make_tuple(3, 3, subpel_variance8x8_sse2),
+                      make_tuple(3, 4, subpel_variance8x16_sse2),
+                      make_tuple(4, 3, subpel_variance16x8_sse2),
+                      make_tuple(4, 4, subpel_variance16x16_sse2),
+                      make_tuple(4, 5, subpel_variance16x32_sse2),
+                      make_tuple(5, 4, subpel_variance32x16_sse2),
+                      make_tuple(5, 5, subpel_variance32x32_sse2),
+                      make_tuple(5, 6, subpel_variance32x64_sse2),
+                      make_tuple(6, 5, subpel_variance64x32_sse2),
+                      make_tuple(6, 6, subpel_variance64x64_sse2)));
+
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_sse =
+    vp9_sub_pixel_avg_variance4x4_sse;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_sse =
+    vp9_sub_pixel_avg_variance4x8_sse;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_sse2 =
+    vp9_sub_pixel_avg_variance8x4_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_sse2 =
+    vp9_sub_pixel_avg_variance8x8_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_sse2 =
+    vp9_sub_pixel_avg_variance8x16_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_sse2 =
+    vp9_sub_pixel_avg_variance16x8_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_sse2 =
+    vp9_sub_pixel_avg_variance16x16_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_sse2 =
+    vp9_sub_pixel_avg_variance16x32_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_sse2 =
+    vp9_sub_pixel_avg_variance32x16_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_sse2 =
+    vp9_sub_pixel_avg_variance32x32_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_sse2 =
+    vp9_sub_pixel_avg_variance32x64_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_sse2 =
+    vp9_sub_pixel_avg_variance64x32_sse2;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_sse2 =
+    vp9_sub_pixel_avg_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_avg_variance4x4_sse),
-                      make_tuple(2, 3, vp9_sub_pixel_avg_variance4x8_sse),
-                      make_tuple(3, 2, vp9_sub_pixel_avg_variance8x4_sse2),
-                      make_tuple(3, 3, vp9_sub_pixel_avg_variance8x8_sse2),
-                      make_tuple(3, 4, vp9_sub_pixel_avg_variance8x16_sse2),
-                      make_tuple(4, 3, vp9_sub_pixel_avg_variance16x8_sse2),
-                      make_tuple(4, 4, vp9_sub_pixel_avg_variance16x16_sse2),
-                      make_tuple(4, 5, vp9_sub_pixel_avg_variance16x32_sse2),
-                      make_tuple(5, 4, vp9_sub_pixel_avg_variance32x16_sse2),
-                      make_tuple(5, 5, vp9_sub_pixel_avg_variance32x32_sse2),
-                      make_tuple(5, 6, vp9_sub_pixel_avg_variance32x64_sse2),
-                      make_tuple(6, 5, vp9_sub_pixel_avg_variance64x32_sse2),
-                      make_tuple(6, 6, vp9_sub_pixel_avg_variance64x64_sse2)));
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_sse),
+                      make_tuple(2, 3, subpel_avg_variance4x8_sse),
+                      make_tuple(3, 2, subpel_avg_variance8x4_sse2),
+                      make_tuple(3, 3, subpel_avg_variance8x8_sse2),
+                      make_tuple(3, 4, subpel_avg_variance8x16_sse2),
+                      make_tuple(4, 3, subpel_avg_variance16x8_sse2),
+                      make_tuple(4, 4, subpel_avg_variance16x16_sse2),
+                      make_tuple(4, 5, subpel_avg_variance16x32_sse2),
+                      make_tuple(5, 4, subpel_avg_variance32x16_sse2),
+                      make_tuple(5, 5, subpel_avg_variance32x32_sse2),
+                      make_tuple(5, 6, subpel_avg_variance32x64_sse2),
+                      make_tuple(6, 5, subpel_avg_variance64x32_sse2),
+                      make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
 #endif
 #endif
 
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
 
+const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
+    vp9_sub_pixel_variance4x4_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
+    vp9_sub_pixel_variance4x8_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
+    vp9_sub_pixel_variance8x4_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
+    vp9_sub_pixel_variance8x8_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
+    vp9_sub_pixel_variance8x16_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
+    vp9_sub_pixel_variance16x8_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
+    vp9_sub_pixel_variance16x16_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
+    vp9_sub_pixel_variance16x32_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
+    vp9_sub_pixel_variance32x16_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
+    vp9_sub_pixel_variance32x32_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
+    vp9_sub_pixel_variance32x64_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
+    vp9_sub_pixel_variance64x32_ssse3;
+const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
+    vp9_sub_pixel_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelVarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_variance4x4_ssse3),
-                      make_tuple(2, 3, vp9_sub_pixel_variance4x8_ssse3),
-                      make_tuple(3, 2, vp9_sub_pixel_variance8x4_ssse3),
-                      make_tuple(3, 3, vp9_sub_pixel_variance8x8_ssse3),
-                      make_tuple(3, 4, vp9_sub_pixel_variance8x16_ssse3),
-                      make_tuple(4, 3, vp9_sub_pixel_variance16x8_ssse3),
-                      make_tuple(4, 4, vp9_sub_pixel_variance16x16_ssse3),
-                      make_tuple(4, 5, vp9_sub_pixel_variance16x32_ssse3),
-                      make_tuple(5, 4, vp9_sub_pixel_variance32x16_ssse3),
-                      make_tuple(5, 5, vp9_sub_pixel_variance32x32_ssse3),
-                      make_tuple(5, 6, vp9_sub_pixel_variance32x64_ssse3),
-                      make_tuple(6, 5, vp9_sub_pixel_variance64x32_ssse3),
-                      make_tuple(6, 6, vp9_sub_pixel_variance64x64_ssse3)));
-
+    ::testing::Values(make_tuple(2, 2, subpel_variance4x4_ssse3),
+                      make_tuple(2, 3, subpel_variance4x8_ssse3),
+                      make_tuple(3, 2, subpel_variance8x4_ssse3),
+                      make_tuple(3, 3, subpel_variance8x8_ssse3),
+                      make_tuple(3, 4, subpel_variance8x16_ssse3),
+                      make_tuple(4, 3, subpel_variance16x8_ssse3),
+                      make_tuple(4, 4, subpel_variance16x16_ssse3),
+                      make_tuple(4, 5, subpel_variance16x32_ssse3),
+                      make_tuple(5, 4, subpel_variance32x16_ssse3),
+                      make_tuple(5, 5, subpel_variance32x32_ssse3),
+                      make_tuple(5, 6, subpel_variance32x64_ssse3),
+                      make_tuple(6, 5, subpel_variance64x32_ssse3),
+                      make_tuple(6, 6, subpel_variance64x64_ssse3)));
+
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_ssse3 =
+    vp9_sub_pixel_avg_variance4x4_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_ssse3 =
+    vp9_sub_pixel_avg_variance4x8_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x4_ssse3 =
+    vp9_sub_pixel_avg_variance8x4_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x8_ssse3 =
+    vp9_sub_pixel_avg_variance8x8_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance8x16_ssse3 =
+    vp9_sub_pixel_avg_variance8x16_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x8_ssse3 =
+    vp9_sub_pixel_avg_variance16x8_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x16_ssse3 =
+    vp9_sub_pixel_avg_variance16x16_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance16x32_ssse3 =
+    vp9_sub_pixel_avg_variance16x32_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x16_ssse3 =
+    vp9_sub_pixel_avg_variance32x16_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x32_ssse3 =
+    vp9_sub_pixel_avg_variance32x32_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance32x64_ssse3 =
+    vp9_sub_pixel_avg_variance32x64_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x32_ssse3 =
+    vp9_sub_pixel_avg_variance64x32_ssse3;
+const vp9_subp_avg_variance_fn_t subpel_avg_variance64x64_ssse3 =
+    vp9_sub_pixel_avg_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelAvgVarianceTest,
-    ::testing::Values(make_tuple(2, 2, vp9_sub_pixel_avg_variance4x4_ssse3),
-                      make_tuple(2, 3, vp9_sub_pixel_avg_variance4x8_ssse3),
-                      make_tuple(3, 2, vp9_sub_pixel_avg_variance8x4_ssse3),
-                      make_tuple(3, 3, vp9_sub_pixel_avg_variance8x8_ssse3),
-                      make_tuple(3, 4, vp9_sub_pixel_avg_variance8x16_ssse3),
-                      make_tuple(4, 3, vp9_sub_pixel_avg_variance16x8_ssse3),
-                      make_tuple(4, 4, vp9_sub_pixel_avg_variance16x16_ssse3),
-                      make_tuple(4, 5, vp9_sub_pixel_avg_variance16x32_ssse3),
-                      make_tuple(5, 4, vp9_sub_pixel_avg_variance32x16_ssse3),
-                      make_tuple(5, 5, vp9_sub_pixel_avg_variance32x32_ssse3),
-                      make_tuple(5, 6, vp9_sub_pixel_avg_variance32x64_ssse3),
-                      make_tuple(6, 5, vp9_sub_pixel_avg_variance64x32_ssse3),
-                      make_tuple(6, 6, vp9_sub_pixel_avg_variance64x64_ssse3)));
+    ::testing::Values(make_tuple(2, 2, subpel_avg_variance4x4_ssse3),
+                      make_tuple(2, 3, subpel_avg_variance4x8_ssse3),
+                      make_tuple(3, 2, subpel_avg_variance8x4_ssse3),
+                      make_tuple(3, 3, subpel_avg_variance8x8_ssse3),
+                      make_tuple(3, 4, subpel_avg_variance8x16_ssse3),
+                      make_tuple(4, 3, subpel_avg_variance16x8_ssse3),
+                      make_tuple(4, 4, subpel_avg_variance16x16_ssse3),
+                      make_tuple(4, 5, subpel_avg_variance16x32_ssse3),
+                      make_tuple(5, 4, subpel_avg_variance32x16_ssse3),
+                      make_tuple(5, 5, subpel_avg_variance32x32_ssse3),
+                      make_tuple(5, 6, subpel_avg_variance32x64_ssse3),
+                      make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
+                      make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
 #endif
 #endif
 #endif  // CONFIG_VP9_ENCODER
diff --git a/source/libvpx/test/vp8cx_set_ref.sh b/source/libvpx/test/vp8cx_set_ref.sh
new file mode 100755
index 0000000..ef9d0c0
--- /dev/null
+++ b/source/libvpx/test/vp8cx_set_ref.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx vp8cx_set_ref example. To add new tests to this
+##  file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to vp8cx_set_ref_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+vp8cx_set_ref_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90.
+# $1 is the codec name, which vp8cx_set_ref does not support at present: It's
+# currently used only to name the output file.
+# TODO(tomfinegan): Pass the codec param once the example is updated to support
+# VP9.
+vpx_set_ref() {
+  local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
+  local ref_frame_num=90
+
+  [ -x "${encoder}" ] || return 1
+
+  eval "${encoder}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
+      "${YUV_RAW_INPUT}" "${output_file}" "${ref_frame_num}" \
+      ${devnull}
+
+  [ -e "${output_file}" ] || return 1
+}
+
+vp8cx_set_ref_vp8() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    vpx_set_ref vp8 || return 1
+  fi
+}
+
+vp8cx_set_ref_tests="vp8cx_set_ref_vp8"
+
+run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}"
diff --git a/source/libvpx/vp8/common/arm/dequantize_arm.c b/source/libvpx/vp8/common/arm/dequantize_arm.c
index 70e72aa..1f8157f 100644
--- a/source/libvpx/vp8/common/arm/dequantize_arm.c
+++ b/source/libvpx/vp8/common/arm/dequantize_arm.c
@@ -12,26 +12,9 @@
 #include "vpx_config.h"
 #include "vp8/common/blockd.h"
 
-#if HAVE_NEON
-extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
-#endif
-
 #if HAVE_MEDIA
 extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
-#endif
-
-#if HAVE_NEON
-
-void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
-{
-    short *DQ  = d->dqcoeff;
-    short *Q   = d->qcoeff;
-
-    vp8_dequantize_b_loop_neon(Q, DQC, DQ);
-}
-#endif
 
-#if HAVE_MEDIA
 void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
 {
     short *DQ  = d->dqcoeff;
diff --git a/source/libvpx/vp8/common/arm/loopfilter_arm.c b/source/libvpx/vp8/common/arm/loopfilter_arm.c
index 3bdc967..f37ca63 100644
--- a/source/libvpx/vp8/common/arm/loopfilter_arm.c
+++ b/source/libvpx/vp8/common/arm/loopfilter_arm.c
@@ -25,20 +25,24 @@ extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
 extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
 #endif
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM || HAVE_NEON
 typedef void loopfilter_y_neon(unsigned char *src, int pitch,
         unsigned char blimit, unsigned char limit, unsigned char thresh);
 typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
         unsigned char blimit, unsigned char limit, unsigned char thresh,
         unsigned char *v);
+#endif
 
+#if HAVE_NEON_ASM
 extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
 extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
-extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
-extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
-
 extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
 extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
+#endif
+
+#if HAVE_NEON
+extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
+extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
 extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
 extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
 #endif
@@ -146,7 +150,9 @@ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
     if (u_ptr)
         vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
 }
+#endif
 
+#if HAVE_NEON_ASM
 /* Horizontal B Filtering */
 void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
diff --git a/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c b/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
index 60f69c8..54e709d 100644
--- a/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
+++ b/source/libvpx/vp8/common/arm/neon/dequantizeb_neon.c
@@ -10,18 +10,16 @@
 
 #include <arm_neon.h>
 
-void vp8_dequantize_b_loop_neon(
-        int16_t *Q,
-        int16_t *DQC,
-        int16_t *DQ) {
+#include "vp8/common/blockd.h"
+
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
     int16x8x2_t qQ, qDQC, qDQ;
 
-    qQ   = vld2q_s16(Q);
+    qQ   = vld2q_s16(d->qcoeff);
     qDQC = vld2q_s16(DQC);
 
     qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
     qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
 
-    vst2q_s16(DQ, qDQ);
-    return;
+    vst2q_s16(d->dqcoeff, qDQ);
 }
diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
new file mode 100644
index 0000000..3a39210
--- /dev/null
+++ b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
@@ -0,0 +1,81 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT  |idct_dequant_0_2x_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_0_2x_neon(short *q, short dq,
+;                            unsigned char *dst, int stride);
+; r0   *q
+; r1   dq
+; r2   *dst
+; r3   stride
+|idct_dequant_0_2x_neon| PROC
+    push            {r4, r5}
+    vpush           {d8-d15}
+
+    add             r12, r2, #4
+    vld1.32         {d2[0]}, [r2], r3
+    vld1.32         {d8[0]}, [r12], r3
+    vld1.32         {d2[1]}, [r2], r3
+    vld1.32         {d8[1]}, [r12], r3
+    vld1.32         {d4[0]}, [r2], r3
+    vld1.32         {d10[0]}, [r12], r3
+    vld1.32         {d4[1]}, [r2], r3
+    vld1.32         {d10[1]}, [r12], r3
+
+    ldrh            r12, [r0]               ; lo q
+    ldrh            r4, [r0, #32]           ; hi q
+    mov             r5, #0
+    strh            r5, [r0]
+    strh            r5, [r0, #32]
+
+    sxth            r12, r12                ; lo
+    mul             r0, r12, r1
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q0, r0
+    sxth            r4, r4                  ; hi
+    mul             r0, r4, r1
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q3, r0
+
+    vaddw.u8        q1, q0, d2              ; lo
+    vaddw.u8        q2, q0, d4
+    vaddw.u8        q4, q3, d8              ; hi
+    vaddw.u8        q5, q3, d10
+
+    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
+    add             r0, r2, #4
+
+    vqmovun.s16     d2, q1                  ; lo
+    vqmovun.s16     d4, q2
+    vqmovun.s16     d8, q4                  ; hi
+    vqmovun.s16     d10, q5
+
+    vst1.32         {d2[0]}, [r2], r3       ; lo
+    vst1.32         {d8[0]}, [r0], r3       ; hi
+    vst1.32         {d2[1]}, [r2], r3
+    vst1.32         {d8[1]}, [r0], r3
+    vst1.32         {d4[0]}, [r2], r3
+    vst1.32         {d10[0]}, [r0], r3
+    vst1.32         {d4[1]}, [r2]
+    vst1.32         {d10[1]}, [r0]
+
+    vpop            {d8-d15}
+    pop             {r4, r5}
+    bx              lr
+
+    ENDP            ; |idct_dequant_0_2x_neon|
+    END
diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
deleted file mode 100644
index 967c322..0000000
--- a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-void idct_dequant_0_2x_neon(
-        int16_t *q,
-        int16_t dq,
-        unsigned char *dst,
-        int stride) {
-    unsigned char *dst0;
-    int i, a0, a1;
-    int16x8x2_t q2Add;
-    int32x2_t d2s32, d4s32;
-    uint8x8_t d2u8, d4u8;
-    uint16x8_t q1u16, q2u16;
-
-    a0 = ((q[0] * dq) + 4) >> 3;
-    a1 = ((q[16] * dq) + 4) >> 3;
-    q[0] = q[16] = 0;
-    q2Add.val[0] = vdupq_n_s16((int16_t)a0);
-    q2Add.val[1] = vdupq_n_s16((int16_t)a1);
-
-    for (i = 0; i < 2; i++, dst += 4) {
-        dst0 = dst;
-        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
-        dst0 += stride;
-        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
-        dst0 += stride;
-        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
-        dst0 += stride;
-        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
-
-        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
-                         vreinterpret_u8_s32(d2s32));
-        q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
-                         vreinterpret_u8_s32(d4s32));
-
-        d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
-        d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
-
-        d2s32 = vreinterpret_s32_u8(d2u8);
-        d4s32 = vreinterpret_s32_u8(d4u8);
-
-        dst0 = dst;
-        vst1_lane_s32((int32_t *)dst0, d2s32, 0);
-        dst0 += stride;
-        vst1_lane_s32((int32_t *)dst0, d2s32, 1);
-        dst0 += stride;
-        vst1_lane_s32((int32_t *)dst0, d4s32, 0);
-        dst0 += stride;
-        vst1_lane_s32((int32_t *)dst0, d4s32, 1);
-    }
-    return;
-}
diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
new file mode 100644
index 0000000..8da0fa0
--- /dev/null
+++ b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
@@ -0,0 +1,199 @@
+;
+;  Copyright (c) 2010 The Webm project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |idct_dequant_full_2x_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_full_2x_neon(short *q, short *dq,
+;                               unsigned char *dst, int stride);
+; r0    *q,
+; r1    *dq,
+; r2    *dst
+; r3    stride
+|idct_dequant_full_2x_neon| PROC
+    vpush           {d8-d15}
+
+    vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
+    vld1.16         {q2, q3}, [r0]          ; l q
+    add             r0, r0, #32
+    vld1.16         {q4, q5}, [r0]          ; r q
+    add             r12, r2, #4
+
+    ; interleave the predictors
+    vld1.32         {d28[0]}, [r2],  r3     ; l pre
+    vld1.32         {d28[1]}, [r12], r3     ; r pre
+    vld1.32         {d29[0]}, [r2],  r3
+    vld1.32         {d29[1]}, [r12], r3
+    vld1.32         {d30[0]}, [r2],  r3
+    vld1.32         {d30[1]}, [r12], r3
+    vld1.32         {d31[0]}, [r2],  r3
+    vld1.32         {d31[1]}, [r12]
+
+    adr             r1, cospi8sqrt2minus1   ; pointer to the first constant
+
+    ; dequant: q[i] = q[i] * dq[i]
+    vmul.i16        q2, q2, q0
+    vmul.i16        q3, q3, q1
+    vmul.i16        q4, q4, q0
+    vmul.i16        q5, q5, q1
+
+    vld1.16         {d0}, [r1]
+
+    ; q2: l0r0  q3: l8r8
+    ; q4: l4r4  q5: l12r12
+    vswp            d5, d8
+    vswp            d7, d10
+
+    ; _CONSTANTS_ * 4,12 >> 16
+    ; q6:  4 * sinpi : c1/temp1
+    ; q7: 12 * sinpi : d1/temp2
+    ; q8:  4 * cospi
+    ; q9: 12 * cospi
+    vqdmulh.s16     q6, q4, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q7, q5, d0[2]
+    vqdmulh.s16     q8, q4, d0[0]           ; cospi8sqrt2minus1
+    vqdmulh.s16     q9, q5, d0[0]
+
+    vqadd.s16       q10, q2, q3             ; a1 = 0 + 8
+    vqsub.s16       q11, q2, q3             ; b1 = 0 - 8
+
+    ; vqdmulh only accepts signed values. this was a problem because
+    ; our constant had the high bit set, and was treated as a negative value.
+    ; vqdmulh also doubles the value before it shifts by 16. we need to
+    ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+    ; so we can shift the constant without losing precision. this avoids
+    ; shifting again afterward, and also avoids the sign issue. win win!
+    ; for cospi8sqrt2minus1 the lowest bit is 1, so we would lose precision
+    ; if we pre-shifted it; instead the product is shifted right by 1 below
+    vshr.s16        q8, q8, #1
+    vshr.s16        q9, q9, #1
+
+    ; q4:  4 +  4 * cospi : d1/temp1
+    ; q5: 12 + 12 * cospi : c1/temp2
+    vqadd.s16       q4, q4, q8
+    vqadd.s16       q5, q5, q9
+
+    ; c1 = temp1 - temp2
+    ; d1 = temp1 + temp2
+    vqsub.s16       q2, q6, q5
+    vqadd.s16       q3, q4, q7
+
+    ; [0]: a1+d1
+    ; [1]: b1+c1
+    ; [2]: b1-c1
+    ; [3]: a1-d1
+    vqadd.s16       q4, q10, q3
+    vqadd.s16       q5, q11, q2
+    vqsub.s16       q6, q11, q2
+    vqsub.s16       q7, q10, q3
+
+    ; rotate
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
+    ; idct loop 2
+    ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+    ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+    ; q6: l 2, 6,10,14 r 2, 6,10,14
+    ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+    ; q8:  1 * sinpi : c1/temp1
+    ; q9:  3 * sinpi : d1/temp2
+    ; q10: 1 * cospi
+    ; q11: 3 * cospi
+    vqdmulh.s16     q8, q5, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q9, q7, d0[2]
+    vqdmulh.s16     q10, q5, d0[0]          ; cospi8sqrt2minus1
+    vqdmulh.s16     q11, q7, d0[0]
+
+    vqadd.s16       q2, q4, q6             ; a1 = 0 + 2
+    vqsub.s16       q3, q4, q6             ; b1 = 0 - 2
+
+    ; see note on shifting above
+    vshr.s16        q10, q10, #1
+    vshr.s16        q11, q11, #1
+
+    ; q10: 1 + 1 * cospi : d1/temp1
+    ; q11: 3 + 3 * cospi : c1/temp2
+    vqadd.s16       q10, q5, q10
+    vqadd.s16       q11, q7, q11
+
+    ; q8: c1 = temp1 - temp2
+    ; q9: d1 = temp1 + temp2
+    vqsub.s16       q8, q8, q11
+    vqadd.s16       q9, q10, q9
+
+    ; a1+d1
+    ; b1+c1
+    ; b1-c1
+    ; a1-d1
+    vqadd.s16       q4, q2, q9
+    vqadd.s16       q5, q3, q8
+    vqsub.s16       q6, q3, q8
+    vqsub.s16       q7, q2, q9
+
+    ; +4 >> 3 (rounding)
+    vrshr.s16       q4, q4, #3              ; lo
+    vrshr.s16       q5, q5, #3
+    vrshr.s16       q6, q6, #3              ; hi
+    vrshr.s16       q7, q7, #3
+
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
+
+    ; adding pre
+    ; input is still packed. pre was read interleaved
+    vaddw.u8        q4, q4, d28
+    vaddw.u8        q5, q5, d29
+    vaddw.u8        q6, q6, d30
+    vaddw.u8        q7, q7, d31
+
+    vmov.i16        q14, #0
+    vmov            q15, q14
+    vst1.16         {q14, q15}, [r0]        ; write over high input
+    sub             r0, r0, #32
+    vst1.16         {q14, q15}, [r0]        ; write over low input
+
+    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
+    add             r1, r2, #4              ; hi
+
+    ;saturate and narrow
+    vqmovun.s16     d0, q4                  ; lo
+    vqmovun.s16     d1, q5
+    vqmovun.s16     d2, q6                  ; hi
+    vqmovun.s16     d3, q7
+
+    vst1.32         {d0[0]}, [r2], r3       ; lo
+    vst1.32         {d0[1]}, [r1], r3       ; hi
+    vst1.32         {d1[0]}, [r2], r3
+    vst1.32         {d1[1]}, [r1], r3
+    vst1.32         {d2[0]}, [r2], r3
+    vst1.32         {d2[1]}, [r1], r3
+    vst1.32         {d3[0]}, [r2]
+    vst1.32         {d3[1]}, [r1]
+
+    vpop            {d8-d15}
+    bx             lr
+
+    ENDP           ; |idct_dequant_full_2x_neon|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift it (0x8a8c >> 1 = 0x4546)
+sinpi8sqrt2       DCD 0x4546
+
+    END
diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c
deleted file mode 100644
index a60ed46..0000000
--- a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-static const int16_t cospi8sqrt2minus1 = 20091;
-static const int16_t sinpi8sqrt2       = 17734;
-// because the lowest bit in 0x8a8c is 0, we can pre-shift this
-
-void idct_dequant_full_2x_neon(
-        int16_t *q,
-        int16_t *dq,
-        unsigned char *dst,
-        int stride) {
-    unsigned char *dst0, *dst1;
-    int32x2_t d28, d29, d30, d31;
-    int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
-    int16x8_t qEmpty = vdupq_n_s16(0);
-    int32x4x2_t q2tmp0, q2tmp1;
-    int16x8x2_t q2tmp2, q2tmp3;
-    int16x4_t dLow0, dLow1, dHigh0, dHigh1;
-
-    d28 = d29 = d30 = d31 = vdup_n_s32(0);
-
-    // load dq
-    q0 = vld1q_s16(dq);
-    dq += 8;
-    q1 = vld1q_s16(dq);
-
-    // load q
-    q2 = vld1q_s16(q);
-    vst1q_s16(q, qEmpty);
-    q += 8;
-    q3 = vld1q_s16(q);
-    vst1q_s16(q, qEmpty);
-    q += 8;
-    q4 = vld1q_s16(q);
-    vst1q_s16(q, qEmpty);
-    q += 8;
-    q5 = vld1q_s16(q);
-    vst1q_s16(q, qEmpty);
-
-    // load src from dst
-    dst0 = dst;
-    dst1 = dst + 4;
-    d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
-    dst0 += stride;
-    d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
-    dst1 += stride;
-    d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
-    dst0 += stride;
-    d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
-    dst1 += stride;
-
-    d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
-    dst0 += stride;
-    d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
-    dst1 += stride;
-    d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
-    d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
-
-    q2 = vmulq_s16(q2, q0);
-    q3 = vmulq_s16(q3, q1);
-    q4 = vmulq_s16(q4, q0);
-    q5 = vmulq_s16(q5, q1);
-
-    // vswp
-    dLow0 = vget_low_s16(q2);
-    dHigh0 = vget_high_s16(q2);
-    dLow1 = vget_low_s16(q4);
-    dHigh1 = vget_high_s16(q4);
-    q2 = vcombine_s16(dLow0, dLow1);
-    q4 = vcombine_s16(dHigh0, dHigh1);
-
-    dLow0 = vget_low_s16(q3);
-    dHigh0 = vget_high_s16(q3);
-    dLow1 = vget_low_s16(q5);
-    dHigh1 = vget_high_s16(q5);
-    q3 = vcombine_s16(dLow0, dLow1);
-    q5 = vcombine_s16(dHigh0, dHigh1);
-
-    q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
-    q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
-    q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
-    q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
-
-    q10 = vqaddq_s16(q2, q3);
-    q11 = vqsubq_s16(q2, q3);
-
-    q8 = vshrq_n_s16(q8, 1);
-    q9 = vshrq_n_s16(q9, 1);
-
-    q4 = vqaddq_s16(q4, q8);
-    q5 = vqaddq_s16(q5, q9);
-
-    q2 = vqsubq_s16(q6, q5);
-    q3 = vqaddq_s16(q7, q4);
-
-    q4 = vqaddq_s16(q10, q3);
-    q5 = vqaddq_s16(q11, q2);
-    q6 = vqsubq_s16(q11, q2);
-    q7 = vqsubq_s16(q10, q3);
-
-    q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
-    q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
-    q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
-                       vreinterpretq_s16_s32(q2tmp1.val[0]));
-    q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
-                       vreinterpretq_s16_s32(q2tmp1.val[1]));
-
-    // loop 2
-    q8  = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
-    q9  = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
-    q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
-    q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
-
-    q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
-    q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
-
-    q10 = vshrq_n_s16(q10, 1);
-    q11 = vshrq_n_s16(q11, 1);
-
-    q10 = vqaddq_s16(q2tmp2.val[1], q10);
-    q11 = vqaddq_s16(q2tmp3.val[1], q11);
-
-    q8 = vqsubq_s16(q8, q11);
-    q9 = vqaddq_s16(q9, q10);
-
-    q4 = vqaddq_s16(q2, q9);
-    q5 = vqaddq_s16(q3, q8);
-    q6 = vqsubq_s16(q3, q8);
-    q7 = vqsubq_s16(q2, q9);
-
-    q4 = vrshrq_n_s16(q4, 3);
-    q5 = vrshrq_n_s16(q5, 3);
-    q6 = vrshrq_n_s16(q6, 3);
-    q7 = vrshrq_n_s16(q7, 3);
-
-    q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
-    q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
-    q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
-                       vreinterpretq_s16_s32(q2tmp1.val[0]));
-    q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
-                       vreinterpretq_s16_s32(q2tmp1.val[1]));
-
-    q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
-                                          vreinterpret_u8_s32(d28)));
-    q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
-                                          vreinterpret_u8_s32(d29)));
-    q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
-                                          vreinterpret_u8_s32(d30)));
-    q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
-                                          vreinterpret_u8_s32(d31)));
-
-    d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
-    d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
-    d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
-    d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
-
-    dst0 = dst;
-    dst1 = dst + 4;
-    vst1_lane_s32((int32_t *)dst0, d28, 0);
-    dst0 += stride;
-    vst1_lane_s32((int32_t *)dst1, d28, 1);
-    dst1 += stride;
-    vst1_lane_s32((int32_t *)dst0, d29, 0);
-    dst0 += stride;
-    vst1_lane_s32((int32_t *)dst1, d29, 1);
-    dst1 += stride;
-
-    vst1_lane_s32((int32_t *)dst0, d30, 0);
-    dst0 += stride;
-    vst1_lane_s32((int32_t *)dst1, d30, 1);
-    dst1 += stride;
-    vst1_lane_s32((int32_t *)dst0, d31, 0);
-    vst1_lane_s32((int32_t *)dst1, d31, 1);
-    return;
-}
diff --git a/source/libvpx/vp8/common/arm/reconintra_arm.c b/source/libvpx/vp8/common/arm/reconintra_arm.c
index 2874896..e55a33c 100644
--- a/source/libvpx/vp8/common/arm/reconintra_arm.c
+++ b/source/libvpx/vp8/common/arm/reconintra_arm.c
@@ -14,7 +14,7 @@
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 extern void vp8_build_intra_predictors_mby_neon_func(
     unsigned char *y_buffer,
     unsigned char *ypred_ptr,
diff --git a/source/libvpx/vp8/common/arm/variance_arm.c b/source/libvpx/vp8/common/arm/variance_arm.c
index 467a509..e3f7083 100644
--- a/source/libvpx/vp8/common/arm/variance_arm.c
+++ b/source/libvpx/vp8/common/arm/variance_arm.c
@@ -95,7 +95,7 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
 #endif /* HAVE_MEDIA */
 
 
-#if HAVE_NEON
+#if HAVE_NEON_ASM
 
 extern unsigned int vp8_sub_pixel_variance16x16_neon_func
 (
diff --git a/source/libvpx/vp8/common/rtcd_defs.pl b/source/libvpx/vp8/common/rtcd_defs.pl
index 130d965..cbfd76a 100644
--- a/source/libvpx/vp8/common/rtcd_defs.pl
+++ b/source/libvpx/vp8/common/rtcd_defs.pl
@@ -38,13 +38,15 @@ $vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6;
 $vp8_dequant_idct_add_dspr2=vp8_dequant_idct_add_dspr2;
 
 add_proto qw/void vp8_dequant_idct_add_y_block/, "short *q, short *dq, unsigned char *dst, int stride, char *eobs";
-specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon dspr2/;
+specialize qw/vp8_dequant_idct_add_y_block mmx sse2 media neon_asm dspr2/;
 $vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6;
+$vp8_dequant_idct_add_y_block_neon_asm=vp8_dequant_idct_add_y_block_neon;
 $vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
 
 add_proto qw/void vp8_dequant_idct_add_uv_block/, "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs";
-specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon dspr2/;
+specialize qw/vp8_dequant_idct_add_uv_block mmx sse2 media neon_asm dspr2/;
 $vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6;
+$vp8_dequant_idct_add_uv_block_neon_asm=vp8_dequant_idct_add_uv_block_neon;
 $vp8_dequant_idct_add_y_block_dspr2=vp8_dequant_idct_add_y_block_dspr2;
 
 #
@@ -56,8 +58,9 @@ $vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6;
 $vp8_loop_filter_mbv_dspr2=vp8_loop_filter_mbv_dspr2;
 
 add_proto qw/void vp8_loop_filter_bv/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
-specialize qw/vp8_loop_filter_bv mmx sse2 media neon dspr2/;
+specialize qw/vp8_loop_filter_bv mmx sse2 media neon_asm dspr2/;
 $vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6;
+$vp8_loop_filter_bv_neon_asm=vp8_loop_filter_bv_neon;
 $vp8_loop_filter_bv_dspr2=vp8_loop_filter_bv_dspr2;
 
 add_proto qw/void vp8_loop_filter_mbh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
@@ -66,18 +69,19 @@ $vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6;
 $vp8_loop_filter_mbh_dspr2=vp8_loop_filter_mbh_dspr2;
 
 add_proto qw/void vp8_loop_filter_bh/, "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi";
-specialize qw/vp8_loop_filter_bh mmx sse2 media neon dspr2/;
+specialize qw/vp8_loop_filter_bh mmx sse2 media neon_asm dspr2/;
 $vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6;
+$vp8_loop_filter_bh_neon_asm=vp8_loop_filter_bh_neon;
 $vp8_loop_filter_bh_dspr2=vp8_loop_filter_bh_dspr2;
 
 
 add_proto qw/void vp8_loop_filter_simple_mbv/, "unsigned char *y, int ystride, const unsigned char *blimit";
-specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon/;
+specialize qw/vp8_loop_filter_simple_mbv mmx sse2 media neon_asm/;
 $vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c;
 $vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx;
 $vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2;
 $vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6;
-$vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon;
+$vp8_loop_filter_simple_mbv_neon_asm=vp8_loop_filter_mbvs_neon;
 
 add_proto qw/void vp8_loop_filter_simple_mbh/, "unsigned char *y, int ystride, const unsigned char *blimit";
 specialize qw/vp8_loop_filter_simple_mbh mmx sse2 media neon/;
@@ -88,12 +92,12 @@ $vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6;
 $vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon;
 
 add_proto qw/void vp8_loop_filter_simple_bv/, "unsigned char *y, int ystride, const unsigned char *blimit";
-specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon/;
+specialize qw/vp8_loop_filter_simple_bv mmx sse2 media neon_asm/;
 $vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c;
 $vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx;
 $vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2;
 $vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6;
-$vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon;
+$vp8_loop_filter_simple_bv_neon_asm=vp8_loop_filter_bvs_neon;
 
 add_proto qw/void vp8_loop_filter_simple_bh/, "unsigned char *y, int ystride, const unsigned char *blimit";
 specialize qw/vp8_loop_filter_simple_bh mmx sse2 media neon/;
@@ -269,9 +273,10 @@ specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
 $vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
 
 add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon/;
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/;
 $vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
 $vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
+$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon;
 
 add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
 specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
@@ -282,24 +287,28 @@ specialize qw/vp8_sub_pixel_variance16x8 mmx sse2 ssse3/;
 $vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt;
 
 add_proto qw/unsigned int vp8_sub_pixel_variance16x16/, "const unsigned char  *src_ptr, int  source_stride, int  xoffset, int  yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon/;
+specialize qw/vp8_sub_pixel_variance16x16 mmx sse2 ssse3 media neon_asm/;
 $vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt;
 $vp8_sub_pixel_variance16x16_media=vp8_sub_pixel_variance16x16_armv6;
+$vp8_sub_pixel_variance16x16_neon_asm=vp8_sub_pixel_variance16x16_neon;
 
 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon/;
+specialize qw/vp8_variance_halfpixvar16x16_h mmx sse2 media neon_asm/;
 $vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt;
 $vp8_variance_halfpixvar16x16_h_media=vp8_variance_halfpixvar16x16_h_armv6;
+$vp8_variance_halfpixvar16x16_h_neon_asm=vp8_variance_halfpixvar16x16_h_neon;
 
 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon/;
+specialize qw/vp8_variance_halfpixvar16x16_v mmx sse2 media neon_asm/;
 $vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt;
 $vp8_variance_halfpixvar16x16_v_media=vp8_variance_halfpixvar16x16_v_armv6;
+$vp8_variance_halfpixvar16x16_v_neon_asm=vp8_variance_halfpixvar16x16_v_neon;
 
 add_proto qw/unsigned int vp8_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon/;
+specialize qw/vp8_variance_halfpixvar16x16_hv mmx sse2 media neon_asm/;
 $vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt;
 $vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
+$vp8_variance_halfpixvar16x16_hv_neon_asm=vp8_variance_halfpixvar16x16_hv_neon;
 
 #
 # Single block SAD
@@ -402,12 +411,14 @@ specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
 $vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
 
 add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, unsigned int *sse";
-specialize qw/vp8_mse16x16 mmx sse2 media neon/;
+specialize qw/vp8_mse16x16 mmx sse2 media neon_asm/;
 $vp8_mse16x16_sse2=vp8_mse16x16_wmt;
 $vp8_mse16x16_media=vp8_mse16x16_armv6;
+$vp8_mse16x16_neon_asm=vp8_mse16x16_neon;
 
 add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride";
-specialize qw/vp8_get4x4sse_cs mmx neon/;
+specialize qw/vp8_get4x4sse_cs mmx neon_asm/;
+$vp8_get4x4sse_cs_neon_asm=vp8_get4x4sse_cs_neon;
 
 #
 # Block copy
@@ -434,16 +445,19 @@ if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
 # Forward DCT
 #
 add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_fdct4x4 mmx sse2 media neon/;
+specialize qw/vp8_short_fdct4x4 mmx sse2 media neon_asm/;
 $vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6;
+$vp8_short_fdct4x4_neon_asm=vp8_short_fdct4x4_neon;
 
 add_proto qw/void vp8_short_fdct8x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_fdct8x4 mmx sse2 media neon/;
+specialize qw/vp8_short_fdct8x4 mmx sse2 media neon_asm/;
 $vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6;
+$vp8_short_fdct8x4_neon_asm=vp8_short_fdct8x4_neon;
 
 add_proto qw/void vp8_short_walsh4x4/, "short *input, short *output, int pitch";
-specialize qw/vp8_short_walsh4x4 sse2 media neon/;
+specialize qw/vp8_short_walsh4x4 sse2 media neon_asm/;
 $vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6;
+$vp8_short_walsh4x4_neon_asm=vp8_short_walsh4x4_neon;
 
 #
 # Quantizer
@@ -454,14 +468,16 @@ specialize qw/vp8_regular_quantize_b sse2/;
 #$vp8_regular_quantize_b_sse4_1=vp8_regular_quantize_b_sse4;
 
 add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
-specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
+specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon_asm/;
 $vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
+$vp8_fast_quantize_b_neon_asm=vp8_fast_quantize_b_neon;
 
 add_proto qw/void vp8_regular_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
 # no asm yet
 
 add_proto qw/void vp8_fast_quantize_b_pair/, "struct block *b1, struct block *b2, struct blockd *d1, struct blockd *d2";
-specialize qw/vp8_fast_quantize_b_pair neon/;
+specialize qw/vp8_fast_quantize_b_pair neon_asm/;
+$vp8_fast_quantize_b_pair_neon_asm=vp8_fast_quantize_b_pair_neon;
 
 add_proto qw/void vp8_quantize_mb/, "struct macroblock *";
 specialize qw/vp8_quantize_mb neon/;
@@ -488,16 +504,19 @@ specialize qw/vp8_mbuverror mmx sse2/;
 $vp8_mbuverror_sse2=vp8_mbuverror_xmm;
 
 add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
-specialize qw/vp8_subtract_b mmx sse2 media neon/;
+specialize qw/vp8_subtract_b mmx sse2 media neon_asm/;
 $vp8_subtract_b_media=vp8_subtract_b_armv6;
+$vp8_subtract_b_neon_asm=vp8_subtract_b_neon;
 
 add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
-specialize qw/vp8_subtract_mby mmx sse2 media neon/;
+specialize qw/vp8_subtract_mby mmx sse2 media neon_asm/;
 $vp8_subtract_mby_media=vp8_subtract_mby_armv6;
+$vp8_subtract_mby_neon_asm=vp8_subtract_mby_neon;
 
 add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
-specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
+specialize qw/vp8_subtract_mbuv mmx sse2 media neon_asm/;
 $vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
+$vp8_subtract_mbuv_neon_asm=vp8_subtract_mbuv_neon;
 
 #
 # Motion search
@@ -526,13 +545,14 @@ if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") {
 # Pick Loopfilter
 #
 add_proto qw/void vp8_yv12_copy_partial_frame/, "struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-specialize qw/vp8_yv12_copy_partial_frame neon/;
+specialize qw/vp8_yv12_copy_partial_frame neon_asm/;
+$vp8_yv12_copy_partial_frame_neon_asm=vp8_yv12_copy_partial_frame_neon;
 
 #
 # Denoiser filter
 #
 if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
-    add_proto qw/int vp8_denoiser_filter/, "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset";
+    add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
     specialize qw/vp8_denoiser_filter sse2 neon/;
 }
 
diff --git a/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c b/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c
index 23dc0a9..32ce65a 100644
--- a/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c
+++ b/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c
@@ -45,10 +45,13 @@
  *      [16, 255]       3               6                    7
  */
 
-int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
-                             YV12_BUFFER_CONFIG *running_avg,
-                             MACROBLOCK *signal, unsigned int motion_magnitude,
-                             int y_offset, int uv_offset) {
+int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y,
+                             int mc_running_avg_y_stride,
+                             unsigned char *running_avg_y,
+                             int running_avg_y_stride,
+                             unsigned char *sig, int sig_stride,
+                             unsigned int motion_magnitude,
+                             int increase_denoising) {
     /* If motion_magnitude is small, making the denoiser more aggressive by
      * increasing the adjustment for each level, level1 adjustment is
      * increased, the deltas stay the same.
@@ -60,14 +63,6 @@ int vp8_denoiser_filter_neon(YV12_BUFFER_CONFIG *mc_running_avg,
     const uint8x16_t v_level1_threshold = vdupq_n_u8(4);
     const uint8x16_t v_level2_threshold = vdupq_n_u8(8);
     const uint8x16_t v_level3_threshold = vdupq_n_u8(16);
-
-    /* Local variables for array pointers and strides. */
-    unsigned char *sig = signal->thismb;
-    int            sig_stride = 16;
-    unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
-    int            mc_running_avg_y_stride = mc_running_avg->y_stride;
-    unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
-    int            running_avg_y_stride = running_avg->y_stride;
     int64x2_t v_sum_diff_total = vdupq_n_s64(0);
 
     /* Go over lines. */
diff --git a/source/libvpx/vp8/encoder/block.h b/source/libvpx/vp8/encoder/block.h
index dd733e5..34879cf 100644
--- a/source/libvpx/vp8/encoder/block.h
+++ b/source/libvpx/vp8/encoder/block.h
@@ -125,6 +125,7 @@ typedef struct macroblock
 
     int optimize;
     int q_index;
+    int increase_denoising;
 
 #if CONFIG_TEMPORAL_DENOISING
     MB_PREDICTION_MODE best_sse_inter_mode;
diff --git a/source/libvpx/vp8/encoder/denoising.c b/source/libvpx/vp8/encoder/denoising.c
index 7819265..1e645fb 100644
--- a/source/libvpx/vp8/encoder/denoising.c
+++ b/source/libvpx/vp8/encoder/denoising.c
@@ -21,6 +21,7 @@ static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
  */
 static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
 static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
+static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 60;
 
 /*
  * The filter function was modified to reduce the computational complexity.
@@ -51,27 +52,32 @@ static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
  * [16, 255]              6                                    7
  */
 
-int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
-                          YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal,
-                          unsigned int motion_magnitude, int y_offset,
-                          int uv_offset)
+int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride,
+                          unsigned char *running_avg_y, int avg_y_stride,
+                          unsigned char *sig, int sig_stride,
+                          unsigned int motion_magnitude,
+                          int increase_denoising)
 {
-    unsigned char *sig = signal->thismb;
-    int sig_stride = 16;
-    unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
-    int mc_avg_y_stride = mc_running_avg->y_stride;
-    unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
-    int avg_y_stride = running_avg->y_stride;
-    int r, c, i;
+    unsigned char *running_avg_y_start = running_avg_y;
+    unsigned char *sig_start = sig;
+    int sum_diff_thresh;
+    int r, c;
     int sum_diff = 0;
     int adj_val[3] = {3, 4, 6};
-
+    int shift_inc1 = 0;
+    int shift_inc2 = 1;
     /* If motion_magnitude is small, making the denoiser more aggressive by
-     * increasing the adjustment for each level. */
+     * increasing the adjustment for each level. Add another increment for
+     * blocks that are labeled for increased denoising. */
     if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
     {
-        for (i = 0; i < 3; i++)
-            adj_val[i] += 1;
+      if (increase_denoising) {
+        shift_inc1 = 1;
+        shift_inc2 = 2;
+      }
+      adj_val[0] += shift_inc2;
+      adj_val[1] += shift_inc2;
+      adj_val[2] += shift_inc2;
     }
 
     for (r = 0; r < 16; ++r)
@@ -85,8 +91,9 @@ int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
             diff = mc_running_avg_y[c] - sig[c];
             absdiff = abs(diff);
 
-            /* When |diff| < 4, use pixel value from last denoised raw. */
-            if (absdiff <= 3)
+            // When |diff| <= 3 + shift_inc1, use pixel value from
+            // last denoised raw.
+            if (absdiff <= 3 + shift_inc1)
             {
                 running_avg_y[c] = mc_running_avg_y[c];
                 sum_diff += diff;
@@ -127,11 +134,12 @@ int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
         running_avg_y += avg_y_stride;
     }
 
-    if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+    sum_diff_thresh= SUM_DIFF_THRESHOLD;
+    if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
+    if (abs(sum_diff) > sum_diff_thresh)
         return COPY_BLOCK;
 
-    vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride,
-                      signal->thismb, sig_stride);
+    vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
     return FILTER_BLOCK;
 }
 
@@ -192,7 +200,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
     int mv_row;
     int mv_col;
     unsigned int motion_magnitude2;
-
+    unsigned int sse_thresh;
     MV_REFERENCE_FRAME frame = x->best_reference_frame;
     MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
 
@@ -277,7 +285,10 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
     mv_row = x->best_sse_mv.as_mv.row;
     mv_col = x->best_sse_mv.as_mv.col;
     motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
-    if (best_sse > SSE_THRESHOLD || motion_magnitude2
+    sse_thresh = SSE_THRESHOLD;
+    if (x->increase_denoising) sse_thresh = SSE_THRESHOLD_HIGH;
+
+    if (best_sse > sse_thresh || motion_magnitude2
            > 8 * NOISE_MOTION_THRESHOLD)
     {
         decision = COPY_BLOCK;
@@ -285,12 +296,18 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
 
     if (decision == FILTER_BLOCK)
     {
+        unsigned char *mc_running_avg_y =
+            denoiser->yv12_mc_running_avg.y_buffer + recon_yoffset;
+        int mc_avg_y_stride = denoiser->yv12_mc_running_avg.y_stride;
+        unsigned char *running_avg_y =
+            denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset;
+        int avg_y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride;
+
         /* Filter. */
-        decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
-                                       &denoiser->yv12_running_avg[INTRA_FRAME],
-                                       x,
-                                       motion_magnitude2,
-                                       recon_yoffset, recon_uvoffset);
+        decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride,
+                                         running_avg_y, avg_y_stride,
+                                         x->thismb, 16, motion_magnitude2,
+                                         x->increase_denoising);
     }
     if (decision == COPY_BLOCK)
     {
diff --git a/source/libvpx/vp8/encoder/denoising.h b/source/libvpx/vp8/encoder/denoising.h
index cc9913a..ae744d2 100644
--- a/source/libvpx/vp8/encoder/denoising.h
+++ b/source/libvpx/vp8/encoder/denoising.h
@@ -18,6 +18,7 @@ extern "C" {
 #endif
 
 #define SUM_DIFF_THRESHOLD (16 * 16 * 2)
+#define SUM_DIFF_THRESHOLD_HIGH (16 * 16 * 3)
 #define MOTION_MAGNITUDE_THRESHOLD (8*3)
 
 enum vp8_denoiser_decision
diff --git a/source/libvpx/vp8/encoder/pickinter.c b/source/libvpx/vp8/encoder/pickinter.c
index 39a3baf..cf6a82f 100644
--- a/source/libvpx/vp8/encoder/pickinter.c
+++ b/source/libvpx/vp8/encoder/pickinter.c
@@ -1177,6 +1177,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
             x->best_reference_frame = best_mbmode.ref_frame;
             best_sse = best_rd_sse;
         }
+        x->increase_denoising = 0;
         vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
                                 recon_yoffset, recon_uvoffset);
 
diff --git a/source/libvpx/vp8/encoder/x86/denoising_sse2.c b/source/libvpx/vp8/encoder/x86/denoising_sse2.c
index cceb826..5112f89 100644
--- a/source/libvpx/vp8/encoder/x86/denoising_sse2.c
+++ b/source/libvpx/vp8/encoder/x86/denoising_sse2.c
@@ -22,26 +22,28 @@ union sum_union {
     signed char e[16];
 };
 
-int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
-                             YV12_BUFFER_CONFIG *running_avg,
-                             MACROBLOCK *signal, unsigned int motion_magnitude,
-                             int y_offset, int uv_offset)
+int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
+                             int mc_avg_y_stride,
+                             unsigned char *running_avg_y, int avg_y_stride,
+                             unsigned char *sig, int sig_stride,
+                             unsigned int motion_magnitude,
+                             int increase_denoising)
 {
-    unsigned char *sig = signal->thismb;
-    int sig_stride = 16;
-    unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
-    int mc_avg_y_stride = mc_running_avg->y_stride;
-    unsigned char *running_avg_y = running_avg->y_buffer + y_offset;
-    int avg_y_stride = running_avg->y_stride;
+    unsigned char *running_avg_y_start = running_avg_y;
+    unsigned char *sig_start = sig;
+    int sum_diff_thresh;
     int r;
+    int shift_inc  = (increase_denoising &&
+        motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
     __m128i acc_diff = _mm_setzero_si128();
     const __m128i k_0 = _mm_setzero_si128();
-    const __m128i k_4 = _mm_set1_epi8(4);
+    const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
     const __m128i k_8 = _mm_set1_epi8(8);
     const __m128i k_16 = _mm_set1_epi8(16);
     /* Modify each level's adjustment according to motion_magnitude. */
     const __m128i l3 = _mm_set1_epi8(
-                      (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 : 6);
+                       (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
+                        7 + shift_inc : 6);
     /* Difference between level 3 and level 2 is 2. */
     const __m128i l32 = _mm_set1_epi8(2);
     /* Difference between level 2 and level 1 is 1. */
@@ -108,13 +110,14 @@ int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
                  + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
                  + s.e[12] + s.e[13] + s.e[14] + s.e[15];
 
-        if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+        sum_diff_thresh = SUM_DIFF_THRESHOLD;
+        if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
+        if (abs(sum_diff) > sum_diff_thresh)
         {
             return COPY_BLOCK;
         }
     }
 
-    vp8_copy_mem16x16(running_avg->y_buffer + y_offset, avg_y_stride,
-                      signal->thismb, sig_stride);
+    vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
     return FILTER_BLOCK;
 }
diff --git a/source/libvpx/vp8/vp8_common.mk b/source/libvpx/vp8/vp8_common.mk
index 2812111..8282547 100644
--- a/source/libvpx/vp8/vp8_common.mk
+++ b/source/libvpx/vp8/vp8_common.mk
@@ -129,7 +129,6 @@ VP8_COMMON_SRCS-$(HAVE_DSPR2)  += common/mips/dspr2/dequantize_dspr2.c
 # common (c)
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/filter_arm.c
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/loopfilter_arm.c
-VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/reconintra_arm.c
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/dequantize_arm.c
 VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/variance_arm.c
 
@@ -159,13 +158,16 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/vp8_variance_halfpixvar16x16_
 VP8_COMMON_SRCS-$(HAVE_MEDIA)  += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
 
 # common (neon)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfilter_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_blk_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
+#VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/reconintra_arm.c
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/loopfilter_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
+#VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/idct_blk_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_NEON_ASM)  += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
 
 # common (neon intrinsics)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/bilinearpredict_neon.c
@@ -173,14 +175,12 @@ VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/copymem_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dc_only_idct_add_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dequant_idct_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/dequantizeb_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_full_2x_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/iwalsh_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/mbloopfilter_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sad_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/shortidct4x4llm_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_0_2x_neon.c
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/variance_neon.c
 
 $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
diff --git a/source/libvpx/vp8/vp8_cx_iface.c b/source/libvpx/vp8/vp8_cx_iface.c
index 6ca6087..501dd3e 100644
--- a/source/libvpx/vp8/vp8_cx_iface.c
+++ b/source/libvpx/vp8/vp8_cx_iface.c
@@ -886,7 +886,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t  *ctx,
                 VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
 
                 /* Add the frame packet to the list of returned packets. */
-                round = (vpx_codec_pts_t)1000000
+                round = (vpx_codec_pts_t)10000000
                         * ctx->cfg.g_timebase.num / 2 - 1;
                 delta = (dst_end_time_stamp - dst_time_stamp);
                 pkt.kind = VPX_CODEC_CX_FRAME_PKT;
diff --git a/source/libvpx/vp8/vp8cx_arm.mk b/source/libvpx/vp8/vp8cx_arm.mk
index 398172a..5733048 100644
--- a/source/libvpx/vp8/vp8cx_arm.mk
+++ b/source/libvpx/vp8/vp8cx_arm.mk
@@ -35,11 +35,12 @@ VP8_CX_SRCS-$(HAVE_MEDIA)  += encoder/arm/armv6/walsh_v6$(ASM)
 
 #File list for neon
 # encoder
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/fastquantizeb_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/picklpf_arm.c
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/fastquantizeb_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/picklpf_arm.c
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/shortfdct_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/subtract_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/vp8_memcpy_neon$(ASM)
+VP8_CX_SRCS-$(HAVE_NEON_ASM)  += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
+
 VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/denoising_neon.c
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/shortfdct_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/subtract_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/vp8_mse16x16_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_NEON)  += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
diff --git a/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm b/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
index 5476400..ab5bb69 100644
--- a/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
+++ b/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_add_neon.asm
@@ -9,7 +9,7 @@
 ;
 
     EXPORT  |vp9_idct8x8_64_add_neon|
-    EXPORT  |vp9_idct8x8_10_add_neon|
+    EXPORT  |vp9_idct8x8_12_add_neon|
     ARM
     REQUIRE8
     PRESERVE8
@@ -310,13 +310,13 @@
     bx              lr
     ENDP  ; |vp9_idct8x8_64_add_neon|
 
-;void vp9_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_idct8x8_12_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
 ;
 ; r0  int16_t input
 ; r1  uint8_t *dest
 ; r2  int dest_stride)
 
-|vp9_idct8x8_10_add_neon| PROC
+|vp9_idct8x8_12_add_neon| PROC
     push            {r4-r9}
     vpush           {d8-d15}
     vld1.s16        {q8,q9}, [r0]!
@@ -514,6 +514,6 @@
     vpop            {d8-d15}
     pop             {r4-r9}
     bx              lr
-    ENDP  ; |vp9_idct8x8_10_add_neon|
+    ENDP  ; |vp9_idct8x8_12_add_neon|
 
     END
diff --git a/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index acccaea..fc44ffa 100644
--- a/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -617,7 +617,7 @@ void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
   }
 }
 
-void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest,
+void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
                               int dest_stride) {
   DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
   int16_t *outptr = out;
diff --git a/source/libvpx/vp9/common/vp9_common.h b/source/libvpx/vp9/common/vp9_common.h
index 2dccb70..04db7c0 100644
--- a/source/libvpx/vp9/common/vp9_common.h
+++ b/source/libvpx/vp9/common/vp9_common.h
@@ -45,7 +45,7 @@ extern "C" {
     vpx_memcpy(dest, src, n * sizeof(*src)); \
   }
 
-#define vp9_zero(dest) vpx_memset(&dest, 0, sizeof(dest))
+#define vp9_zero(dest) vpx_memset(&(dest), 0, sizeof(dest))
 #define vp9_zero_array(dest, n) vpx_memset(dest, 0, n * sizeof(*dest))
 
 static INLINE uint8_t clip_pixel(int val) {
diff --git a/source/libvpx/vp9/common/vp9_convolve.c b/source/libvpx/vp9/common/vp9_convolve.c
index d30e0b4..1a8c49d 100644
--- a/source/libvpx/vp9/common/vp9_convolve.c
+++ b/source/libvpx/vp9/common/vp9_convolve.c
@@ -156,6 +156,9 @@ void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
   const InterpKernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
+  (void)filter_y;
+  (void)y_step_q4;
+
   convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
                  x0_q4, x_step_q4, w, h);
 }
@@ -168,6 +171,9 @@ void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
   const InterpKernel *const filters_x = get_filter_base(filter_x);
   const int x0_q4 = get_filter_offset(filter_x, filters_x);
 
+  (void)filter_y;
+  (void)y_step_q4;
+
   convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
                      x0_q4, x_step_q4, w, h);
 }
@@ -179,6 +185,10 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           int w, int h) {
   const InterpKernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
+
+  (void)filter_x;
+  (void)x_step_q4;
+
   convolve_vert(src, src_stride, dst, dst_stride, filters_y,
                 y0_q4, y_step_q4, w, h);
 }
@@ -190,6 +200,10 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                               int w, int h) {
   const InterpKernel *const filters_y = get_filter_base(filter_y);
   const int y0_q4 = get_filter_offset(filter_y, filters_y);
+
+  (void)filter_x;
+  (void)x_step_q4;
+
   convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
                     y0_q4, y_step_q4, w, h);
 }
@@ -232,6 +246,9 @@ void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                          int w, int h) {
   int r;
 
+  (void)filter_x;  (void)filter_x_stride;
+  (void)filter_y;  (void)filter_y_stride;
+
   for (r = h; r > 0; --r) {
     vpx_memcpy(dst, src, w);
     src += src_stride;
@@ -246,6 +263,9 @@ void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                         int w, int h) {
   int x, y;
 
+  (void)filter_x;  (void)filter_x_stride;
+  (void)filter_y;  (void)filter_y_stride;
+
   for (y = 0; y < h; ++y) {
     for (x = 0; x < w; ++x)
       dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
diff --git a/source/libvpx/vp9/common/vp9_debugmodes.c b/source/libvpx/vp9/common/vp9_debugmodes.c
index 8f150a4..d2522bb 100644
--- a/source/libvpx/vp9/common/vp9_debugmodes.c
+++ b/source/libvpx/vp9/common/vp9_debugmodes.c
@@ -24,10 +24,9 @@ static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) {
  */
 static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
                           size_t member_offset) {
-  int mi_row;
-  int mi_col;
+  int mi_row, mi_col;
   int mi_index = 0;
-  MODE_INFO **mi_8x8 = cm->mi_grid_visible;
+  MODE_INFO **mi = cm->mi_grid_visible;
   int rows = cm->mi_rows;
   int cols = cm->mi_cols;
   char prefix = descriptor[0];
@@ -38,7 +37,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
     fprintf(file, "%c ", prefix);
     for (mi_col = 0; mi_col < cols; mi_col++) {
       fprintf(file, "%2d ",
-              *((int*) ((char *) (&mi_8x8[mi_index]->mbmi) +
+              *((int*) ((char *) (&mi[mi_index]->mbmi) +
                         member_offset)));
       mi_index++;
     }
@@ -52,7 +51,7 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
   int mi_col;
   int mi_index = 0;
   FILE *mvs = fopen(file, "a");
-  MODE_INFO **mi_8x8 = cm->mi_grid_visible;
+  MODE_INFO **mi = cm->mi_grid_visible;
   int rows = cm->mi_rows;
   int cols = cm->mi_cols;
 
@@ -67,8 +66,8 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
   for (mi_row = 0; mi_row < rows; mi_row++) {
     fprintf(mvs, "V ");
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row,
-                               mi_8x8[mi_index]->mbmi.mv[0].as_mv.col);
+      fprintf(mvs, "%4d:%4d ", mi[mi_index]->mbmi.mv[0].as_mv.row,
+                               mi[mi_index]->mbmi.mv[0].as_mv.col);
       mi_index++;
     }
     fprintf(mvs, "\n");
diff --git a/source/libvpx/vp9/common/vp9_idct.c b/source/libvpx/vp9/common/vp9_idct.c
index 20b78bf..856d41e 100644
--- a/source/libvpx/vp9/common/vp9_idct.c
+++ b/source/libvpx/vp9/common/vp9_idct.c
@@ -421,7 +421,7 @@ void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
   }
 }
 
-void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_12_add_c(const int16_t *input, uint8_t *dest, int stride) {
   int16_t out[8 * 8] = { 0 };
   int16_t *outptr = out;
   int i, j;
@@ -1348,8 +1348,8 @@ void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
   if (eob == 1)
     // DC only DCT coefficient
     vp9_idct8x8_1_add(input, dest, stride);
-  else if (eob <= 10)
-    vp9_idct8x8_10_add(input, dest, stride);
+  else if (eob <= 12)
+    vp9_idct8x8_12_add(input, dest, stride);
   else
     vp9_idct8x8_64_add(input, dest, stride);
 }
diff --git a/source/libvpx/vp9/common/vp9_loopfilter.c b/source/libvpx/vp9/common/vp9_loopfilter.c
index 3ac5a05..efd0249 100644
--- a/source/libvpx/vp9/common/vp9_loopfilter.c
+++ b/source/libvpx/vp9/common/vp9_loopfilter.c
@@ -619,12 +619,12 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n,
 // by mi_row, mi_col.
 // TODO(JBB): This function only works for yv12.
 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
-                    MODE_INFO **mi_8x8, const int mode_info_stride,
+                    MODE_INFO **mi, const int mode_info_stride,
                     LOOP_FILTER_MASK *lfm) {
   int idx_32, idx_16, idx_8;
   const loop_filter_info_n *const lfi_n = &cm->lf_info;
-  MODE_INFO **mip = mi_8x8;
-  MODE_INFO **mip2 = mi_8x8;
+  MODE_INFO **mip = mi;
+  MODE_INFO **mip2 = mi;
 
   // These are offsets to the next mi in the 64x64 block. It is what gets
   // added to the mi ptr as we go through each loop.  It helps us to avoids
@@ -1192,39 +1192,41 @@ void vp9_filter_block_plane(VP9_COMMON *const cm,
 }
 
 void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
-                          VP9_COMMON *cm, MACROBLOCKD *xd,
+                          VP9_COMMON *cm,
+                          struct macroblockd_plane planes[MAX_MB_PLANE],
                           int start, int stop, int y_only) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
-  int mi_row, mi_col;
+  const int use_420 = y_only || (planes[1].subsampling_y == 1 &&
+                                 planes[1].subsampling_x == 1);
   LOOP_FILTER_MASK lfm;
-  int use_420 = y_only || (xd->plane[1].subsampling_y == 1 &&
-      xd->plane[1].subsampling_x == 1);
+  int mi_row, mi_col;
 
   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;
+    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
       int plane;
 
-      vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
 
       // TODO(JBB): Make setup_mask work for non 420.
       if (use_420)
-        vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride,
+        vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
                        &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
         if (use_420)
-          vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+          vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
         else
-          filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
+          filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
                                     mi_row, mi_col);
       }
     }
   }
 }
 
-void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
+void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
+                           VP9_COMMON *cm, MACROBLOCKD *xd,
                            int frame_filter_level,
                            int y_only, int partial_frame) {
   int start_mi_row, end_mi_row, mi_rows_to_filter;
@@ -1238,7 +1240,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
   }
   end_mi_row = start_mi_row + mi_rows_to_filter;
   vp9_loop_filter_frame_init(cm, frame_filter_level);
-  vp9_loop_filter_rows(cm->frame_to_show, cm, xd,
+  vp9_loop_filter_rows(frame, cm, xd->plane,
                        start_mi_row, end_mi_row,
                        y_only);
 }
@@ -1246,7 +1248,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
 int vp9_loop_filter_worker(void *arg1, void *arg2) {
   LFWorkerData *const lf_data = (LFWorkerData*)arg1;
   (void)arg2;
-  vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+  vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                        lf_data->start, lf_data->stop, lf_data->y_only);
   return 1;
 }
diff --git a/source/libvpx/vp9/common/vp9_loopfilter.h b/source/libvpx/vp9/common/vp9_loopfilter.h
index 97ae9d2..6fa2773 100644
--- a/source/libvpx/vp9/common/vp9_loopfilter.h
+++ b/source/libvpx/vp9/common/vp9_loopfilter.h
@@ -104,22 +104,23 @@ void vp9_loop_filter_init(struct VP9Common *cm);
 // calls this function directly.
 void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl);
 
-void vp9_loop_filter_frame(struct VP9Common *cm,
+void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
+                           struct VP9Common *cm,
                            struct macroblockd *mbd,
                            int filter_level,
                            int y_only, int partial_frame);
 
 // Apply the loop filter to [start, stop) macro block rows in frame_buffer.
 void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer,
-                          struct VP9Common *cm, struct macroblockd *xd,
+                          struct VP9Common *cm,
+                          struct macroblockd_plane planes[MAX_MB_PLANE],
                           int start, int stop, int y_only);
 
 typedef struct LoopFilterWorkerData {
   const YV12_BUFFER_CONFIG *frame_buffer;
   struct VP9Common *cm;
-  struct macroblockd xd;  // TODO(jzern): most of this is unnecessary to the
-                          // loopfilter. the planes are necessary as their state
-                          // is changed during decode.
+  struct macroblockd_plane planes[MAX_MB_PLANE];
+
   int start;
   int stop;
   int y_only;
diff --git a/source/libvpx/vp9/common/vp9_postproc.c b/source/libvpx/vp9/common/vp9_postproc.c
index 5601a93..9f32104 100644
--- a/source/libvpx/vp9/common/vp9_postproc.c
+++ b/source/libvpx/vp9/common/vp9_postproc.c
@@ -24,61 +24,7 @@
 #include "vp9/common/vp9_systemdependent.h"
 #include "vp9/common/vp9_textblit.h"
 
-#define RGB_TO_YUV(t)                                            \
-  ( (0.257*(float)(t >> 16))  + (0.504*(float)(t >> 8 & 0xff)) + \
-    (0.098*(float)(t & 0xff)) + 16),                             \
-  (-(0.148*(float)(t >> 16))  - (0.291*(float)(t >> 8 & 0xff)) + \
-    (0.439*(float)(t & 0xff)) + 128),                            \
-  ( (0.439*(float)(t >> 16))  - (0.368*(float)(t >> 8 & 0xff)) - \
-    (0.071*(float)(t & 0xff)) + 128)
-
-/* global constants */
-#if 0 && CONFIG_POSTPROC_VISUALIZER
-static const unsigned char PREDICTION_MODE_colors[MB_MODE_COUNT][3] = {
-  { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
-  { RGB_TO_YUV(0x00FF00) },   /* Green */
-  { RGB_TO_YUV(0xADFF2F) },   /* GreenYellow */
-  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
-  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
-  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
-  { RGB_TO_YUV(0x008F8F) },   /* Dark Cyan */
-  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
-  { RGB_TO_YUV(0x8F0000) },   /* Dark Red */
-  { RGB_TO_YUV(0x228B22) },   /* ForestGreen */
-  { RGB_TO_YUV(0x006400) },   /* DarkGreen */
-  { RGB_TO_YUV(0x98F5FF) },   /* Cadet Blue */
-  { RGB_TO_YUV(0x6CA6CD) },   /* Sky Blue */
-  { RGB_TO_YUV(0x00008B) },   /* Dark blue */
-  { RGB_TO_YUV(0x551A8B) },   /* Purple */
-  { RGB_TO_YUV(0xFF0000) }    /* Red */
-  { RGB_TO_YUV(0xCC33FF) },   /* Magenta */
-};
-
-static const unsigned char B_PREDICTION_MODE_colors[INTRA_MODES][3] = {
-  { RGB_TO_YUV(0x6633ff) },   /* Purple */
-  { RGB_TO_YUV(0xcc33ff) },   /* Magenta */
-  { RGB_TO_YUV(0xff33cc) },   /* Pink */
-  { RGB_TO_YUV(0xff3366) },   /* Coral */
-  { RGB_TO_YUV(0x3366ff) },   /* Blue */
-  { RGB_TO_YUV(0xed00f5) },   /* Dark Blue */
-  { RGB_TO_YUV(0x2e00b8) },   /* Dark Purple */
-  { RGB_TO_YUV(0xff6633) },   /* Orange */
-  { RGB_TO_YUV(0x33ccff) },   /* Light Blue */
-  { RGB_TO_YUV(0x8ab800) },   /* Green */
-  { RGB_TO_YUV(0xffcc33) },   /* Light Orange */
-  { RGB_TO_YUV(0x33ffcc) },   /* Aqua */
-  { RGB_TO_YUV(0x66ff33) },   /* Light Green */
-  { RGB_TO_YUV(0xccff33) },   /* Yellow */
-};
-
-static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] = {
-  { RGB_TO_YUV(0x00ff00) },   /* Blue */
-  { RGB_TO_YUV(0x0000ff) },   /* Green */
-  { RGB_TO_YUV(0xffff00) },   /* Yellow */
-  { RGB_TO_YUV(0xff0000) },   /* Red */
-};
-#endif
-
+#if CONFIG_VP9_POSTPROC
 static const short kernel5[] = {
   1, 1, 4, 1, 1
 };
@@ -448,163 +394,6 @@ void vp9_plane_add_noise_c(uint8_t *start, char *noise,
   }
 }
 
-/* Blend the macro block with a solid colored square.  Leave the
- * edges unblended to give distinction to macro blocks in areas
- * filled with the same color block.
- */
-void vp9_blend_mb_inner_c(uint8_t *y, uint8_t *u, uint8_t *v,
-                          int y1, int u1, int v1, int alpha, int stride) {
-  int i, j;
-  int y1_const = y1 * ((1 << 16) - alpha);
-  int u1_const = u1 * ((1 << 16) - alpha);
-  int v1_const = v1 * ((1 << 16) - alpha);
-
-  y += 2 * stride + 2;
-  for (i = 0; i < 12; i++) {
-    for (j = 0; j < 12; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  stride >>= 1;
-
-  u += stride + 1;
-  v += stride + 1;
-
-  for (i = 0; i < 6; i++) {
-    for (j = 0; j < 6; j++) {
-      u[j] = (u[j] * alpha + u1_const) >> 16;
-      v[j] = (v[j] * alpha + v1_const) >> 16;
-    }
-    u += stride;
-    v += stride;
-  }
-}
-
-/* Blend only the edge of the macro block.  Leave center
- * unblended to allow for other visualizations to be layered.
- */
-void vp9_blend_mb_outer_c(uint8_t *y, uint8_t *u, uint8_t *v,
-                          int y1, int u1, int v1, int alpha, int stride) {
-  int i, j;
-  int y1_const = y1 * ((1 << 16) - alpha);
-  int u1_const = u1 * ((1 << 16) - alpha);
-  int v1_const = v1 * ((1 << 16) - alpha);
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 16; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  for (i = 0; i < 12; i++) {
-    y[0]  = (y[0] * alpha  + y1_const) >> 16;
-    y[1]  = (y[1] * alpha  + y1_const) >> 16;
-    y[14] = (y[14] * alpha + y1_const) >> 16;
-    y[15] = (y[15] * alpha + y1_const) >> 16;
-    y += stride;
-  }
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 16; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  stride >>= 1;
-
-  for (j = 0; j < 8; j++) {
-    u[j] = (u[j] * alpha + u1_const) >> 16;
-    v[j] = (v[j] * alpha + v1_const) >> 16;
-  }
-  u += stride;
-  v += stride;
-
-  for (i = 0; i < 6; i++) {
-    u[0] = (u[0] * alpha + u1_const) >> 16;
-    v[0] = (v[0] * alpha + v1_const) >> 16;
-
-    u[7] = (u[7] * alpha + u1_const) >> 16;
-    v[7] = (v[7] * alpha + v1_const) >> 16;
-
-    u += stride;
-    v += stride;
-  }
-
-  for (j = 0; j < 8; j++) {
-    u[j] = (u[j] * alpha + u1_const) >> 16;
-    v[j] = (v[j] * alpha + v1_const) >> 16;
-  }
-}
-
-void vp9_blend_b_c(uint8_t *y, uint8_t *u, uint8_t *v,
-                   int y1, int u1, int v1, int alpha, int stride) {
-  int i, j;
-  int y1_const = y1 * ((1 << 16) - alpha);
-  int u1_const = u1 * ((1 << 16) - alpha);
-  int v1_const = v1 * ((1 << 16) - alpha);
-
-  for (i = 0; i < 4; i++) {
-    for (j = 0; j < 4; j++) {
-      y[j] = (y[j] * alpha + y1_const) >> 16;
-    }
-    y += stride;
-  }
-
-  stride >>= 1;
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 2; j++) {
-      u[j] = (u[j] * alpha + u1_const) >> 16;
-      v[j] = (v[j] * alpha + v1_const) >> 16;
-    }
-    u += stride;
-    v += stride;
-  }
-}
-
-static void constrain_line(int x0, int *x1, int y0, int *y1,
-                           int width, int height) {
-  int dx;
-  int dy;
-
-  if (*x1 > width) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *x1 = width;
-    if (dx)
-      *y1 = ((width - x0) * dy) / dx + y0;
-  }
-  if (*x1 < 0) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *x1 = 0;
-    if (dx)
-      *y1 = ((0 - x0) * dy) / dx + y0;
-  }
-  if (*y1 > height) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *y1 = height;
-    if (dy)
-      *x1 = ((height - y0) * dx) / dy + x0;
-  }
-  if (*y1 < 0) {
-    dx = *x1 - x0;
-    dy = *y1 - y0;
-
-    *y1 = 0;
-    if (dy)
-      *x1 = ((0 - y0) * dx) / dy + x0;
-  }
-}
-
 int vp9_post_proc_frame(struct VP9Common *cm,
                         YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) {
   const int q = MIN(63, cm->lf.filter_level * 10 / 6);
@@ -643,328 +432,6 @@ int vp9_post_proc_frame(struct VP9Common *cm,
                         ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
   }
 
-#if 0 && CONFIG_POSTPROC_VISUALIZER
-  if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {
-    char message[512];
-    snprintf(message, sizeof(message) -1,
-             "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
-             (cm->frame_type == KEY_FRAME),
-             cm->refresh_golden_frame,
-             cm->base_qindex,
-             cm->filter_level,
-             flags,
-             cm->mb_cols, cm->mb_rows);
-    vp9_blit_text(message, ppbuf->y_buffer, ppbuf->y_stride);
-  }
-
-  if (flags & VP9D_DEBUG_TXT_MBLK_MODES) {
-    int i, j;
-    uint8_t *y_ptr;
-    int mb_rows = ppbuf->y_height >> 4;
-    int mb_cols = ppbuf->y_width  >> 4;
-    int mb_index = 0;
-    MODE_INFO *mi = cm->mi;
-
-    y_ptr = post->y_buffer + 4 * post->y_stride + 4;
-
-    /* vp9_filter each macro block */
-    for (i = 0; i < mb_rows; i++) {
-      for (j = 0; j < mb_cols; j++) {
-        char zz[4];
-
-        snprintf(zz, sizeof(zz) - 1, "%c", mi[mb_index].mbmi.mode + 'a');
-
-        vp9_blit_text(zz, y_ptr, post->y_stride);
-        mb_index++;
-        y_ptr += 16;
-      }
-
-      mb_index++; /* border */
-      y_ptr += post->y_stride  * 16 - post->y_width;
-    }
-  }
-
-  if (flags & VP9D_DEBUG_TXT_DC_DIFF) {
-    int i, j;
-    uint8_t *y_ptr;
-    int mb_rows = ppbuf->y_height >> 4;
-    int mb_cols = ppbuf->y_width  >> 4;
-    int mb_index = 0;
-    MODE_INFO *mi = cm->mi;
-
-    y_ptr = post->y_buffer + 4 * post->y_stride + 4;
-
-    /* vp9_filter each macro block */
-    for (i = 0; i < mb_rows; i++) {
-      for (j = 0; j < mb_cols; j++) {
-        char zz[4];
-        int dc_diff = !(mi[mb_index].mbmi.mode != I4X4_PRED &&
-                        mi[mb_index].mbmi.mode != SPLITMV &&
-                        mi[mb_index].mbmi.skip);
-
-        if (cm->frame_type == KEY_FRAME)
-          snprintf(zz, sizeof(zz) - 1, "a");
-        else
-          snprintf(zz, sizeof(zz) - 1, "%c", dc_diff + '0');
-
-        vp9_blit_text(zz, y_ptr, post->y_stride);
-        mb_index++;
-        y_ptr += 16;
-      }
-
-      mb_index++; /* border */
-      y_ptr += post->y_stride  * 16 - post->y_width;
-    }
-  }
-
-  if (flags & VP9D_DEBUG_TXT_RATE_INFO) {
-    char message[512];
-    snprintf(message, sizeof(message),
-             "Bitrate: %10.2f framerate: %10.2f ",
-             cm->bitrate, cm->framerate);
-    vp9_blit_text(message, ppbuf->y_buffer, ppbuf->y_stride);
-  }
-
-  /* Draw motion vectors */
-  if ((flags & VP9D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) {
-    int width  = ppbuf->y_width;
-    int height = ppbuf->y_height;
-    uint8_t *y_buffer = ppbuf->y_buffer;
-    int y_stride = ppbuf->y_stride;
-    MODE_INFO *mi = cm->mi;
-    int x0, y0;
-
-    for (y0 = 0; y0 < height; y0 += 16) {
-      for (x0 = 0; x0 < width; x0 += 16) {
-        int x1, y1;
-
-        if (!(ppflags->display_mv_flag & (1 << mi->mbmi.mode))) {
-          mi++;
-          continue;
-        }
-
-        if (mi->mbmi.mode == SPLITMV) {
-          switch (mi->mbmi.partitioning) {
-            case PARTITIONING_16X8 : {  /* mv_top_bottom */
-              union b_mode_info *bmi = &mi->bmi[0];
-              MV *mv = &bmi->mv.as_mv;
-
-              x1 = x0 + 8 + (mv->col >> 3);
-              y1 = y0 + 4 + (mv->row >> 3);
-
-              constrain_line(x0 + 8, &x1, y0 + 4, &y1, width, height);
-              vp9_blit_line(x0 + 8,  x1, y0 + 4,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[8];
-
-              x1 = x0 + 8 + (mv->col >> 3);
-              y1 = y0 + 12 + (mv->row >> 3);
-
-              constrain_line(x0 + 8, &x1, y0 + 12, &y1, width, height);
-              vp9_blit_line(x0 + 8,  x1, y0 + 12,  y1, y_buffer, y_stride);
-
-              break;
-            }
-            case PARTITIONING_8X16 : {  /* mv_left_right */
-              union b_mode_info *bmi = &mi->bmi[0];
-              MV *mv = &bmi->mv.as_mv;
-
-              x1 = x0 + 4 + (mv->col >> 3);
-              y1 = y0 + 8 + (mv->row >> 3);
-
-              constrain_line(x0 + 4, &x1, y0 + 8, &y1, width, height);
-              vp9_blit_line(x0 + 4,  x1, y0 + 8,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[2];
-
-              x1 = x0 + 12 + (mv->col >> 3);
-              y1 = y0 + 8 + (mv->row >> 3);
-
-              constrain_line(x0 + 12, &x1, y0 + 8, &y1, width, height);
-              vp9_blit_line(x0 + 12,  x1, y0 + 8,  y1, y_buffer, y_stride);
-
-              break;
-            }
-            case PARTITIONING_8X8 : {  /* mv_quarters   */
-              union b_mode_info *bmi = &mi->bmi[0];
-              MV *mv = &bmi->mv.as_mv;
-
-              x1 = x0 + 4 + (mv->col >> 3);
-              y1 = y0 + 4 + (mv->row >> 3);
-
-              constrain_line(x0 + 4, &x1, y0 + 4, &y1, width, height);
-              vp9_blit_line(x0 + 4,  x1, y0 + 4,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[2];
-
-              x1 = x0 + 12 + (mv->col >> 3);
-              y1 = y0 + 4 + (mv->row >> 3);
-
-              constrain_line(x0 + 12, &x1, y0 + 4, &y1, width, height);
-              vp9_blit_line(x0 + 12,  x1, y0 + 4,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[8];
-
-              x1 = x0 + 4 + (mv->col >> 3);
-              y1 = y0 + 12 + (mv->row >> 3);
-
-              constrain_line(x0 + 4, &x1, y0 + 12, &y1, width, height);
-              vp9_blit_line(x0 + 4,  x1, y0 + 12,  y1, y_buffer, y_stride);
-
-              bmi = &mi->bmi[10];
-
-              x1 = x0 + 12 + (mv->col >> 3);
-              y1 = y0 + 12 + (mv->row >> 3);
-
-              constrain_line(x0 + 12, &x1, y0 + 12, &y1, width, height);
-              vp9_blit_line(x0 + 12,  x1, y0 + 12,  y1, y_buffer, y_stride);
-              break;
-            }
-            case PARTITIONING_4X4:
-            default : {
-              union b_mode_info *bmi = mi->bmi;
-              int bx0, by0;
-
-              for (by0 = y0; by0 < (y0 + 16); by0 += 4) {
-                for (bx0 = x0; bx0 < (x0 + 16); bx0 += 4) {
-                  MV *mv = &bmi->mv.as_mv;
-
-                  x1 = bx0 + 2 + (mv->col >> 3);
-                  y1 = by0 + 2 + (mv->row >> 3);
-
-                  constrain_line(bx0 + 2, &x1, by0 + 2, &y1, width, height);
-                  vp9_blit_line(bx0 + 2,  x1, by0 + 2,  y1, y_buffer, y_stride);
-
-                  bmi++;
-                }
-              }
-            }
-          }
-        } else if (is_inter_mode(mi->mbmi.mode)) {
-          MV *mv = &mi->mbmi.mv.as_mv;
-          const int lx0 = x0 + 8;
-          const int ly0 = y0 + 8;
-
-          x1 = lx0 + (mv->col >> 3);
-          y1 = ly0 + (mv->row >> 3);
-
-          if (x1 != lx0 && y1 != ly0) {
-            constrain_line(lx0, &x1, ly0 - 1, &y1, width, height);
-            vp9_blit_line(lx0,  x1, ly0 - 1,  y1, y_buffer, y_stride);
-
-            constrain_line(lx0, &x1, ly0 + 1, &y1, width, height);
-            vp9_blit_line(lx0,  x1, ly0 + 1,  y1, y_buffer, y_stride);
-          } else {
-            vp9_blit_line(lx0,  x1, ly0,  y1, y_buffer, y_stride);
-          }
-        }
-
-        mi++;
-      }
-      mi++;
-    }
-  }
-
-  /* Color in block modes */
-  if ((flags & VP9D_DEBUG_CLR_BLK_MODES)
-      && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) {
-    int y, x;
-    int width  = ppbuf->y_width;
-    int height = ppbuf->y_height;
-    uint8_t *y_ptr = ppbuf->y_buffer;
-    uint8_t *u_ptr = ppbuf->u_buffer;
-    uint8_t *v_ptr = ppbuf->v_buffer;
-    int y_stride = ppbuf->y_stride;
-    MODE_INFO *mi = cm->mi;
-
-    for (y = 0; y < height; y += 16) {
-      for (x = 0; x < width; x += 16) {
-        int Y = 0, U = 0, V = 0;
-
-        if (mi->mbmi.mode == I4X4_PRED &&
-            ((ppflags->display_mb_modes_flag & I4X4_PRED) ||
-             ppflags->display_b_modes_flag)) {
-          int by, bx;
-          uint8_t *yl, *ul, *vl;
-          union b_mode_info *bmi = mi->bmi;
-
-          yl = y_ptr + x;
-          ul = u_ptr + (x >> 1);
-          vl = v_ptr + (x >> 1);
-
-          for (by = 0; by < 16; by += 4) {
-            for (bx = 0; bx < 16; bx += 4) {
-              if ((ppflags->display_b_modes_flag & (1 << mi->mbmi.mode))
-                  || (ppflags->display_mb_modes_flag & I4X4_PRED)) {
-                Y = B_PREDICTION_MODE_colors[bmi->as_mode][0];
-                U = B_PREDICTION_MODE_colors[bmi->as_mode][1];
-                V = B_PREDICTION_MODE_colors[bmi->as_mode][2];
-
-                vp9_blend_b(yl + bx, ul + (bx >> 1), vl + (bx >> 1), Y, U, V,
-                    0xc000, y_stride);
-              }
-              bmi++;
-            }
-
-            yl += y_stride * 4;
-            ul += y_stride * 1;
-            vl += y_stride * 1;
-          }
-        } else if (ppflags->display_mb_modes_flag & (1 << mi->mbmi.mode)) {
-          Y = PREDICTION_MODE_colors[mi->mbmi.mode][0];
-          U = PREDICTION_MODE_colors[mi->mbmi.mode][1];
-          V = PREDICTION_MODE_colors[mi->mbmi.mode][2];
-
-          vp9_blend_mb_inner(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1),
-                             Y, U, V, 0xc000, y_stride);
-        }
-
-        mi++;
-      }
-      y_ptr += y_stride * 16;
-      u_ptr += y_stride * 4;
-      v_ptr += y_stride * 4;
-
-      mi++;
-    }
-  }
-
-  /* Color in frame reference blocks */
-  if ((flags & VP9D_DEBUG_CLR_FRM_REF_BLKS) &&
-      ppflags->display_ref_frame_flag) {
-    int y, x;
-    int width  = ppbuf->y_width;
-    int height = ppbuf->y_height;
-    uint8_t *y_ptr = ppbuf->y_buffer;
-    uint8_t *u_ptr = ppbuf->u_buffer;
-    uint8_t *v_ptr = ppbuf->v_buffer;
-    int y_stride = ppbuf->y_stride;
-    MODE_INFO *mi = cm->mi;
-
-    for (y = 0; y < height; y += 16) {
-      for (x = 0; x < width; x += 16) {
-        int Y = 0, U = 0, V = 0;
-
-        if (ppflags->display_ref_frame_flag & (1 << mi->mbmi.ref_frame)) {
-          Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
-          U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
-          V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
-
-          vp9_blend_mb_outer(y_ptr + x, u_ptr + (x >> 1), v_ptr + (x >> 1),
-                             Y, U, V, 0xc000, y_stride);
-        }
-
-        mi++;
-      }
-      y_ptr += y_stride * 16;
-      u_ptr += y_stride * 4;
-      v_ptr += y_stride * 4;
-
-      mi++;
-    }
-  }
-#endif
-
   *dest = *ppbuf;
 
   /* handle problem with extending borders */
@@ -975,3 +442,4 @@ int vp9_post_proc_frame(struct VP9Common *cm,
 
   return 0;
 }
+#endif
diff --git a/source/libvpx/vp9/common/vp9_ppflags.h b/source/libvpx/vp9/common/vp9_ppflags.h
index e8b04d2..1644a1b 100644
--- a/source/libvpx/vp9/common/vp9_ppflags.h
+++ b/source/libvpx/vp9/common/vp9_ppflags.h
@@ -33,12 +33,6 @@ typedef struct {
   int post_proc_flag;
   int deblocking_level;
   int noise_level;
-#if CONFIG_POSTPROC_VISUALIZER
-  int display_ref_frame_flag;
-  int display_mb_modes_flag;
-  int display_b_modes_flag;
-  int display_mv_flag;
-#endif  // CONFIG_POSTPROC_VISUALIZER
 } vp9_ppflags_t;
 
 #ifdef __cplusplus
diff --git a/source/libvpx/vp9/common/vp9_reconinter.c b/source/libvpx/vp9/common/vp9_reconinter.c
index e722d6a..edc36d7 100644
--- a/source/libvpx/vp9/common/vp9_reconinter.c
+++ b/source/libvpx/vp9/common/vp9_reconinter.c
@@ -409,7 +409,7 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
   }
 }
 
-void vp9_setup_dst_planes(MACROBLOCKD *xd,
+void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
                           const YV12_BUFFER_CONFIG *src,
                           int mi_row, int mi_col) {
   uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
@@ -419,7 +419,7 @@ void vp9_setup_dst_planes(MACROBLOCKD *xd,
   int i;
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
-    struct macroblockd_plane *const pd = &xd->plane[i];
+    struct macroblockd_plane *const pd = &planes[i];
     setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL,
                      pd->subsampling_x, pd->subsampling_y);
   }
diff --git a/source/libvpx/vp9/common/vp9_reconinter.h b/source/libvpx/vp9/common/vp9_reconinter.h
index 86f3158..58c596e 100644
--- a/source/libvpx/vp9/common/vp9_reconinter.h
+++ b/source/libvpx/vp9/common/vp9_reconinter.h
@@ -57,7 +57,8 @@ static INLINE void setup_pred_plane(struct buf_2d *dst,
   dst->stride = stride;
 }
 
-void vp9_setup_dst_planes(MACROBLOCKD *xd, const YV12_BUFFER_CONFIG *src,
+void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
+                          const YV12_BUFFER_CONFIG *src,
                           int mi_row, int mi_col);
 
 void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx,
diff --git a/source/libvpx/vp9/common/vp9_reconintra.c b/source/libvpx/vp9/common/vp9_reconintra.c
index 32e4551..403e105 100644
--- a/source/libvpx/vp9/common/vp9_reconintra.c
+++ b/source/libvpx/vp9/common/vp9_reconintra.c
@@ -31,6 +31,9 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
   ADST_ADST,  // TM
 };
 
+// This serves as a wrapper function, so that all the prediction functions
+// can be unified and accessed as a pointer array. Note that the boundary
+// above and left are not necessarily used all the time.
 #define intra_pred_sized(type, size) \
   void vp9_##type##_predictor_##size##x##size##_c(uint8_t *dst, \
                                                   ptrdiff_t stride, \
@@ -48,7 +51,7 @@ const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
 static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                   const uint8_t *above, const uint8_t *left) {
   int r, c;
-
+  (void) above;
   // first column
   for (r = 0; r < bs - 1; ++r)
     dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1);
@@ -77,6 +80,7 @@ intra_pred_allsizes(d207)
 static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                  const uint8_t *above, const uint8_t *left) {
   int r, c;
+  (void) left;
   for (r = 0; r < bs; ++r) {
     for (c = 0; c < bs; ++c)
       dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] +
@@ -92,6 +96,7 @@ intra_pred_allsizes(d63)
 static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                  const uint8_t *above, const uint8_t *left) {
   int r, c;
+  (void) left;
   for (r = 0; r < bs; ++r) {
     for (c = 0; c < bs; ++c)
       dst[c] = r + c + 2 < bs * 2 ?  ROUND_POWER_OF_TWO(above[r + c] +
@@ -184,6 +189,7 @@ intra_pred_allsizes(d153)
 static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                const uint8_t *above, const uint8_t *left) {
   int r;
+  (void) left;
 
   for (r = 0; r < bs; r++) {
     vpx_memcpy(dst, above, bs);
@@ -195,6 +201,7 @@ intra_pred_allsizes(v)
 static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                const uint8_t *above, const uint8_t *left) {
   int r;
+  (void) above;
 
   for (r = 0; r < bs; r++) {
     vpx_memset(dst, left[r], bs);
@@ -219,6 +226,8 @@ intra_pred_allsizes(tm)
 static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                     const uint8_t *above, const uint8_t *left) {
   int r;
+  (void) above;
+  (void) left;
 
   for (r = 0; r < bs; r++) {
     vpx_memset(dst, 128, bs);
@@ -231,6 +240,7 @@ static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                      const uint8_t *above,
                                      const uint8_t *left) {
   int i, r, expected_dc, sum = 0;
+  (void) above;
 
   for (i = 0; i < bs; i++)
     sum += left[i];
@@ -246,6 +256,7 @@ intra_pred_allsizes(dc_left)
 static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                     const uint8_t *above, const uint8_t *left) {
   int i, r, expected_dc, sum = 0;
+  (void) left;
 
   for (i = 0; i < bs; i++)
     sum += above[i];
diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index 63380d6..1037bfb 100644
--- a/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -58,7 +58,8 @@ add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, con
 specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_4x4 neon_asm dspr2/, "$ssse3_x86inc";
+$vp9_h_predictor_4x4_neon_asm=vp9_h_predictor_4x4_neon;
 
 add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d117_predictor_4x4/;
@@ -70,10 +71,12 @@ add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
 specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_4x4 neon_asm/, "$sse_x86inc";
+$vp9_v_predictor_4x4_neon_asm=vp9_v_predictor_4x4_neon;
 
 add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+specialize qw/vp9_tm_predictor_4x4 neon_asm dspr2/, "$sse_x86inc";
+$vp9_tm_predictor_4x4_neon_asm=vp9_tm_predictor_4x4_neon;
 
 add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
@@ -97,7 +100,8 @@ add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, con
 specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_8x8 neon_asm dspr2/, "$ssse3_x86inc";
+$vp9_h_predictor_8x8_neon_asm=vp9_h_predictor_8x8_neon;
 
 add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d117_predictor_8x8/;
@@ -109,10 +113,12 @@ add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
 specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_8x8 neon_asm/, "$sse_x86inc";
+$vp9_v_predictor_8x8_neon_asm=vp9_v_predictor_8x8_neon;
 
 add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_8x8 neon_asm dspr2/, "$sse2_x86inc";
+$vp9_tm_predictor_8x8_neon_asm=vp9_tm_predictor_8x8_neon;
 
 add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
@@ -136,7 +142,8 @@ add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, c
 specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_16x16 neon_asm dspr2/, "$ssse3_x86inc";
+$vp9_h_predictor_16x16_neon_asm=vp9_h_predictor_16x16_neon;
 
 add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d117_predictor_16x16/;
@@ -148,10 +155,12 @@ add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
 specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_16x16 neon_asm/, "$sse2_x86inc";
+$vp9_v_predictor_16x16_neon_asm=vp9_v_predictor_16x16_neon;
 
 add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_16x16 neon_asm/, "$sse2_x86inc";
+$vp9_tm_predictor_16x16_neon_asm=vp9_tm_predictor_16x16_neon;
 
 add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
@@ -175,7 +184,8 @@ add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, c
 specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
 
 add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_32x32 neon_asm/, "$ssse3_x86inc";
+$vp9_h_predictor_32x32_neon_asm=vp9_h_predictor_32x32_neon;
 
 add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_d117_predictor_32x32/;
@@ -187,10 +197,12 @@ add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride,
 specialize qw/vp9_d153_predictor_32x32/;
 
 add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_32x32 neon_asm/, "$sse2_x86inc";
+$vp9_v_predictor_32x32_neon_asm=vp9_v_predictor_32x32_neon;
 
 add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+specialize qw/vp9_tm_predictor_32x32 neon_asm/, "$sse2_x86_64";
+$vp9_tm_predictor_32x32_neon_asm=vp9_tm_predictor_32x32_neon;
 
 add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
 specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
@@ -208,37 +220,48 @@ specialize qw/vp9_dc_128_predictor_32x32/;
 # Loopfilter
 #
 add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16 sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
 
 add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
 
 add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
 
 add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
 
 add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_vertical_4 mmx neon_asm dspr2/;
+$vp9_lpf_vertical_4_neon_asm=vp9_lpf_vertical_4_neon;
 
 add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_4_dual sse2 neon_asm dspr2/;
+$vp9_lpf_vertical_4_dual_neon_asm=vp9_lpf_vertical_4_dual_neon;
 
 add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
+$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
 
 add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
+$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
 
 add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
+$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
 
 add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4 mmx neon_asm dspr2/;
+$vp9_lpf_horizontal_4_neon_asm=vp9_lpf_horizontal_4_neon;
 
 add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4_dual sse2 neon_asm dspr2/;
+$vp9_lpf_horizontal_4_dual_neon_asm=vp9_lpf_horizontal_4_dual_neon;
 
 #
 # post proc
@@ -274,71 +297,91 @@ specialize qw/vp9_blend_b/;
 # Sub Pixel Filters
 #
 add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_copy neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_convolve_copy neon_asm dspr2/, "$sse2_x86inc";
+$vp9_convolve_copy_neon_asm=vp9_convolve_copy_neon;
 
 add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
+$vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
 
 add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 avx2 neon_asm dspr2/;
+$vp9_convolve8_neon_asm=vp9_convolve8_neon;
 
 add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 avx2 neon_asm dspr2/;
+$vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
 
 add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 avx2 neon_asm dspr2/;
+$vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
 
 add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg sse2 ssse3 neon_asm dspr2/;
+$vp9_convolve8_avg_neon_asm=vp9_convolve8_avg_neon;
 
 add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon_asm dspr2/;
+$vp9_convolve8_avg_horiz_neon_asm=vp9_convolve8_avg_horiz_neon;
 
 add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon_asm dspr2/;
+$vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
 
 #
 # dct
 #
 add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
+$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
 
 add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
+specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
+$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
 
 add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
+$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
 
 add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/, "$ssse3_x86_64";
+specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
+$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
 
-add_proto qw/void vp9_idct8x8_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct8x8_10_add sse2 neon dspr2/;
+add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
+specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
+$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
 
 add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
 
 add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_256_add sse2 neon dspr2/;
+specialize qw/vp9_idct16x16_256_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
 
 add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct16x16_10_add sse2 neon dspr2/;
+specialize qw/vp9_idct16x16_10_add sse2 neon_asm dspr2/;
+$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
 
 add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2/;
+specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
 
 add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_34_add sse2 neon dspr2/;
-$vp9_idct32x32_34_add_neon=vp9_idct32x32_1024_add_neon;
+specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
 
 add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
-specialize qw/vp9_idct32x32_1_add sse2 neon dspr2/;
+specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
+$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
 
 add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
-specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
+specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
+$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
 
 add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
-specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
+specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
+$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
 
 add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
 specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
@@ -660,7 +703,7 @@ specialize qw/vp9_get_mb_ss mmx sse2/;
 # ENCODEMB INVOKE
 
 add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
-specialize qw/vp9_block_error/, "$sse2_x86inc";
+specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
 
 add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
 specialize qw/vp9_subtract_block/, "$sse2_x86inc";
@@ -693,7 +736,7 @@ add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int str
 specialize qw/vp9_fht16x16 sse2 avx2/;
 
 add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fwht4x4/;
+specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
 
 add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct4x4 sse2 avx2/;
diff --git a/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 13a5b5a..0231726 100644
--- a/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -995,7 +995,7 @@ void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
   RECON_AND_STORE(dest, in[7]);
 }
 
-void vp9_idct8x8_10_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1<<4);
diff --git a/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm b/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm
index f2a120f..2c10607 100644
--- a/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm
+++ b/source/libvpx/vp9/common/x86/vp9_idct_ssse3.asm
@@ -28,6 +28,29 @@ TRANSFORM_COEFFS    6270, 15137
 TRANSFORM_COEFFS    3196, 16069
 TRANSFORM_COEFFS   13623,  9102
 
+%macro PAIR_PP_COEFFS 2
+dpw_%1_%2:   dw  %1,  %1,  %1,  %1,  %2,  %2,  %2,  %2
+%endmacro
+
+%macro PAIR_MP_COEFFS 2
+dpw_m%1_%2:  dw -%1, -%1, -%1, -%1,  %2,  %2,  %2,  %2
+%endmacro
+
+%macro PAIR_MM_COEFFS 2
+dpw_m%1_m%2: dw -%1, -%1, -%1, -%1, -%2, -%2, -%2, -%2
+%endmacro
+
+PAIR_PP_COEFFS     30274, 12540
+PAIR_PP_COEFFS      6392, 32138
+PAIR_MP_COEFFS     18204, 27246
+
+PAIR_PP_COEFFS     12540, 12540
+PAIR_PP_COEFFS     30274, 30274
+PAIR_PP_COEFFS      6392,  6392
+PAIR_PP_COEFFS     32138, 32138
+PAIR_MM_COEFFS     18204, 18204
+PAIR_PP_COEFFS     27246, 27246
+
 SECTION .text
 
 %if ARCH_X86_64
@@ -128,6 +151,7 @@ SECTION .text
 %endmacro
 
 INIT_XMM ssse3
+; full inverse 8x8 2D-DCT transform
 cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
   mova     m8, [pd_8192]
   mova    m11, [pw_16]
@@ -159,4 +183,118 @@ cglobal idct8x8_64_add, 3, 5, 13, input, output, stride
   ADD_STORE_8P_2X  6, 7, 9, 10, 12
 
   RET
+
+; inverse 8x8 2D-DCT transform with only first 12 coeffs non-zero
+cglobal idct8x8_12_add, 3, 5, 13, input, output, stride
+  mova       m8, [pd_8192]
+  mova      m11, [pw_16]
+  mova      m12, [pw_11585x2]
+
+  lea        r3, [2 * strideq]
+
+  mova       m0, [inputq +  0]
+  mova       m1, [inputq + 16]
+  mova       m2, [inputq + 32]
+  mova       m3, [inputq + 48]
+
+  punpcklwd  m0, m1
+  punpcklwd  m2, m3
+  punpckhdq  m9, m0, m2
+  punpckldq  m0, m2
+  SWAP       2, 9
+
+  ; m0 -> [0], [0]
+  ; m1 -> [1], [1]
+  ; m2 -> [2], [2]
+  ; m3 -> [3], [3]
+  punpckhqdq m10, m0, m0
+  punpcklqdq m0,  m0
+  punpckhqdq m9,  m2, m2
+  punpcklqdq m2,  m2
+  SWAP       1, 10
+  SWAP       3,  9
+
+  pmulhrsw   m0, m12
+  pmulhrsw   m2, [dpw_30274_12540]
+  pmulhrsw   m1, [dpw_6392_32138]
+  pmulhrsw   m3, [dpw_m18204_27246]
+
+  SUM_SUB    0, 2, 9
+  SUM_SUB    1, 3, 9
+
+  punpcklqdq m9, m3, m3
+  punpckhqdq m5, m3, m9
+
+  SUM_SUB    3, 5, 9
+  punpckhqdq m5, m3
+  pmulhrsw   m5, m12
+
+  punpckhqdq m9, m1, m5
+  punpcklqdq m1, m5
+  SWAP       5, 9
+
+  SUM_SUB    0, 5, 9
+  SUM_SUB    2, 1, 9
+
+  punpckhqdq m3, m0, m0
+  punpckhqdq m4, m1, m1
+  punpckhqdq m6, m5, m5
+  punpckhqdq m7, m2, m2
+
+  punpcklwd  m0, m3
+  punpcklwd  m7, m2
+  punpcklwd  m1, m4
+  punpcklwd  m6, m5
+
+  punpckhdq  m4, m0, m7
+  punpckldq  m0, m7
+  punpckhdq  m10, m1, m6
+  punpckldq  m5, m1, m6
+
+  punpckhqdq m1, m0, m5
+  punpcklqdq m0, m5
+  punpckhqdq m3, m4, m10
+  punpcklqdq m2, m4, m10
+
+
+  pmulhrsw   m0, m12
+  pmulhrsw   m6, m2, [dpw_30274_30274]
+  pmulhrsw   m4, m2, [dpw_12540_12540]
+
+  pmulhrsw   m7, m1, [dpw_32138_32138]
+  pmulhrsw   m1, [dpw_6392_6392]
+  pmulhrsw   m5, m3, [dpw_m18204_m18204]
+  pmulhrsw   m3, [dpw_27246_27246]
+
+  mova       m2, m0
+  SUM_SUB    0, 6, 9
+  SUM_SUB    2, 4, 9
+  SUM_SUB    1, 5, 9
+  SUM_SUB    7, 3, 9
+
+  SUM_SUB    3, 5, 9
+  pmulhrsw   m3, m12
+  pmulhrsw   m5, m12
+
+  SUM_SUB    0, 7, 9
+  SUM_SUB    2, 3, 9
+  SUM_SUB    4, 5, 9
+  SUM_SUB    6, 1, 9
+
+  SWAP       3, 6
+  SWAP       1, 2
+  SWAP       2, 4
+
+
+  pxor    m12, m12
+  ADD_STORE_8P_2X  0, 1, 9, 10, 12
+  lea              outputq, [outputq + r3]
+  ADD_STORE_8P_2X  2, 3, 9, 10, 12
+  lea              outputq, [outputq + r3]
+  ADD_STORE_8P_2X  4, 5, 9, 10, 12
+  lea              outputq, [outputq + r3]
+  ADD_STORE_8P_2X  6, 7, 9, 10, 12
+
+  RET
+
 %endif
diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.c b/source/libvpx/vp9/decoder/vp9_decodeframe.c
index 45ebb2f..3124158 100644
--- a/source/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/source/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -316,7 +316,7 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
   // as they are always compared to values that are in 1/8th pel units
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
 
-  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
+  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
   return &xd->mi[0]->mbmi;
 }
 
@@ -676,17 +676,17 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
 }
 
 static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
-                        vp9_reader *r) {
-  const int num_threads = pbi->oxcf.max_threads;
+                        int do_loopfilter_inline, vp9_reader *r) {
+  const int num_threads = pbi->max_threads;
   VP9_COMMON *const cm = &pbi->common;
   int mi_row, mi_col;
   MACROBLOCKD *xd = &pbi->mb;
 
-  if (pbi->do_loopfilter_inline) {
+  if (do_loopfilter_inline) {
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
     lf_data->frame_buffer = get_frame_new_buffer(cm);
     lf_data->cm = cm;
-    lf_data->xd = pbi->mb;
+    vp9_copy(lf_data->planes, pbi->mb.plane);
     lf_data->stop = 0;
     lf_data->y_only = 0;
     vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
@@ -702,7 +702,7 @@ static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
       decode_partition(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
     }
 
-    if (pbi->do_loopfilter_inline) {
+    if (do_loopfilter_inline) {
       const int lf_start = mi_row - MI_BLOCK_SIZE;
       LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
 
@@ -723,7 +723,7 @@ static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile,
     }
   }
 
-  if (pbi->do_loopfilter_inline) {
+  if (do_loopfilter_inline) {
     LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
 
     vp9_worker_sync(&pbi->lf_worker);
@@ -749,14 +749,20 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
     cm->log2_tile_rows += vp9_rb_read_bit(rb);
 }
 
+typedef struct TileBuffer {
+  const uint8_t *data;
+  size_t size;
+  int col;  // only used with multi-threaded decoding
+} TileBuffer;
+
 // Reads the next tile returning its size and adjusting '*data' accordingly
 // based on 'is_last'.
-static size_t get_tile(const uint8_t *const data_end,
-                       int is_last,
-                       struct vpx_internal_error_info *error_info,
-                       const uint8_t **data,
-                       vpx_decrypt_cb decrypt_cb,
-                       void *decrypt_state) {
+static void get_tile_buffer(const uint8_t *const data_end,
+                            int is_last,
+                            struct vpx_internal_error_info *error_info,
+                            const uint8_t **data,
+                            vpx_decrypt_cb decrypt_cb, void *decrypt_state,
+                            TileBuffer *buf) {
   size_t size;
 
   if (!is_last) {
@@ -779,18 +785,34 @@ static size_t get_tile(const uint8_t *const data_end,
   } else {
     size = data_end - *data;
   }
-  return size;
+
+  buf->data = *data;
+  buf->size = size;
+
+  *data += size;
 }
 
-typedef struct TileBuffer {
-  const uint8_t *data;
-  size_t size;
-  int col;  // only used with multi-threaded decoding
-} TileBuffer;
+static void get_tile_buffers(VP9Decoder *pbi,
+                             const uint8_t *data, const uint8_t *data_end,
+                             int tile_cols, int tile_rows,
+                             TileBuffer (*tile_buffers)[1 << 6]) {
+  int r, c;
+
+  for (r = 0; r < tile_rows; ++r) {
+    for (c = 0; c < tile_cols; ++c) {
+      const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1);
+      TileBuffer *const buf = &tile_buffers[r][c];
+      buf->col = c;
+      get_tile_buffer(data_end, is_last, &pbi->common.error, &data,
+                      pbi->decrypt_cb, pbi->decrypt_state, buf);
+    }
+  }
+}
 
 static const uint8_t *decode_tiles(VP9Decoder *pbi,
                                    const uint8_t *data,
-                                   const uint8_t *data_end) {
+                                   const uint8_t *data_end,
+                                   int do_loopfilter_inline) {
   VP9_COMMON *const cm = &pbi->common;
   const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
@@ -811,25 +833,12 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
   vpx_memset(cm->above_seg_context, 0,
              sizeof(*cm->above_seg_context) * aligned_cols);
 
-  // Load tile data into tile_buffers
-  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
-    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      const int last_tile = tile_row == tile_rows - 1 &&
-                            tile_col == tile_cols - 1;
-      const size_t size = get_tile(data_end, last_tile, &cm->error, &data,
-                                   pbi->decrypt_cb, pbi->decrypt_state);
-      TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
-      buf->data = data;
-      buf->size = size;
-      data += size;
-    }
-  }
+  get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
 
   // Decode tiles using data from tile_buffers
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1
-                                               : tile_col;
+      const int col = pbi->inv_tile_order ? tile_cols - tile_col - 1 : tile_col;
       const int last_tile = tile_row == tile_rows - 1 &&
                                  col == tile_cols - 1;
       const TileBuffer *const buf = &tile_buffers[tile_row][col];
@@ -838,7 +847,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
       vp9_tile_init(&tile, cm, tile_row, col);
       setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r,
                           pbi->decrypt_cb, pbi->decrypt_state);
-      decode_tile(pbi, &tile, &r);
+      decode_tile(pbi, &tile, do_loopfilter_inline, &r);
 
       if (last_tile)
         end = vp9_reader_find_end(&r);
@@ -887,8 +896,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
   const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
-  const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
-  TileBuffer tile_buffers[1 << 6];
+  const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
+  TileBuffer tile_buffers[1][1 << 6];
   int n;
   int final_worker = -1;
 
@@ -899,7 +908,7 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
   // TODO(jzern): See if we can remove the restriction of passing in max
   // threads to the decoder.
   if (pbi->num_tile_workers == 0) {
-    const int num_threads = pbi->oxcf.max_threads & ~1;
+    const int num_threads = pbi->max_threads & ~1;
     int i;
     // TODO(jzern): Allocate one less worker, as in the current code we only
     // use num_threads - 1 workers.
@@ -933,19 +942,11 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
              sizeof(*cm->above_seg_context) * aligned_mi_cols);
 
   // Load tile data into tile_buffers
-  for (n = 0; n < tile_cols; ++n) {
-    const size_t size =
-        get_tile(data_end, n == tile_cols - 1, &cm->error, &data,
-                 pbi->decrypt_cb, pbi->decrypt_state);
-    TileBuffer *const buf = &tile_buffers[n];
-    buf->data = data;
-    buf->size = size;
-    buf->col = n;
-    data += size;
-  }
+  get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
 
   // Sort the buffers based on size in descending order.
-  qsort(tile_buffers, tile_cols, sizeof(tile_buffers[0]), compare_tile_buffers);
+  qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]),
+        compare_tile_buffers);
 
   // Rearrange the tile buffers such that per-tile group the largest, and
   // presumably the most difficult, tile will be decoded in the main thread.
@@ -954,11 +955,11 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
   {
     int group_start = 0;
     while (group_start < tile_cols) {
-      const TileBuffer largest = tile_buffers[group_start];
+      const TileBuffer largest = tile_buffers[0][group_start];
       const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
-      memmove(tile_buffers + group_start, tile_buffers + group_start + 1,
-              (group_end - group_start) * sizeof(tile_buffers[0]));
-      tile_buffers[group_end] = largest;
+      memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1,
+              (group_end - group_start) * sizeof(tile_buffers[0][0]));
+      tile_buffers[0][group_end] = largest;
       group_start = group_end + 1;
     }
   }
@@ -970,7 +971,7 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
       VP9Worker *const worker = &pbi->tile_workers[i];
       TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
       TileInfo *const tile = (TileInfo*)worker->data2;
-      TileBuffer *const buf = &tile_buffers[n];
+      TileBuffer *const buf = &tile_buffers[0][n];
 
       tile_data->cm = cm;
       tile_data->xd = pbi->mb;
@@ -1278,6 +1279,7 @@ static struct vp9_read_bit_buffer* init_read_bit_buffer(
     const uint8_t *data,
     const uint8_t *data_end,
     uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) {
+  vp9_zero(*rb);
   rb->bit_offset = 0;
   rb->error_handler = error_handler;
   rb->error_handler_data = &pbi->common;
@@ -1298,7 +1300,7 @@ int vp9_decode_frame(VP9Decoder *pbi,
                      const uint8_t **p_data_end) {
   VP9_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &pbi->mb;
-  struct vp9_read_bit_buffer rb = { 0 };
+  struct vp9_read_bit_buffer rb;
   uint8_t clear_data[MAX_VP9_HEADER_SIZE];
   const size_t first_partition_size = read_uncompressed_header(pbi,
       init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
@@ -1306,6 +1308,8 @@ int vp9_decode_frame(VP9Decoder *pbi,
   const int tile_rows = 1 << cm->log2_tile_rows;
   const int tile_cols = 1 << cm->log2_tile_cols;
   YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
+  const int do_loopfilter_inline = tile_rows == 1 && tile_cols == 1 &&
+                                   cm->lf.filter_level;
   xd->cur_buf = new_fb;
 
   if (!first_partition_size) {
@@ -1322,18 +1326,6 @@ int vp9_decode_frame(VP9Decoder *pbi,
     vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
                        "Truncated packet or corrupt header length");
 
-  pbi->do_loopfilter_inline =
-      (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level;
-  if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
-    CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
-                    vpx_memalign(32, sizeof(LFWorkerData)));
-    pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
-    if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
-      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
-                         "Loop filter thread creation failed");
-    }
-  }
-
   init_macroblockd(cm, &pbi->mb);
 
   if (cm->coding_use_prev_mi)
@@ -1353,11 +1345,26 @@ int vp9_decode_frame(VP9Decoder *pbi,
 
   // TODO(jzern): remove frame_parallel_decoding_mode restriction for
   // single-frame tile decoding.
-  if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
+  if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1 &&
       cm->frame_parallel_decoding_mode) {
     *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
+    // If multiple threads are used to decode tiles, then we use those threads
+    // to do parallel loopfiltering.
+    vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
   } else {
-    *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
+    if (do_loopfilter_inline && pbi->lf_worker.data1 == NULL) {
+      CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
+                      vpx_memalign(32, sizeof(LFWorkerData)));
+      pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
+      if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
+        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                           "Loop filter thread creation failed");
+      }
+    }
+    *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end,
+                               do_loopfilter_inline);
+    if (!do_loopfilter_inline)
+      vp9_loop_filter_frame(new_fb, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
   }
 
   new_fb->corrupted |= xd->corrupted;
@@ -1370,16 +1377,17 @@ int vp9_decode_frame(VP9Decoder *pbi,
                          "A stream must start with a complete key frame");
   }
 
-  if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode &&
-      !new_fb->corrupted) {
-    vp9_adapt_coef_probs(cm);
+  if (!new_fb->corrupted) {
+    if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+      vp9_adapt_coef_probs(cm);
 
-    if (!frame_is_intra_only(cm)) {
-      vp9_adapt_mode_probs(cm);
-      vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+      if (!frame_is_intra_only(cm)) {
+        vp9_adapt_mode_probs(cm);
+        vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+      }
+    } else {
+      debug_check_frame_counts(cm);
     }
-  } else {
-    debug_check_frame_counts(cm);
   }
 
   if (cm->refresh_frame_context)
diff --git a/source/libvpx/vp9/decoder/vp9_decoder.c b/source/libvpx/vp9/decoder/vp9_decoder.c
index abcff9f..9e0811f 100644
--- a/source/libvpx/vp9/decoder/vp9_decoder.c
+++ b/source/libvpx/vp9/decoder/vp9_decoder.c
@@ -42,7 +42,7 @@ void vp9_initialize_dec() {
   }
 }
 
-VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf) {
+VP9Decoder *vp9_decoder_create() {
   VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
   VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
 
@@ -66,7 +66,6 @@ VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf) {
   vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
 
   cm->current_video_frame = 0;
-  pbi->oxcf = *oxcf;
   pbi->ready_for_new_data = 1;
   pbi->decoded_key_frame = 0;
 
@@ -280,16 +279,6 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
 
   swap_frame_buffers(pbi);
 
-  if (!pbi->do_loopfilter_inline) {
-    // If multiple threads are used to decode tiles, then we use those threads
-    // to do parallel loopfiltering.
-    if (pbi->num_tile_workers) {
-      vp9_loop_filter_frame_mt(pbi, cm, cm->lf.filter_level, 0, 0);
-    } else {
-      vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
-    }
-  }
-
   vp9_clear_system_state();
 
   cm->last_width = cm->width;
@@ -315,11 +304,14 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
                       int64_t *time_stamp, int64_t *time_end_stamp,
                       vp9_ppflags_t *flags) {
   int ret = -1;
+#if !CONFIG_VP9_POSTPROC
+  (void)*flags;
+#endif
 
   if (pbi->ready_for_new_data == 1)
     return ret;
 
-  /* ie no raw frame to show!!! */
+  /* no raw frame to show!!! */
   if (pbi->common.show_frame == 0)
     return ret;
 
@@ -330,8 +322,8 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
 #if CONFIG_VP9_POSTPROC
   ret = vp9_post_proc_frame(&pbi->common, sd, flags);
 #else
-    *sd = *pbi->common.frame_to_show;
-    ret = 0;
+  *sd = *pbi->common.frame_to_show;
+  ret = 0;
 #endif /*!CONFIG_POSTPROC*/
   vp9_clear_system_state();
   return ret;
diff --git a/source/libvpx/vp9/decoder/vp9_decoder.h b/source/libvpx/vp9/decoder/vp9_decoder.h
index ebcbb90..d6110c4 100644
--- a/source/libvpx/vp9/decoder/vp9_decoder.h
+++ b/source/libvpx/vp9/decoder/vp9_decoder.h
@@ -27,21 +27,11 @@
 extern "C" {
 #endif
 
-typedef struct VP9DecoderConfig {
-  int width;
-  int height;
-  int version;
-  int max_threads;
-  int inv_tile_order;
-} VP9DecoderConfig;
-
 typedef struct VP9Decoder {
   DECLARE_ALIGNED(16, MACROBLOCKD, mb);
 
   DECLARE_ALIGNED(16, VP9_COMMON, common);
 
-  VP9DecoderConfig oxcf;
-
   int64_t last_time_stamp;
   int ready_for_new_data;
 
@@ -49,7 +39,6 @@ typedef struct VP9Decoder {
 
   int decoded_key_frame;
 
-  int do_loopfilter_inline;  // apply loopfilter to available rows immediately
   VP9Worker lf_worker;
 
   VP9Worker *tile_workers;
@@ -59,6 +48,9 @@ typedef struct VP9Decoder {
 
   vpx_decrypt_cb decrypt_cb;
   void *decrypt_state;
+
+  int max_threads;
+  int inv_tile_order;
 } VP9Decoder;
 
 void vp9_initialize_dec();
@@ -83,8 +75,7 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
 int vp9_get_reference_dec(struct VP9Decoder *pbi,
                           int index, YV12_BUFFER_CONFIG **fb);
 
-
-struct VP9Decoder *vp9_decoder_create(const VP9DecoderConfig *oxcf);
+struct VP9Decoder *vp9_decoder_create();
 
 void vp9_decoder_remove(struct VP9Decoder *pbi);
 
diff --git a/source/libvpx/vp9/decoder/vp9_dthread.c b/source/libvpx/vp9/decoder/vp9_dthread.c
index 9098063..bc6c418 100644
--- a/source/libvpx/vp9/decoder/vp9_dthread.c
+++ b/source/libvpx/vp9/decoder/vp9_dthread.c
@@ -89,7 +89,8 @@ static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
 
 // Implement row loopfiltering for each thread.
 static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
-                                VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                                VP9_COMMON *const cm,
+                                struct macroblockd_plane planes[MAX_MB_PLANE],
                                 int start, int stop, int y_only,
                                 VP9LfSync *const lf_sync, int num_lf_workers) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
@@ -107,11 +108,11 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
 
       sync_read(lf_sync, r, c);
 
-      vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+      vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
       vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
-        vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+        vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
       }
 
       sync_write(lf_sync, r, c, sb_cols);
@@ -124,7 +125,7 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
   TileWorkerData *const tile_data = (TileWorkerData*)arg1;
   LFWorkerData *const lf_data = &tile_data->lfdata;
 
-  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+  loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                       lf_data->start, lf_data->stop, lf_data->y_only,
                       lf_data->lf_sync, lf_data->num_lf_workers);
   return 1;
@@ -132,15 +133,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
 
 // VP9 decoder: Implement multi-threaded loopfilter that uses the tile
 // threads.
-void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
-                              VP9_COMMON *cm,
+void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
+                              VP9Decoder *pbi, VP9_COMMON *cm,
                               int frame_filter_level,
-                              int y_only, int partial_frame) {
+                              int y_only) {
   VP9LfSync *const lf_sync = &pbi->lf_row_sync;
   // Number of superblock rows and cols
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
   const int tile_cols = 1 << cm->log2_tile_cols;
-  const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols);
+  const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
   int i;
 
   // Allocate memory used in thread synchronization.
@@ -184,9 +185,9 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
     worker->hook = (VP9WorkerHook)loop_filter_row_worker;
 
     // Loopfilter data
-    lf_data->frame_buffer = get_frame_new_buffer(cm);
+    lf_data->frame_buffer = frame;
     lf_data->cm = cm;
-    lf_data->xd = pbi->mb;
+    vp9_copy(lf_data->planes, pbi->mb.plane);
     lf_data->start = i;
     lf_data->stop = sb_rows;
     lf_data->y_only = y_only;   // always do all planes in decoder
diff --git a/source/libvpx/vp9/decoder/vp9_dthread.h b/source/libvpx/vp9/decoder/vp9_dthread.h
index 8738cee..a727e2a 100644
--- a/source/libvpx/vp9/decoder/vp9_dthread.h
+++ b/source/libvpx/vp9/decoder/vp9_dthread.h
@@ -48,9 +48,10 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync,
 void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows);
 
 // Multi-threaded loopfilter that uses the tile threads.
-void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi,
+void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
+                              struct VP9Decoder *pbi,
                               struct VP9Common *cm,
                               int frame_filter_level,
-                              int y_only, int partial_frame);
+                              int y_only);
 
 #endif  // VP9_DECODER_VP9_DTHREAD_H_
diff --git a/source/libvpx/vp9/encoder/vp9_bitstream.c b/source/libvpx/vp9/encoder/vp9_bitstream.c
index 35d2ecf..8ef2b2e 100644
--- a/source/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/source/libvpx/vp9/encoder/vp9_bitstream.c
@@ -485,8 +485,8 @@ static void write_modes(VP9_COMP *cpi,
 }
 
 static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size,
-                                    vp9_coeff_stats *coef_branch_ct) {
-  vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size];
+                                    vp9_coeff_stats *coef_branch_ct,
+                                    vp9_coeff_probs_model *coef_probs) {
   vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size];
   unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
       cpi->common.counts.eob_branch[tx_size];
@@ -513,10 +513,9 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size,
 
 static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
                                      TX_SIZE tx_size,
-                                     vp9_coeff_stats *frame_branch_ct) {
-  vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size];
-  vp9_coeff_probs_model *old_frame_coef_probs =
-      cpi->common.fc.coef_probs[tx_size];
+                                     vp9_coeff_stats *frame_branch_ct,
+                                     vp9_coeff_probs_model *new_coef_probs) {
+  vp9_coeff_probs_model *old_coef_probs = cpi->common.fc.coef_probs[tx_size];
   const vp9_prob upd = DIFF_UPDATE_PROB;
   const int entropy_nodes_update = UNCONSTRAINED_NODES;
   int i, j, k, l, t;
@@ -530,14 +529,14 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
           for (k = 0; k < COEF_BANDS; ++k) {
             for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
               for (t = 0; t < entropy_nodes_update; ++t) {
-                vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
-                const vp9_prob oldp = old_frame_coef_probs[i][j][k][l][t];
+                vp9_prob newp = new_coef_probs[i][j][k][l][t];
+                const vp9_prob oldp = old_coef_probs[i][j][k][l][t];
                 int s;
                 int u = 0;
                 if (t == PIVOT_NODE)
                   s = vp9_prob_diff_update_savings_search_model(
                       frame_branch_ct[i][j][k][l][0],
-                      old_frame_coef_probs[i][j][k][l], &newp, upd);
+                      old_coef_probs[i][j][k][l], &newp, upd);
                 else
                   s = vp9_prob_diff_update_savings_search(
                       frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
@@ -567,15 +566,15 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
             for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
               // calc probs and branch cts for this frame only
               for (t = 0; t < entropy_nodes_update; ++t) {
-                vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
-                vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+                vp9_prob newp = new_coef_probs[i][j][k][l][t];
+                vp9_prob *oldp = old_coef_probs[i][j][k][l] + t;
                 const vp9_prob upd = DIFF_UPDATE_PROB;
                 int s;
                 int u = 0;
                 if (t == PIVOT_NODE)
                   s = vp9_prob_diff_update_savings_search_model(
                       frame_branch_ct[i][j][k][l][0],
-                      old_frame_coef_probs[i][j][k][l], &newp, upd);
+                      old_coef_probs[i][j][k][l], &newp, upd);
                 else
                   s = vp9_prob_diff_update_savings_search(
                       frame_branch_ct[i][j][k][l][t],
@@ -612,8 +611,8 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
             for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
               // calc probs and branch cts for this frame only
               for (t = 0; t < entropy_nodes_update; ++t) {
-                vp9_prob newp = new_frame_coef_probs[i][j][k][l][t];
-                vp9_prob *oldp = old_frame_coef_probs[i][j][k][l] + t;
+                vp9_prob newp = new_coef_probs[i][j][k][l][t];
+                vp9_prob *oldp = old_coef_probs[i][j][k][l] + t;
                 int s;
                 int u = 0;
                 if (l >= prev_coef_contexts_to_update ||
@@ -623,7 +622,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
                   if (t == PIVOT_NODE)
                     s = vp9_prob_diff_update_savings_search_model(
                         frame_branch_ct[i][j][k][l][0],
-                        old_frame_coef_probs[i][j][k][l], &newp, upd);
+                        old_coef_probs[i][j][k][l], &newp, upd);
                   else
                     s = vp9_prob_diff_update_savings_search(
                         frame_branch_ct[i][j][k][l][t],
@@ -670,14 +669,17 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) {
   const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
   TX_SIZE tx_size;
   vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES];
+  vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
 
   vp9_clear_system_state();
 
   for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size)
-    build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size]);
+    build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size],
+                            frame_coef_probs[tx_size]);
 
   for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
-    update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size]);
+    update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size],
+                             frame_coef_probs[tx_size]);
 }
 
 static void encode_loopfilter(struct loopfilter *lf,
diff --git a/source/libvpx/vp9/encoder/vp9_context_tree.c b/source/libvpx/vp9/encoder/vp9_context_tree.c
index 659935c..ac9b562 100644
--- a/source/libvpx/vp9/encoder/vp9_context_tree.c
+++ b/source/libvpx/vp9/encoder/vp9_context_tree.c
@@ -8,14 +8,13 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #include "vp9/encoder/vp9_context_tree.h"
 
 static const BLOCK_SIZE square[] = {
-    BLOCK_8X8,
-    BLOCK_16X16,
-    BLOCK_32X32,
-    BLOCK_64X64,
+  BLOCK_8X8,
+  BLOCK_16X16,
+  BLOCK_32X32,
+  BLOCK_64X64,
 };
 
 static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
@@ -62,23 +61,25 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
     }
   }
 }
-static void free_tree_contexts(PC_TREE *this_pc) {
-  free_mode_context(&this_pc->none);
-  free_mode_context(&this_pc->horizontal[0]);
-  free_mode_context(&this_pc->horizontal[1]);
-  free_mode_context(&this_pc->vertical[0]);
-  free_mode_context(&this_pc->vertical[1]);
-}
-static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *this_pc,
+
+static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *tree,
                                 int num_4x4_blk) {
-  alloc_mode_context(cm, num_4x4_blk, &this_pc->none);
-  alloc_mode_context(cm, num_4x4_blk/2, &this_pc->horizontal[0]);
-  alloc_mode_context(cm, num_4x4_blk/2, &this_pc->vertical[0]);
+  alloc_mode_context(cm, num_4x4_blk, &tree->none);
+  alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[0]);
+  alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[0]);
 
   /* TODO(Jbb): for 4x8 and 8x4 these allocated values are not used.
    * Figure out a better way to do this. */
-  alloc_mode_context(cm, num_4x4_blk/2, &this_pc->horizontal[1]);
-  alloc_mode_context(cm, num_4x4_blk/2, &this_pc->vertical[1]);
+  alloc_mode_context(cm, num_4x4_blk/2, &tree->horizontal[1]);
+  alloc_mode_context(cm, num_4x4_blk/2, &tree->vertical[1]);
+}
+
+static void free_tree_contexts(PC_TREE *tree) {
+  free_mode_context(&tree->none);
+  free_mode_context(&tree->horizontal[0]);
+  free_mode_context(&tree->horizontal[1]);
+  free_mode_context(&tree->vertical[0]);
+  free_mode_context(&tree->vertical[1]);
 }
 
 // This function sets up a tree of contexts such that at each square
@@ -97,9 +98,9 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) {
 
   vpx_free(x->leaf_tree);
   CHECK_MEM_ERROR(cm, x->leaf_tree, vpx_calloc(leaf_nodes,
-                                               sizeof(PICK_MODE_CONTEXT)));
+                                               sizeof(*x->leaf_tree)));
   vpx_free(x->pc_tree);
-  CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(PC_TREE)));
+  CHECK_MEM_ERROR(cm, x->pc_tree, vpx_calloc(tree_nodes, sizeof(*x->pc_tree)));
 
   this_pc = &x->pc_tree[0];
   this_leaf = &x->leaf_tree[0];
@@ -111,45 +112,45 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x) {
 
   // Sets up all the leaf nodes in the tree.
   for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
-    x->pc_tree[pc_tree_index].block_size = square[0];
-    alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index], 4);
-    x->pc_tree[pc_tree_index].leaf_split[0] = this_leaf++;
-    for (j = 1; j < 4; j++) {
-      x->pc_tree[pc_tree_index].leaf_split[j] =
-          x->pc_tree[pc_tree_index].leaf_split[0];
-    }
+    PC_TREE *const tree = &x->pc_tree[pc_tree_index];
+    tree->block_size = square[0];
+    alloc_tree_contexts(cm, tree, 4);
+    tree->leaf_split[0] = this_leaf++;
+    for (j = 1; j < 4; j++)
+      tree->leaf_split[j] = tree->leaf_split[0];
   }
 
   // Each node has 4 leaf nodes, fill each block_size level of the tree
   // from leafs to the root.
-  for (nodes = 16; nodes > 0; nodes >>= 2, ++square_index) {
-    for (i = 0; i < nodes; ++pc_tree_index,  ++i) {
-      alloc_tree_contexts(cm, &x->pc_tree[pc_tree_index],
-                          4 << (2 * square_index));
-      x->pc_tree[pc_tree_index].block_size = square[square_index];
-      for (j = 0; j < 4; j++) {
-        x->pc_tree[pc_tree_index].split[j] = this_pc++;
-      }
+  for (nodes = 16; nodes > 0; nodes >>= 2) {
+    for (i = 0; i < nodes; ++i) {
+      PC_TREE *const tree = &x->pc_tree[pc_tree_index];
+      alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
+      tree->block_size = square[square_index];
+      for (j = 0; j < 4; j++)
+        tree->split[j] = this_pc++;
+      ++pc_tree_index;
     }
+    ++square_index;
   }
-  x->pc_root = &x->pc_tree[tree_nodes-1];
+  x->pc_root = &x->pc_tree[tree_nodes - 1];
   x->pc_root[0].none.best_mode_index = 2;
 }
 
-void vp9_free_pc_tree(MACROBLOCK *m) {
+void vp9_free_pc_tree(MACROBLOCK *x) {
   const int tree_nodes = 64 + 16 + 4 + 1;
   int i;
 
   // Set up all 4x4 mode contexts
   for (i = 0; i < 64; ++i)
-    free_mode_context(&m->leaf_tree[i]);
+    free_mode_context(&x->leaf_tree[i]);
 
   // Sets up all the leaf nodes in the tree.
-  for (i = 0; i < tree_nodes; i++) {
-    free_tree_contexts(&m->pc_tree[i]);
-  }
-  vpx_free(m->pc_tree);
-  m->pc_tree = 0;
-  vpx_free(m->leaf_tree);
-  m->leaf_tree = 0;
+  for (i = 0; i < tree_nodes; ++i)
+    free_tree_contexts(&x->pc_tree[i]);
+
+  vpx_free(x->pc_tree);
+  x->pc_tree = NULL;
+  vpx_free(x->leaf_tree);
+  x->leaf_tree = NULL;
 }
diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 87051d5..86e5986 100644
--- a/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -201,7 +201,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
   mbmi = &xd->mi[0]->mbmi;
 
   // Set up destination pointers.
-  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
+  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
 
   // Set up limit values for MV components.
   // Mv beyond the range do not produce new/different prediction block.
@@ -254,7 +254,6 @@ static void duplicate_mode_info_in_sb(VP9_COMMON * const cm,
 }
 
 static void set_block_size(VP9_COMP * const cpi,
-                           const TileInfo *const tile,
                            int mi_row, int mi_col,
                            BLOCK_SIZE bsize) {
   if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
@@ -377,11 +376,9 @@ static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
 
 static int set_vt_partitioning(VP9_COMP *cpi,
                                void *data,
-                               const TileInfo *const tile,
                                BLOCK_SIZE bsize,
                                int mi_row,
-                               int mi_col,
-                               int mi_size) {
+                               int mi_col) {
   VP9_COMMON * const cm = &cpi->common;
   variance_node vt;
   const int block_width = num_8x8_blocks_wide_lookup[bsize];
@@ -398,7 +395,7 @@ static int set_vt_partitioning(VP9_COMP *cpi,
   if (mi_col + block_width / 2 < cm->mi_cols &&
       mi_row + block_height / 2 < cm->mi_rows &&
       vt.part_variances->none.variance < threshold) {
-    set_block_size(cpi, tile, mi_row, mi_col, bsize);
+    set_block_size(cpi, mi_row, mi_col, bsize);
     return 1;
   }
 
@@ -407,8 +404,8 @@ static int set_vt_partitioning(VP9_COMP *cpi,
       vt.part_variances->vert[0].variance < threshold &&
       vt.part_variances->vert[1].variance < threshold) {
     BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
-    set_block_size(cpi, tile, mi_row, mi_col, subsize);
-    set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize);
+    set_block_size(cpi, mi_row, mi_col, subsize);
+    set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
     return 1;
   }
 
@@ -417,8 +414,8 @@ static int set_vt_partitioning(VP9_COMP *cpi,
       vt.part_variances->horz[0].variance < threshold &&
       vt.part_variances->horz[1].variance < threshold) {
     BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
-    set_block_size(cpi, tile, mi_row, mi_col, subsize);
-    set_block_size(cpi, tile, mi_row + block_height / 2, mi_col, subsize);
+    set_block_size(cpi, mi_row, mi_col, subsize);
+    set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
     return 1;
   }
   return 0;
@@ -505,13 +502,13 @@ static void choose_partitioning(VP9_COMP *cpi,
   // Now go through the entire structure,  splitting every block size until
   // we get to one that's got a variance lower than our threshold,  or we
   // hit 8x8.
-  if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64,
-                           mi_row, mi_col, 8)) {
+  if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
+                           mi_row, mi_col)) {
     for (i = 0; i < 4; ++i) {
       const int x32_idx = ((i & 1) << 2);
       const int y32_idx = ((i >> 1) << 2);
-      if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32,
-                               (mi_row + y32_idx), (mi_col + x32_idx), 4)) {
+      if (!set_vt_partitioning(cpi, &vt.split[i], BLOCK_32X32,
+                               (mi_row + y32_idx), (mi_col + x32_idx))) {
         for (j = 0; j < 4; ++j) {
           const int x16_idx = ((j & 1) << 1);
           const int y16_idx = ((j >> 1) << 1);
@@ -521,7 +518,7 @@ static void choose_partitioning(VP9_COMP *cpi,
 #ifdef DISABLE_8X8_VAR_BASED_PARTITION
           if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
               mi_row + x32_idx + x16_idx + 1 < cm->mi_cols) {
-            set_block_size(cpi, tile,
+            set_block_size(cpi,
                            (mi_row + y32_idx + y16_idx),
                            (mi_col + x32_idx + x16_idx),
                            BLOCK_16X16);
@@ -529,7 +526,7 @@ static void choose_partitioning(VP9_COMP *cpi,
             for (k = 0; k < 4; ++k) {
               const int x8_idx = (k & 1);
               const int y8_idx = (k >> 1);
-              set_block_size(cpi, tile,
+              set_block_size(cpi,
                              (mi_row + y32_idx + y16_idx + y8_idx),
                              (mi_col + x32_idx + x16_idx + x8_idx),
                              BLOCK_8X8);
@@ -543,7 +540,7 @@ static void choose_partitioning(VP9_COMP *cpi,
             for (k = 0; k < 4; ++k) {
               const int x8_idx = (k & 1);
               const int y8_idx = (k >> 1);
-              set_block_size(cpi, tile,
+              set_block_size(cpi,
                              (mi_row + y32_idx + y16_idx + y8_idx),
                              (mi_col + x32_idx + x16_idx + x8_idx),
                              BLOCK_8X8);
@@ -1456,8 +1453,7 @@ static void rd_use_partition(VP9_COMP *cpi,
                              MODE_INFO **mi_8x8,
                              TOKENEXTRA **tp, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, int *rate, int64_t *dist,
-                             int do_recon, PC_TREE *pc_tree,
-                             int block) {
+                             int do_recon, PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1624,7 +1620,7 @@ static void rd_use_partition(VP9_COMP *cpi,
 
         rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
                          mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
-                         i != 3, pc_tree->split[i], i);
+                         i != 3, pc_tree->split[i]);
         if (rt == INT_MAX || dt == INT64_MAX) {
           last_part_rate = INT_MAX;
           last_part_dist = INT64_MAX;
@@ -1809,15 +1805,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
                                     BLOCK_SIZE *max_block_size) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-  MODE_INFO **mi_8x8 = xd->mi;
-  const int left_in_image = xd->left_available && mi_8x8[-1];
-  const int above_in_image = xd->up_available &&
-                             mi_8x8[-xd->mi_stride];
-  MODE_INFO **above_sb64_mi_8x8;
-  MODE_INFO **left_sb64_mi_8x8;
-
-  int row8x8_remaining = tile->mi_row_end - mi_row;
-  int col8x8_remaining = tile->mi_col_end - mi_col;
+  MODE_INFO **mi = xd->mi;
+  const int left_in_image = xd->left_available && mi[-1];
+  const int above_in_image = xd->up_available && mi[-xd->mi_stride];
+  const int row8x8_remaining = tile->mi_row_end - mi_row;
+  const int col8x8_remaining = tile->mi_col_end - mi_col;
   int bh, bw;
   BLOCK_SIZE min_size = BLOCK_4X4;
   BLOCK_SIZE max_size = BLOCK_64X64;
@@ -1837,15 +1829,13 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
     }
     // Find the min and max partition sizes used in the left SB64
     if (left_in_image) {
-      left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, left_sb64_mi_8x8,
-                                  &min_size, &max_size);
+      MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
+      get_sb_partition_size_range(cpi, left_sb64_mi, &min_size, &max_size);
     }
     // Find the min and max partition sizes used in the above SB64.
     if (above_in_image) {
-      above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];
-      get_sb_partition_size_range(cpi, above_sb64_mi_8x8,
-                                  &min_size, &max_size);
+      MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
+      get_sb_partition_size_range(cpi, above_sb64_mi, &min_size, &max_size);
     }
     // adjust observed min and max
     if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
@@ -1871,6 +1861,67 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
   *max_block_size = max_size;
 }
 
+static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,  // Picks a [min, max] block-size range for the SB64 at (mi_row, mi_col).
+                                 int mi_row, int mi_col,
+                                 BLOCK_SIZE *min_block_size,  // out: receives the chosen lower bound
+                                 BLOCK_SIZE *max_block_size) {  // out: receives the chosen upper bound
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  MODE_INFO **mi_8x8 = xd->mi;
+  const int left_in_image = xd->left_available && mi_8x8[-1];  // left neighbour exists and its entry is non-NULL
+  const int above_in_image = xd->up_available &&
+                             mi_8x8[-xd->mi_stride];  // same test for the entry one row up
+  int row8x8_remaining = tile->mi_row_end - mi_row;  // rows left before the tile's end
+  int col8x8_remaining = tile->mi_col_end - mi_col;  // columns left before the tile's end
+  int bh, bw;  // only handed to find_partition_size(); never read in this function
+  BLOCK_SIZE min_size = BLOCK_32X32;  // seeded at the top of the range so MIN() below can only lower it
+  BLOCK_SIZE max_size = BLOCK_8X8;  // seeded at the bottom of the range so MAX() below can only raise it
+  int bsl = mi_width_log2_lookup[BLOCK_64X64];
+  int search_range_ctrl = (((mi_row + mi_col) >> bsl) +
+                           cpi->sf.chessboard_index) & 0x01;  // 0 or 1; the neighbour-derived range is used only when 1
+  // Use the neighbours only on selected blocks, and only if a neighbour exists or this is not a key frame.
+  if (search_range_ctrl &&
+      (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) {
+    int block;
+    MODE_INFO **mi;
+    BLOCK_SIZE sb_type;
+
+    // Fold in the sb_type of the column just left of this position, one entry per row.
+    if (left_in_image) {
+      MODE_INFO *cur_mi;
+      mi = &mi_8x8[-1];
+      for (block = 0; block < MI_BLOCK_SIZE; ++block) {
+        cur_mi = mi[block * xd->mi_stride];  // step down one row per iteration
+        sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0;  // NULL entry contributes 0 (presumably BLOCK_4X4 -- confirm)
+        min_size = MIN(min_size, sb_type);
+        max_size = MAX(max_size, sb_type);
+      }
+    }
+    // Fold in the sb_type of MI_BLOCK_SIZE consecutive entries of a row above this position.
+    if (above_in_image) {
+      mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];  // NOTE(review): this is MI_BLOCK_SIZE rows up, not the adjacent row tested above -- confirm intended
+      for (block = 0; block < MI_BLOCK_SIZE; ++block) {
+        sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0;  // NULL entry contributes 0, as in the left scan
+        min_size = MIN(min_size, sb_type);
+        max_size = MAX(max_size, sb_type);
+      }
+    }
+
+    min_size = min_partition_size[min_size];  // remap the observed minimum through a table defined elsewhere
+    max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
+                                   &bh, &bw);  // helper is given the rows/cols remaining in the tile
+    min_size = MIN(min_size, max_size);  // cap the lower bound at the upper bound
+    min_size = MAX(min_size, BLOCK_8X8);  // lower bound never goes below BLOCK_8X8
+    max_size = MIN(max_size, BLOCK_32X32);  // upper bound never goes above BLOCK_32X32
+  } else {  // no neighbour-derived range: use the fixed BLOCK_8X8..BLOCK_32X32 range
+    min_size = BLOCK_8X8;
+    max_size = BLOCK_32X32;
+  }
+
+  *min_block_size = min_size;
+  *max_block_size = max_size;
+}
+
 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
   vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
 }
@@ -1886,7 +1937,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
                               TOKENEXTRA **tp, int mi_row,
                               int mi_col, BLOCK_SIZE bsize, int *rate,
                               int64_t *dist, int do_recon, int64_t best_rd,
-                              PC_TREE *pc_tree, int block) {
+                              PC_TREE *pc_tree) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2038,7 +2089,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
 
         rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
                           subsize, &this_rate, &this_dist, i != 3,
-                          best_rd - sum_rd, pc_tree->split[i], i);
+                          best_rd - sum_rd, pc_tree->split[i]);
 
         if (this_rate == INT_MAX) {
           sum_rd = INT64_MAX;
@@ -2239,26 +2290,26 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
          sf->partition_search_type == VAR_BASED_PARTITION ||
          sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
       const int idx_str = cm->mi_stride * mi_row + mi_col;
-      MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-      MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+      MODE_INFO **mi = cm->mi_grid_visible + idx_str;
+      MODE_INFO **prev_mi = cm->prev_mi_grid_visible + idx_str;
       cpi->mb.source_variance = UINT_MAX;
       if (sf->partition_search_type == FIXED_PARTITION) {
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+        set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
                                sf->always_this_block_size);
-        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+        rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1, x->pc_root);
       } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
         BLOCK_SIZE bsize;
         set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+        set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
+        rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1, x->pc_root);
       } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
         choose_partitioning(cpi, tile, mi_row, mi_col);
-        rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                         &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+        rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                         &dummy_rate, &dummy_dist, 1, x->pc_root);
       } else {
         if ((cm->current_video_frame
             % sf->last_partitioning_redo_frequency) == 0
@@ -2268,7 +2319,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
             || cpi->rc.is_src_frame_alt_ref
             || ((sf->use_lastframe_partitioning ==
                  LAST_FRAME_PARTITION_LOW_MOTION) &&
-                 sb_has_motion(cm, prev_mi_8x8))) {
+                 sb_has_motion(cm, prev_mi))) {
           // If required set upper and lower partition size limits
           if (sf->auto_min_max_partition_size) {
             set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2277,17 +2328,16 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                     &sf->max_partition_size);
           }
           rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                            &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root,
-                            0);
+                            &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root);
         } else {
           if (sf->constrain_copy_partition &&
-              sb_has_motion(cm, prev_mi_8x8))
-            constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8,
+              sb_has_motion(cm, prev_mi))
+            constrain_copy_partitioning(cpi, tile, mi, prev_mi,
                                         mi_row, mi_col, BLOCK_16X16);
           else
-            copy_partitioning(cm, mi_8x8, prev_mi_8x8);
-          rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
-                           &dummy_rate, &dummy_dist, 1, x->pc_root, 0);
+            copy_partitioning(cm, mi, prev_mi);
+          rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+                           &dummy_rate, &dummy_dist, 1, x->pc_root);
         }
       }
     } else {
@@ -2299,7 +2349,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 &sf->max_partition_size);
       }
       rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
-                        &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root, 0);
+                        &dummy_rate, &dummy_dist, 1, INT64_MAX, x->pc_root);
     }
   }
 }
@@ -2623,9 +2673,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
 
       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
         continue;
-
       load_pred_mv(x, ctx);
-
       nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
                            subsize, &this_rate, &this_dist, 0,
                            best_rd - sum_rd, pc_tree->split[i]);
@@ -2768,7 +2816,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
 
 static void nonrd_use_partition(VP9_COMP *cpi,
                                 const TileInfo *const tile,
-                                MODE_INFO **mi_8x8,
+                                MODE_INFO **mi,
                                 TOKENEXTRA **tp,
                                 int mi_row, int mi_col,
                                 BLOCK_SIZE bsize, int output_enabled,
@@ -2787,7 +2835,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  subsize = (bsize >= BLOCK_8X8) ? mi_8x8[0]->mbmi.sb_type : BLOCK_4X4;
+  subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
   partition = partition_lookup[bsl][subsize];
 
   switch (partition) {
@@ -2815,7 +2863,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       if (mi_row + hbs < cm->mi_rows) {
         nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
                             &rate, &dist, subsize);
-        pc_tree->horizontal[1].mic.mbmi = mi_8x8[0]->mbmi;
+        pc_tree->horizontal[1].mic.mbmi = mi[0]->mbmi;
         if (rate != INT_MAX && dist != INT64_MAX &&
             *totrate != INT_MAX && *totdist != INT64_MAX) {
           *totrate += rate;
@@ -2825,10 +2873,10 @@ static void nonrd_use_partition(VP9_COMP *cpi,
       break;
     case PARTITION_SPLIT:
       subsize = get_subsize(bsize, PARTITION_SPLIT);
-      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+      nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
                           subsize, output_enabled, totrate, totdist,
                           pc_tree->split[0]);
-      nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
+      nonrd_use_partition(cpi, tile, mi + hbs, tp,
                           mi_row, mi_col + hbs, subsize, output_enabled,
                           &rate, &dist, pc_tree->split[1]);
       if (rate != INT_MAX && dist != INT64_MAX &&
@@ -2836,7 +2884,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
         *totrate += rate;
         *totdist += dist;
       }
-      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
+      nonrd_use_partition(cpi, tile, mi + hbs * mis, tp,
                           mi_row + hbs, mi_col, subsize, output_enabled,
                           &rate, &dist, pc_tree->split[2]);
       if (rate != INT_MAX && dist != INT64_MAX &&
@@ -2844,7 +2892,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
         *totrate += rate;
         *totdist += dist;
       }
-      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
+      nonrd_use_partition(cpi, tile, mi + hbs * mis + hbs, tp,
                           mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                           &rate, &dist, pc_tree->split[3]);
       if (rate != INT_MAX && dist != INT64_MAX &&
@@ -2883,8 +2931,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     int dummy_rate = 0;
     int64_t dummy_dist = 0;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-    MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
+    MODE_INFO **prev_mi = cm->prev_mi_grid_visible + idx_str;
     BLOCK_SIZE bsize;
 
     x->in_static_area = 0;
@@ -2895,12 +2943,12 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     switch (cpi->sf.partition_search_type) {
       case VAR_BASED_PARTITION:
         choose_partitioning(cpi, tile, mi_row, mi_col);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+        nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case SOURCE_VAR_BASED_PARTITION:
-        set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+        set_source_var_based_partition(cpi, tile, mi, mi_row, mi_col);
+        nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case VAR_BASED_FIXED_PARTITION:
@@ -2908,19 +2956,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
         bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
                 cpi->sf.always_this_block_size :
                 get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
-        set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
-        nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+        set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
+        nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist, x->pc_root);
         break;
       case REFERENCE_PARTITION:
         if (cpi->sf.partition_check ||
             !is_background(cpi, tile, mi_row, mi_col)) {
+          set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+          auto_partition_range(cpi, tile, mi_row, mi_col,
+                               &cpi->sf.min_partition_size,
+                               &cpi->sf.max_partition_size);
           nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
                                &dummy_rate, &dummy_dist, 1, INT64_MAX,
                                x->pc_root);
         } else {
-          copy_partitioning(cm, mi_8x8, prev_mi_8x8);
-          nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+          copy_partitioning(cm, mi, prev_mi);
+          nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
                               BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
                               x->pc_root);
         }
diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.c b/source/libvpx/vp9/encoder/vp9_encodemb.c
index d71b16f..3b231b7 100644
--- a/source/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/source/libvpx/vp9/encoder/vp9_encodemb.c
@@ -99,7 +99,7 @@ static int trellis_get_coeff_context(const int16_t *scan,
 }
 
 static int optimize_b(MACROBLOCK *mb, int plane, int block,
-                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) {
+                      TX_SIZE tx_size, int ctx) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   struct macroblock_plane *const p = &mb->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -381,7 +381,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
 
   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
     const int ctx = combine_entropy_contexts(*a, *l);
-    *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0;
+    *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
   } else {
     *a = *l = p->eobs[block] > 0;
   }
diff --git a/source/libvpx/vp9/encoder/vp9_encoder.c b/source/libvpx/vp9/encoder/vp9_encoder.c
index cc2c552..911ce7c 100644
--- a/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -115,22 +115,6 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
   }
 }
 
-static void setup_key_frame(VP9_COMP *cpi) {
-  vp9_setup_past_independence(&cpi->common);
-
-  // All buffers are implicitly updated on key frames.
-  cpi->refresh_golden_frame = 1;
-  cpi->refresh_alt_ref_frame = 1;
-}
-
-static void setup_inter_frame(VP9_COMMON *cm) {
-  if (cm->error_resilient_mode || cm->intra_only)
-    vp9_setup_past_independence(cm);
-
-  assert(cm->frame_context_idx < FRAME_CONTEXTS);
-  cm->fc = cm->frame_contexts[cm->frame_context_idx];
-}
-
 static void setup_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   // Set up entropy context depending on frame type. The decoder mandates
@@ -138,17 +122,21 @@ static void setup_frame(VP9_COMP *cpi) {
   // frames where the error_resilient_mode or intra_only flag is set. For
   // other inter-frames the encoder currently uses only two contexts;
   // context 1 for ALTREF frames and context 0 for the others.
+  if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
+    vp9_setup_past_independence(cm);
+  } else {
+    if (!cpi->use_svc)
+      cm->frame_context_idx = cpi->refresh_alt_ref_frame;
+  }
+
   if (cm->frame_type == KEY_FRAME) {
-    setup_key_frame(cpi);
+    cpi->refresh_golden_frame = 1;
+    cpi->refresh_alt_ref_frame = 1;
   } else {
-    if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
-        cm->frame_context_idx = cpi->refresh_alt_ref_frame;
-     setup_inter_frame(cm);
+    cm->fc = cm->frame_contexts[cm->frame_context_idx];
   }
 }
 
-
-
 void vp9_initialize_enc() {
   static int init_done = 0;
 
@@ -761,7 +749,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
 
 
 VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
-  int i, j;
+  unsigned int i, j;
   VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP));
   VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL;
 
@@ -1054,7 +1042,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
 }
 
 void vp9_remove_compressor(VP9_COMP *cpi) {
-  int i;
+  unsigned int i;
 
   if (!cpi)
     return;
@@ -1617,7 +1605,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
   }
 
   if (lf->filter_level > 0) {
-    vp9_loop_filter_frame(cm, xd, lf->filter_level, 0, 0);
+    vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
   }
 
   vp9_extend_frame_inner_borders(cm->frame_to_show);
@@ -1737,8 +1725,6 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
 #endif
 
 static void encode_without_recode_loop(VP9_COMP *cpi,
-                                       size_t *size,
-                                       uint8_t *dest,
                                        int q) {
   VP9_COMMON *const cm = &cpi->common;
   vp9_clear_system_state();
@@ -2174,7 +2160,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   }
 
   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
-    encode_without_recode_loop(cpi, size, dest, q);
+    encode_without_recode_loop(cpi, q);
   } else {
     encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
   }
@@ -2236,9 +2222,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     }
   }
 
-#if 0
-  output_frame_level_debug_stats(cpi);
-#endif
   if (cpi->refresh_golden_frame == 1)
     cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
   else
@@ -2254,6 +2237,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cm->last_frame_type = cm->frame_type;
   vp9_rc_postencode_update(cpi, *size);
 
+#if 0
+  output_frame_level_debug_stats(cpi);
+#endif
+
   if (cm->frame_type == KEY_FRAME) {
     // Tell the caller that the frame was coded as a key frame
     *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
@@ -2790,6 +2777,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                               vp9_ppflags_t *flags) {
   VP9_COMMON *cm = &cpi->common;
+#if !CONFIG_VP9_POSTPROC
+  (void)flags;
+#endif
 
   if (!cm->show_frame) {
     return -1;
@@ -2798,7 +2788,6 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
 #if CONFIG_VP9_POSTPROC
     ret = vp9_post_proc_frame(cm, dest, flags);
 #else
-
     if (cm->frame_to_show) {
       *dest = *cm->frame_to_show;
       dest->y_width = cm->width;
@@ -2809,64 +2798,13 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
     } else {
       ret = -1;
     }
-
 #endif  // !CONFIG_VP9_POSTPROC
     vp9_clear_system_state();
     return ret;
   }
 }
 
-int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
-                   unsigned int cols, int delta_q[MAX_SEGMENTS],
-                   int delta_lf[MAX_SEGMENTS],
-                   unsigned int threshold[MAX_SEGMENTS]) {
-  signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS];
-  struct segmentation *seg = &cpi->common.seg;
-  const VP9_COMMON *const cm = &cpi->common;
-  int i;
-
-  if (cm->mb_rows != rows || cm->mb_cols != cols)
-    return -1;
-
-  if (!map) {
-    vp9_disable_segmentation(seg);
-    return 0;
-  }
-
-  vpx_memcpy(cpi->segmentation_map, map, cm->mi_rows * cm->mi_cols);
-
-  // Activate segmentation.
-  vp9_enable_segmentation(seg);
-
-  // Set up the quant, LF and breakout threshold segment data
-  for (i = 0; i < MAX_SEGMENTS; i++) {
-    feature_data[SEG_LVL_ALT_Q][i] = delta_q[i];
-    feature_data[SEG_LVL_ALT_LF][i] = delta_lf[i];
-    cpi->segment_encode_breakout[i] = threshold[i];
-  }
-
-  // Enable the loop and quant changes in the feature mask
-  for (i = 0; i < MAX_SEGMENTS; i++) {
-    if (delta_q[i])
-      vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
-    else
-      vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
-
-    if (delta_lf[i])
-      vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
-    else
-      vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
-  }
-
-  // Initialize the feature data structure
-  // SEGMENT_DELTADATA    0, SEGMENT_ABSDATA      1
-  vp9_set_segment_data(seg, &feature_data[0][0], SEGMENT_DELTADATA);
-
-  return 0;
-}
-
-int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map,
-                       unsigned int rows, unsigned int cols) {
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols) {
   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
     if (map) {
       vpx_memcpy(cpi->active_map, map, rows * cols);
diff --git a/source/libvpx/vp9/encoder/vp9_encoder.h b/source/libvpx/vp9/encoder/vp9_encoder.h
index f48909e..17c826f 100644
--- a/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -391,7 +391,6 @@ typedef struct VP9_COMP {
   RATE_CONTROL rc;
 
   vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
-  vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES];
 
   struct vpx_codec_pkt_list  *output_pkt_list;
 
@@ -552,14 +551,7 @@ int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
 
 int vp9_update_entropy(VP9_COMP *cpi, int update);
 
-int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map,
-                   unsigned int rows, unsigned int cols,
-                   int delta_q[MAX_SEGMENTS],
-                   int delta_lf[MAX_SEGMENTS],
-                   unsigned int threshold[MAX_SEGMENTS]);
-
-int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map,
-                       unsigned int rows, unsigned int cols);
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
 
 int vp9_set_internal_size(VP9_COMP *cpi,
                           VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c
index efa320f..ed72d78 100644
--- a/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -61,6 +61,7 @@
 #define MIN_GF_INTERVAL             4
 #endif
 
+
 // #define LONG_TERM_VBR_CORRECTION
 
 static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
@@ -540,7 +541,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
 
   vp9_setup_src_planes(x, cpi->Source, 0, 0);
   vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
-  vp9_setup_dst_planes(xd, new_yv12, 0, 0);
+  vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0);
 
   xd->mi = cm->mi_grid_visible;
   xd->mi[0] = cm->mi;
@@ -1417,12 +1418,90 @@ void define_fixed_arf_period(VP9_COMP *cpi) {
 }
 #endif
 
+// Calculate a section intra ratio used in setting max loop filter; the result is stored in twopass->section_intra_rating.
+static void calculate_section_intra_ratio(struct twopass_rc *twopass,
+                                          const FIRSTPASS_STATS *start_pos,  // stats position the section starts at
+                                          int section_length) {  // number of stats entries to read
+  FIRSTPASS_STATS next_frame;
+  FIRSTPASS_STATS sectionstats;
+  int i;
+
+  vp9_zero(next_frame);
+  vp9_zero(sectionstats);  // accumulator starts from zero
+
+  reset_fpf_position(twopass, start_pos);  // begin reading at start_pos
+
+  for (i = 0; i < section_length; ++i) {
+    input_stats(twopass, &next_frame);  // fetch the next stats entry
+    accumulate_stats(&sectionstats, &next_frame);  // add it to the running section totals
+  }
+
+  avg_stats(&sectionstats);  // presumably converts the totals to per-frame averages -- confirm in avg_stats()
+
+  twopass->section_intra_rating =
+    (int)(sectionstats.intra_error /
+          DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));  // intra_error / coded_error, truncated to int
+
+  reset_fpf_position(twopass, start_pos);  // put the stats position back at start_pos before returning
+}
+
+// Calculate the total bits to allocate in this GF/ARF group; returns a value in [0, kf_group_bits], further capped below.
+static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi,
+                                             double gf_group_err) {  // error figure for this group, compared with kf_group_error_left
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const struct twopass_rc *const twopass = &cpi->twopass;
+  const int max_bits = frame_max_bits(rc, &cpi->oxcf);  // max bits for a single frame
+  int64_t total_group_bits;
+
+  // Give the group the share of kf_group_bits matching its share of the remaining kf-group error.
+  if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) {
+    total_group_bits = (int64_t)(twopass->kf_group_bits *
+                                 (gf_group_err / twopass->kf_group_error_left));
+  } else {
+    total_group_bits = 0;  // nothing left to hand out
+  }
+
+  // Clamp odd edge cases: the result must lie in [0, kf_group_bits].
+  total_group_bits = (total_group_bits < 0) ?
+     0 : (total_group_bits > twopass->kf_group_bits) ?
+     twopass->kf_group_bits : total_group_bits;
+
+  // Clip based on user supplied data rate variability limit: at most max_bits per frame of the GF interval.
+  if (total_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
+    total_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;  // widened to int64_t before multiplying
+
+  return total_group_bits;
+}
+
+// Calculate the number of extra bits to assign to boosted frames in a group (0 for invalid inputs).
+static int calculate_boost_bits(int frame_count,
+                                int boost, int64_t total_group_bits) {
+  int allocation_chunks;
+
+  // Return 0 for invalid inputs (could arise e.g. through rounding errors); boost < 0 could zero the divisor below.
+  if ((boost <= 0) || (total_group_bits <= 0) || (frame_count <= 0))
+    return 0;
+
+  allocation_chunks = (frame_count * 100) + boost;  // 100 units per frame plus the boost itself
+
+  // Prevent overflow: scale boost and allocation_chunks down by the same divisor.
+  if (boost > 1023) {
+    int divisor = boost >> 10;
+    boost /= divisor;
+    allocation_chunks /= divisor;
+  }
+
+  // Calculate the number of extra bits for use in the boosted frame or frames.
+  return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
+}
+
+
 // Analyse and define a gf/arf group.
 static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   struct twopass_rc *const twopass = &cpi->twopass;
-  FIRSTPASS_STATS next_frame = { 0 };
+  FIRSTPASS_STATS next_frame;
   const FIRSTPASS_STATS *start_pos;
   int i;
   double boost_score = 0.0;
@@ -1442,8 +1521,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   double mv_in_out_accumulator = 0.0;
   double abs_mv_in_out_accumulator = 0.0;
   double mv_ratio_accumulator_thresh;
-  // Max bits for a single frame.
-  const int max_bits = frame_max_bits(rc, oxcf);
   unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames;
 
   int f_boost = 0;
@@ -1451,10 +1528,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int flash_detected;
   int active_max_gf_interval;
 
-  twopass->gf_group_bits = 0;
-
   vp9_clear_system_state();
+  vp9_zero(next_frame);
 
+  twopass->gf_group_bits = 0;
   start_pos = twopass->stats_in;
 
   // Load stats for the current frame.
@@ -1657,149 +1734,57 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }
 #endif
 #endif
-
-  // Calculate the bits to be allocated to the group as a whole.
-  if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) {
-    twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits *
-                (gf_group_err / twopass->kf_group_error_left));
-  } else {
-    twopass->gf_group_bits = 0;
-  }
-  twopass->gf_group_bits = (twopass->gf_group_bits < 0) ?
-     0 : (twopass->gf_group_bits > twopass->kf_group_bits) ?
-     twopass->kf_group_bits : twopass->gf_group_bits;
-
-  // Clip cpi->twopass.gf_group_bits based on user supplied data rate
-  // variability limit, cpi->oxcf.two_pass_vbrmax_section.
-  if (twopass->gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
-    twopass->gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
-
   // Reset the file position.
   reset_fpf_position(twopass, start_pos);
 
-  // Assign  bits to the arf or gf.
-  for (i = 0; i <= (rc->source_alt_ref_pending &&
-                    cpi->common.frame_type != KEY_FRAME); ++i) {
-    int allocation_chunks;
-    int q = rc->last_q[INTER_FRAME];
-    int gf_bits;
+  // Calculate the bits to be allocated to the gf/arf group as a whole
+  twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
 
+  // Calculate the extra bits to be used for boosted frame(s)
+  {
+    int q = rc->last_q[INTER_FRAME];
     int boost = (rc->gfu_boost * gfboost_qadjust(q)) / 100;
 
     // Set max and minimum boost and hence minimum allocation.
     boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200);
 
-    if (rc->source_alt_ref_pending && i == 0)
-      allocation_chunks = ((rc->baseline_gf_interval + 1) * 100) + boost;
-    else
-      allocation_chunks = (rc->baseline_gf_interval * 100) + (boost - 100);
-
-    // Prevent overflow.
-    if (boost > 1023) {
-      int divisor = boost >> 10;
-      boost /= divisor;
-      allocation_chunks /= divisor;
-    }
-
-    // Calculate the number of bits to be spent on the gf or arf based on
-    // the boost number.
-    gf_bits = (int)((double)boost * (twopass->gf_group_bits /
-                  (double)allocation_chunks));
-
-    // If the frame that is to be boosted is simpler than the average for
-    // the gf/arf group then use an alternative calculation
-    // based on the error score of the frame itself.
-    if (rc->baseline_gf_interval < 1 ||
-        mod_frame_err < gf_group_err / (double)rc->baseline_gf_interval) {
-      double alt_gf_grp_bits = (double)twopass->kf_group_bits  *
-        (mod_frame_err * (double)rc->baseline_gf_interval) /
-        DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left);
-
-      int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /
-                                           (double)allocation_chunks));
-
-      if (gf_bits > alt_gf_bits)
-        gf_bits = alt_gf_bits;
-    } else {
-      // If it is harder than other frames in the group make sure it at
-      // least receives an allocation in keeping with its relative error
-      // score, otherwise it may be worse off than an "un-boosted" frame.
-      int alt_gf_bits = (int)((double)twopass->kf_group_bits *
-                        mod_frame_err /
-                        DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left));
-
-      if (alt_gf_bits > gf_bits)
-        gf_bits = alt_gf_bits;
-    }
+    // Calculate the extra bits to be used for boosted frame(s)
+    twopass->gf_bits = calculate_boost_bits(rc->baseline_gf_interval,
+                                            boost, twopass->gf_group_bits);
 
-    // Don't allow a negative value for gf_bits.
-    if (gf_bits < 0)
-      gf_bits = 0;
 
-    if (i == 0) {
-      twopass->gf_bits = gf_bits;
-    }
-    if (i == 1 ||
-        (!rc->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME &&
-         !vp9_is_upper_layer_key_frame(cpi))) {
-      // Calculate the per frame bit target for this frame.
-      vp9_rc_set_frame_target(cpi, gf_bits);
+    // For key frames the frame target rate is set already.
+    // NOTE: We don't bother to check for the special case of ARF overlay
+    // frames here, as there is clamping code for this in the function
+    // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
+    // encodes.
+    if (cpi->common.frame_type != KEY_FRAME &&
+        !vp9_is_upper_layer_key_frame(cpi)) {
+      vp9_rc_set_frame_target(cpi, twopass->gf_bits);
     }
   }
 
-  {
-    // Adjust KF group bits and error remaining.
-    twopass->kf_group_error_left -= (int64_t)gf_group_err;
-
-    // If this is an arf update we want to remove the score for the overlay
-    // frame at the end which will usually be very cheap to code.
-    // The overlay frame has already, in effect, been coded so we want to spread
-    // the remaining bits among the other frames.
-    // For normal GFs remove the score for the GF itself unless this is
-    // also a key frame in which case it has already been accounted for.
-    if (rc->source_alt_ref_pending) {
-      twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err);
-    } else if (cpi->common.frame_type != KEY_FRAME) {
-      twopass->gf_group_error_left = (int64_t)(gf_group_err
-                                                   - gf_first_frame_err);
-    } else {
-      twopass->gf_group_error_left = (int64_t)gf_group_err;
-    }
-
-    // This condition could fail if there are two kfs very close together
-    // despite MIN_GF_INTERVAL and would cause a divide by 0 in the
-    // calculation of alt_extra_bits.
-    if (rc->baseline_gf_interval >= 3) {
-      const int boost = rc->source_alt_ref_pending ? b_boost : rc->gfu_boost;
-
-      if (boost >= 150) {
-        const int pct_extra = MIN(20, (boost - 100) / 50);
-        const int alt_extra_bits = (int)((
-            MAX(twopass->gf_group_bits - twopass->gf_bits, 0) *
-            pct_extra) / 100);
-        twopass->gf_group_bits -= alt_extra_bits;
-      }
-    }
+  // Adjust KF group bits and error remaining.
+  twopass->kf_group_error_left -= (int64_t)gf_group_err;
+
+  // If this is an arf update we want to remove the score for the overlay
+  // frame at the end which will usually be very cheap to code.
+  // The overlay frame has already, in effect, been coded so we want to spread
+  // the remaining bits among the other frames.
+  // For normal GFs remove the score for the GF itself unless this is
+  // also a key frame in which case it has already been accounted for.
+  if (rc->source_alt_ref_pending) {
+    twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err);
+  } else if (cpi->common.frame_type != KEY_FRAME) {
+    twopass->gf_group_error_left = (int64_t)(gf_group_err
+                                                 - gf_first_frame_err);
+  } else {
+    twopass->gf_group_error_left = (int64_t)gf_group_err;
   }
 
+  // Calculate a section intra ratio used in setting max loop filter.
   if (cpi->common.frame_type != KEY_FRAME) {
-    FIRSTPASS_STATS sectionstats;
-
-    zero_stats(&sectionstats);
-    reset_fpf_position(twopass, start_pos);
-
-    for (i = 0; i < rc->baseline_gf_interval; ++i) {
-      input_stats(twopass, &next_frame);
-      accumulate_stats(&sectionstats, &next_frame);
-    }
-
-    avg_stats(&sectionstats);
-
-    twopass->section_intra_rating = (int)
-      (sectionstats.intra_error /
-      DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
-
-    reset_fpf_position(twopass, start_pos);
+    calculate_section_intra_ratio(twopass, start_pos, rc->baseline_gf_interval);
   }
 }
 
@@ -2050,15 +2035,15 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   } else {
     twopass->kf_group_bits = 0;
   }
+  twopass->kf_group_bits = MAX(0, twopass->kf_group_bits);
+
   // Reset the first pass file position.
   reset_fpf_position(twopass, start_position);
 
-  // Determine how big to make this keyframe based on how well the subsequent
-  // frames use inter blocks.
+  // Scan through the kf group collating various stats used to determine
+  // how many bits to spend on it.
   decay_accumulator = 1.0;
   boost_score = 0.0;
-
-  // Scan through the kf group collating various stats.
   for (i = 0; i < rc->frames_to_key; ++i) {
     if (EOF == input_stats(twopass, &next_frame))
       break;
@@ -2095,101 +2080,27 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     }
   }
 
-  {
-    FIRSTPASS_STATS sectionstats;
+  // Store the zero motion percentage
+  twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
 
-    zero_stats(&sectionstats);
-    reset_fpf_position(twopass, start_position);
+  // Calculate a section intra ratio used in setting max loop filter.
+  calculate_section_intra_ratio(twopass, start_position, rc->frames_to_key);
 
-    for (i = 0; i < rc->frames_to_key; ++i) {
-      input_stats(twopass, &next_frame);
-      accumulate_stats(&sectionstats, &next_frame);
-    }
-
-    avg_stats(&sectionstats);
-
-    twopass->section_intra_rating = (int) (sectionstats.intra_error /
-        DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
-  }
+  // Work out how many bits to allocate for the key frame itself.
+  rc->kf_boost = (int)boost_score;
 
-  // Reset the first pass file position.
-  reset_fpf_position(twopass, start_position);
+  if (rc->kf_boost  < (rc->frames_to_key * 3))
+    rc->kf_boost  = (rc->frames_to_key * 3);
+  if (rc->kf_boost   < MIN_KF_BOOST)
+    rc->kf_boost = MIN_KF_BOOST;
 
-  // Work out how many bits to allocate for the key frame itself.
-  if (1) {
-    int kf_boost = (int)boost_score;
-    int allocation_chunks;
-
-    if (kf_boost < (rc->frames_to_key * 3))
-      kf_boost = (rc->frames_to_key * 3);
-
-    if (kf_boost < MIN_KF_BOOST)
-      kf_boost = MIN_KF_BOOST;
-
-    // Make a note of baseline boost and the zero motion
-    // accumulator value for use elsewhere.
-    rc->kf_boost = kf_boost;
-    twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
-
-    // Key frame size depends on:
-    // (1) the error score for the whole key frame group,
-    // (2) the key frames' own error if this is smaller than the
-    //     average for the group (optional),
-    // (3) insuring that the frame receives at least the allocation it would
-    //     have received based on its own error score vs the error score
-    //     remaining.
-    // Special case:
-    // If the sequence appears almost totally static we want to spend almost
-    // all of the bits on the key frame.
-    //
-    // We use (cpi->rc.frames_to_key - 1) below because the key frame itself is
-    // taken care of by kf_boost.
-    if (zero_motion_accumulator >= 0.99) {
-      allocation_chunks = ((rc->frames_to_key - 1) * 10) + kf_boost;
-    } else {
-      allocation_chunks = ((rc->frames_to_key - 1) * 100) + kf_boost;
-    }
+  twopass->kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
+                                          rc->kf_boost, twopass->kf_group_bits);
 
-    // Prevent overflow.
-    if (kf_boost > 1028) {
-      const int divisor = kf_boost >> 10;
-      kf_boost /= divisor;
-      allocation_chunks /= divisor;
-    }
+  twopass->kf_group_bits -= twopass->kf_bits;
 
-    twopass->kf_group_bits = MAX(0, twopass->kf_group_bits);
-    // Calculate the number of bits to be spent on the key frame.
-    twopass->kf_bits = (int)((double)kf_boost *
-        ((double)twopass->kf_group_bits / allocation_chunks));
-
-    // If the key frame is actually easier than the average for the
-    // kf group (which does sometimes happen, e.g. a blank intro frame)
-    // then use an alternate calculation based on the kf error score
-    // which should give a smaller key frame.
-    if (kf_mod_err < kf_group_err / rc->frames_to_key) {
-      double alt_kf_grp_bits = ((double)twopass->bits_left *
-         (kf_mod_err * (double)rc->frames_to_key) /
-         DOUBLE_DIVIDE_CHECK(twopass->modified_error_left));
-
-      const int alt_kf_bits = (int)((double)kf_boost *
-                          (alt_kf_grp_bits / (double)allocation_chunks));
-
-      if (twopass->kf_bits > alt_kf_bits)
-        twopass->kf_bits = alt_kf_bits;
-    } else {
-      // Else if it is much harder than other frames in the group make sure
-      // it at least receives an allocation in keeping with its relative
-      // error score.
-      const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err /
-               DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)));
-
-      if (alt_kf_bits > twopass->kf_bits)
-        twopass->kf_bits = alt_kf_bits;
-    }
-    twopass->kf_group_bits -= twopass->kf_bits;
-    // Per frame bit target for this frame.
-    vp9_rc_set_frame_target(cpi, twopass->kf_bits);
-  }
+  // Per frame bit target for this frame.
+  vp9_rc_set_frame_target(cpi, twopass->kf_bits);
 
   // Note the total error score of the kf group minus the key frame itself.
   twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -2242,8 +2153,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   double this_frame_coded_error;
   int target;
   LAYER_CONTEXT *lc = NULL;
-  int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1);
-
+  const int is_spatial_svc = (cpi->use_svc &&
+                              cpi->svc.number_temporal_layers == 1);
   if (is_spatial_svc) {
     lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
     frames_left = (int)(twopass->total_stats.count -
@@ -2303,14 +2214,14 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     this_frame_copy = this_frame;
     find_next_key_frame(cpi, &this_frame_copy);
     // Don't place key frame in any enhancement layers in spatial svc
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc) {
       lc->is_key_frame = 1;
       if (cpi->svc.spatial_layer_id > 0) {
         cm->frame_type = INTER_FRAME;
       }
     }
   } else {
-    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+    if (is_spatial_svc) {
       lc->is_key_frame = 0;
     }
     cm->frame_type = INTER_FRAME;
diff --git a/source/libvpx/vp9/encoder/vp9_mbgraph.c b/source/libvpx/vp9/encoder/vp9_mbgraph.c
index e7dcc7a..5e87d28 100644
--- a/source/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/source/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -20,7 +20,6 @@
 #include "vp9/common/vp9_systemdependent.h"
 
 
-
 static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
                                               const MV *ref_mv,
                                               MV *dst_mv,
@@ -237,8 +236,9 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
   int mb_col, mb_row, offset = 0;
   int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
   MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0};
-  MODE_INFO mi_local = { { 0 } };
+  MODE_INFO mi_local;
 
+  vp9_zero(mi_local);
   // Set up limit values for motion vectors to prevent them extending outside
   // the UMV borders.
   x->mv_row_min     = -BORDER_MV_PIXELS_B16;
diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.c b/source/libvpx/vp9/encoder/vp9_mcomp.c
index 43c8ab8..4f7d6f1 100644
--- a/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -886,6 +886,10 @@ int vp9_full_range_search_c(const MACROBLOCK *x,
   int r, c, i;
   int start_col, end_col, start_row, end_row;
 
+  // The cfg and search_param parameters are not used in this search variant
+  (void)cfg;
+  (void)search_param;
+
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   *best_mv = *ref_mv;
   *num00 = 11;
@@ -1551,7 +1555,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
                              const MV *center_mv,
-                             const uint8_t *second_pred, int w, int h) {
+                             const uint8_t *second_pred) {
   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
                            {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
   const MACROBLOCKD *const xd = &x->e_mbd;
diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.h b/source/libvpx/vp9/encoder/vp9_mcomp.h
index 827957d..873edf3 100644
--- a/source/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/source/libvpx/vp9/encoder/vp9_mcomp.h
@@ -144,8 +144,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             const MV *center_mv, const uint8_t *second_pred,
-                             int w, int h);
+                             const MV *center_mv, const uint8_t *second_pred);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.c b/source/libvpx/vp9/encoder/vp9_picklpf.c
index 7c42bb8..5328465 100644
--- a/source/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/source/libvpx/vp9/encoder/vp9_picklpf.c
@@ -24,8 +24,12 @@
 #include "vp9/encoder/vp9_quantize.h"
 
 static int get_max_filter_level(const VP9_COMP *cpi) {
-  return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
-                                               : MAX_LOOP_FILTER;
+  if (cpi->pass == 2) {
+    return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
+                                                 : MAX_LOOP_FILTER;
+  } else {
+    return MAX_LOOP_FILTER;
+  }
 }
 
 
@@ -34,7 +38,8 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
   VP9_COMMON *const cm = &cpi->common;
   int filt_err;
 
-  vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame);
+  vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1,
+                        partial_frame);
   filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
 
   // Re-instate the unfiltered frame
@@ -77,8 +82,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
     // Bias against raising loop filter in favor of lowering it.
     int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
 
-    if (cpi->twopass.section_intra_rating < 20)
-      bias = bias * cpi->twopass.section_intra_rating / 20;
+    if ((cpi->pass == 2) && (cpi->twopass.section_intra_rating < 20))
+      bias = (bias * cpi->twopass.section_intra_rating) / 20;
 
     // yx, bias less for large block size
     if (cm->tx_mode != ONLY_4X4)
diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c
index adaa044..1e9887c 100644
--- a/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -27,12 +27,11 @@
 #include "vp9/encoder/vp9_rdopt.h"
 
 static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
-                                    const TileInfo *const tile,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
                                     int_mv *tmp_mv, int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int step_param;
   int sadpb = x->sadperbit16;
   MV mvp_full;
@@ -107,12 +106,11 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
-                                    const TileInfo *const tile,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
                                     MV *tmp_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int ref = mbmi->ref_frame[0];
   MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
   int dis;
@@ -290,7 +288,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
           continue;
 
-        full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+        full_pixel_motion_search(cpi, x, bsize, mi_row, mi_col,
                                  &frame_mv[NEWMV][ref_frame], &rate_mv);
 
         if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
@@ -301,7 +299,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd)
           continue;
 
-        sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+        sub_pixel_motion_search(cpi, x, bsize, mi_row, mi_col,
                                 &frame_mv[NEWMV][ref_frame].as_mv);
       }
 
diff --git a/source/libvpx/vp9/encoder/vp9_quantize.c b/source/libvpx/vp9/encoder/vp9_quantize.c
index 5206bb6..4d3086d 100644
--- a/source/libvpx/vp9/encoder/vp9_quantize.c
+++ b/source/libvpx/vp9/encoder/vp9_quantize.c
@@ -32,6 +32,7 @@ void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
                          zbin_ptr[1] + zbin_oq_value };
   const int nzbins[2] = { zbins[0] * -1,
                           zbins[1] * -1 };
+  (void)iscan;
 
   vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
   vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
@@ -87,6 +88,7 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
   int idx = 0;
   int idx_arr[1024];
   int i, eob = -1;
+  (void)iscan;
 
   vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
   vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c
index fe43f3a..a04622c 100644
--- a/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -48,6 +48,7 @@ static int kf_high_motion_minq[QINDEX_RANGE];
 static int arfgf_low_motion_minq[QINDEX_RANGE];
 static int arfgf_high_motion_minq[QINDEX_RANGE];
 static int inter_minq[QINDEX_RANGE];
+static int rtc_minq[QINDEX_RANGE];
 static int gf_high = 2000;
 static int gf_low = 400;
 static int kf_high = 5000;
@@ -84,6 +85,7 @@ void vp9_rc_init_minq_luts() {
     arfgf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30);
     arfgf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
     inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90);
+    rtc_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70);
   }
 }
 
@@ -549,14 +551,14 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
     // Use the lower of active_worst_quality and recent/average Q.
     if (cm->current_video_frame > 1) {
       if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
-        active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+        active_best_quality = rtc_minq[rc->avg_frame_qindex[INTER_FRAME]];
       else
-        active_best_quality = inter_minq[active_worst_quality];
+        active_best_quality = rtc_minq[active_worst_quality];
     } else {
       if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
-        active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+        active_best_quality = rtc_minq[rc->avg_frame_qindex[KEY_FRAME]];
       else
-        active_best_quality = inter_minq[active_worst_quality];
+        active_best_quality = rtc_minq[active_worst_quality];
     }
   }
 
@@ -972,11 +974,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
     q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
   }
 
-  // Q of 0 is disabled because we force tx size to be
-  // 16x16...
   if (cpi->sf.use_nonrd_pick_mode) {
-    if (q == 0)
-      q++;
     if (cpi->sf.force_frame_boost == 1)
       q -= cpi->sf.max_delta_qindex;
 
@@ -1149,10 +1147,6 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
   cpi->rc.frames_to_key--;
 }
 
-static int test_for_kf_one_pass(VP9_COMP *cpi) {
-  // Placeholder function for auto key frame
-  return 0;
-}
 // Use this macro to turn on/off use of alt-refs in one-pass mode.
 #define USE_ALTREF_FOR_ONE_PASS   1
 
@@ -1184,11 +1178,12 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   int target;
+  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
        (cpi->frame_flags & FRAMEFLAGS_KEY) ||
        rc->frames_to_key == 0 ||
-       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
+       (cpi->oxcf.auto_key && 0))) {
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced = cm->current_video_frame != 0 &&
                                 rc->frames_to_key == 0;
@@ -1315,10 +1310,11 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   int target;
+  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
   if ((cm->current_video_frame == 0 ||
       (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       rc->frames_to_key == 0 ||
-      (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
+      (cpi->oxcf.auto_key && 0))) {
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced = cm->current_video_frame != 0 &&
                                 rc->frames_to_key == 0;
diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.c b/source/libvpx/vp9/encoder/vp9_rdopt.c
index f4def1e..64f3e5a 100644
--- a/source/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/source/libvpx/vp9/encoder/vp9_rdopt.c
@@ -745,7 +745,8 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
                              int use_fast_coef_casting) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  struct rdcost_block_args args = { 0 };
+  struct rdcost_block_args args;
+  vp9_zero(args);
   args.x = x;
   args.best_rd = ref_best_rd;
   args.use_fast_coef_costing = use_fast_coef_casting;
@@ -806,7 +807,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                              {INT64_MAX, INT64_MAX},
                              {INT64_MAX, INT64_MAX},
                              {INT64_MAX, INT64_MAX}};
-  int n, m;
+  TX_SIZE n, m;
   int s0, s1;
   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
   int64_t best_rd = INT64_MAX;
@@ -889,7 +890,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                              {INT64_MAX, INT64_MAX},
                              {INT64_MAX, INT64_MAX},
                              {INT64_MAX, INT64_MAX}};
-  int n, m;
+  TX_SIZE n, m;
   int s0, s1;
   double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@@ -961,7 +962,7 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 
   vp9_subtract_plane(x, bs, 0);
 
-  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
+  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
     choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                              ref_best_rd, bs);
@@ -999,7 +1000,7 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 
   assert(bs == mbmi->sb_type);
-  if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
+  if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) {
     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
     choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                              ref_best_rd, bs);
@@ -2312,7 +2313,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   MACROBLOCKD *xd = &x->e_mbd;
   const VP9_COMMON *cm = &cpi->common;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int bestsme = INT_MAX;
   int step_param;
   int sadpb = x->sadperbit16;
@@ -2514,8 +2515,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
                                        &cpi->fn_ptr[bsize],
-                                       &ref_mv[id].as_mv, second_pred,
-                                       pw, ph);
+                                       &ref_mv[id].as_mv, second_pred);
     if (bestsme < INT_MAX)
       bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                       second_pred, &cpi->fn_ptr[bsize], 1);
@@ -3069,7 +3069,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_pred_rd[REFERENCE_MODES];
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-  MB_MODE_INFO best_mbmode = { 0 };
+  MB_MODE_INFO best_mbmode;
   int mode_index, best_mode_index = -1;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
@@ -3095,7 +3095,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   const int intra_y_mode_mask =
       cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
   int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
-
+  vp9_zero(best_mbmode);
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
@@ -3678,7 +3678,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_pred_rd[REFERENCE_MODES];
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-  MB_MODE_INFO best_mbmode = { 0 };
+  MB_MODE_INFO best_mbmode;
   int ref_index, best_ref_index = 0;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
@@ -3698,6 +3698,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
+  vp9_zero(best_mbmode);
 
   for (i = 0; i < 4; i++) {
     int j;
diff --git a/source/libvpx/vp9/encoder/vp9_segmentation.c b/source/libvpx/vp9/encoder/vp9_segmentation.c
index 7537d1b..574df62 100644
--- a/source/libvpx/vp9/encoder/vp9_segmentation.c
+++ b/source/libvpx/vp9/encoder/vp9_segmentation.c
@@ -109,7 +109,7 @@ static int cost_segmap(int *segcounts, vp9_prob *probs) {
 }
 
 static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
-                       MODE_INFO **mi_8x8,
+                       MODE_INFO **mi,
                        int *no_pred_segcounts,
                        int (*temporal_predictor_count)[2],
                        int *t_unpred_seg_counts,
@@ -121,7 +121,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  xd->mi = mi_8x8;
+  xd->mi = mi;
   segment_id = xd->mi[0]->mbmi.segment_id;
 
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
@@ -131,7 +131,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
 
   // Temporal prediction not allowed on key frames
   if (cm->frame_type != KEY_FRAME) {
-    const BLOCK_SIZE bsize = mi_8x8[0]->mbmi.sb_type;
+    const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
     // Test to see if the segment id matches the predicted value.
     const int pred_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map,
                                                    bsize, mi_row, mi_col);
@@ -143,14 +143,14 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile,
     xd->mi[0]->mbmi.seg_id_predicted = pred_flag;
     temporal_predictor_count[pred_context][pred_flag]++;
 
+    // Update the "unpredicted" segment count
     if (!pred_flag)
-      // Update the "unpredicted" segment count
       t_unpred_seg_counts[segment_id]++;
   }
 }
 
 static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
-                          MODE_INFO **mi_8x8,
+                          MODE_INFO **mi,
                           int *no_pred_segcounts,
                           int (*temporal_predictor_count)[2],
                           int *t_unpred_seg_counts,
@@ -164,22 +164,22 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
-  bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];
+  bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
+  bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
 
   if (bw == bs && bh == bs) {
-    count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, bs, mi_row, mi_col);
   } else if (bw == bs && bh < bs) {
-    count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
-    count_segs(cpi, tile, mi_8x8 + hbs * mis, no_pred_segcounts,
+    count_segs(cpi, tile, mi + hbs * mis, no_pred_segcounts,
                temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
                mi_row + hbs, mi_col);
   } else if (bw < bs && bh == bs) {
-    count_segs(cpi, tile, mi_8x8, no_pred_segcounts, temporal_predictor_count,
+    count_segs(cpi, tile, mi, no_pred_segcounts, temporal_predictor_count,
                t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
-    count_segs(cpi, tile, mi_8x8 + hbs,
+    count_segs(cpi, tile, mi + hbs,
                no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts,
                hbs, bs, mi_row, mi_col + hbs);
   } else {
@@ -192,7 +192,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile,
       const int mi_dc = hbs * (n & 1);
       const int mi_dr = hbs * (n >> 1);
 
-      count_segs_sb(cpi, tile, &mi_8x8[mi_dr * mis + mi_dc],
+      count_segs_sb(cpi, tile, &mi[mi_dr * mis + mi_dc],
                     no_pred_segcounts, temporal_predictor_count,
                     t_unpred_seg_counts,
                     mi_row + mi_dr, mi_col + mi_dc, subsize);
@@ -217,9 +217,6 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
   vp9_prob t_pred_tree[SEG_TREE_PROBS];
   vp9_prob t_nopred_prob[PREDICTION_PROBS];
 
-  const int mis = cm->mi_stride;
-  MODE_INFO **mi_ptr, **mi;
-
   // Set default state for the segment tree probabilities and the
   // temporal coding probabilities
   vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
@@ -229,12 +226,13 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
   // predicts this one
   for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) {
     TileInfo tile;
-
+    MODE_INFO **mi_ptr;
     vp9_tile_init(&tile, cm, 0, tile_col);
+
     mi_ptr = cm->mi_grid_visible + tile.mi_col_start;
     for (mi_row = 0; mi_row < cm->mi_rows;
-         mi_row += 8, mi_ptr += 8 * mis) {
-      mi = mi_ptr;
+         mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
+      MODE_INFO **mi = mi_ptr;
       for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
            mi_col += 8, mi += 8)
         count_segs_sb(cpi, &tile, mi, no_pred_segcounts,
diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.h b/source/libvpx/vp9/encoder/vp9_speed_features.h
index a384a43..46806c9 100644
--- a/source/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/source/libvpx/vp9/encoder/vp9_speed_features.h
@@ -176,7 +176,7 @@ typedef struct SPEED_FEATURES {
   // a log search that iterates 4 times (check around mv for last for best
   // error of combined predictor then check around mv for alt). If 0 we
   // we just use the best motion vector found for each frame by itself.
-  int comp_inter_joint_search_thresh;
+  BLOCK_SIZE comp_inter_joint_search_thresh;
 
   // This variable is used to cap the maximum number of times we skip testing a
   // mode to be evaluated. A high value means we will be faster.
diff --git a/source/libvpx/vp9/encoder/vp9_tokenize.c b/source/libvpx/vp9/encoder/vp9_tokenize.c
index 8ce98d9..17214c3 100644
--- a/source/libvpx/vp9/encoder/vp9_tokenize.c
+++ b/source/libvpx/vp9/encoder/vp9_tokenize.c
@@ -232,7 +232,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
       cpi->common.fc.coef_probs[tx_size][type][ref];
   unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
       cpi->common.counts.eob_branch[tx_size][type][ref];
-
   const uint8_t *const band = get_band_translate(tx_size);
   const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
 
@@ -294,6 +293,8 @@ static void is_skippable(int plane, int block,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                          void *argv) {
   struct is_skippable_args *args = argv;
+  (void)plane_bsize;
+  (void)tx_size;
   args->skippable[0] &= (!args->x->plane[plane].eobs[block]);
 }
 
diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm b/source/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm
new file mode 100644
index 0000000..f71181c
--- /dev/null
+++ b/source/libvpx/vp9/encoder/x86/vp9_dct_mmx.asm
@@ -0,0 +1,70 @@
+;
+;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro TRANSFORM_COLS 0
+  paddw           m0,        m1
+  movq            m4,        m0
+  psubw           m3,        m2
+  psubw           m4,        m3
+  psraw           m4,        1
+  movq            m5,        m4
+  psubw           m5,        m1 ;b1
+  psubw           m4,        m2 ;c1
+  psubw           m0,        m4
+  paddw           m3,        m5
+                                ; m0 a0
+  SWAP            1,         4  ; m1 c1
+  SWAP            2,         3  ; m2 d1
+  SWAP            3,         5  ; m3 b1
+%endmacro
+
+%macro TRANSPOSE_4X4 0
+  movq            m4,        m0
+  movq            m5,        m2
+  punpcklwd       m4,        m1
+  punpckhwd       m0,        m1
+  punpcklwd       m5,        m3
+  punpckhwd       m2,        m3
+  movq            m1,        m4
+  movq            m3,        m0
+  punpckldq       m1,        m5
+  punpckhdq       m4,        m5
+  punpckldq       m3,        m2
+  punpckhdq       m0,        m2
+  SWAP            2, 3, 0, 1, 4
+%endmacro
+
+INIT_MMX mmx
+cglobal fwht4x4, 3, 4, 8, input, output, stride
+  lea             r3q,       [inputq + strideq*4]
+  movq            m0,        [inputq] ;a1
+  movq            m1,        [inputq + strideq*2] ;b1
+  movq            m2,        [r3q] ;c1
+  movq            m3,        [r3q + strideq*2] ;d1
+
+  TRANSFORM_COLS
+  TRANSPOSE_4X4
+  TRANSFORM_COLS
+  TRANSPOSE_4X4
+
+  psllw           m0,        2
+  psllw           m1,        2
+  psllw           m2,        2
+  psllw           m3,        2
+
+  movq            [outputq],      m0
+  movq            [outputq + 8],  m1
+  movq            [outputq + 16], m2
+  movq            [outputq + 24], m3
+
+  RET
diff --git a/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c b/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c
new file mode 100644
index 0000000..c67490f
--- /dev/null
+++ b/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <immintrin.h>  // AVX2
+#include "vpx/vpx_integer.h"
+
+
+int64_t vp9_block_error_avx2(const int16_t *coeff,
+                             const int16_t *dqcoeff,
+                             intptr_t block_size,
+                             int64_t *ssz) {
+  __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
+  __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
+  __m256i sse_reg_64hi, ssz_reg_64hi;
+  __m128i sse_reg128, ssz_reg128;
+  int64_t sse;
+  int i;
+  const __m256i zero_reg = _mm256_set1_epi16(0);
+
+  // init sse and ssz registers to zero
+  sse_reg = _mm256_set1_epi16(0);
+  ssz_reg = _mm256_set1_epi16(0);
+
+  for (i = 0 ; i < block_size ; i+= 16) {
+    // load 32 bytes from coeff and dqcoeff
+    coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
+    dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
+    // dqcoeff - coeff
+    dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
+    // madd (dqcoeff - coeff)
+    dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg);
+    // madd coeff
+    coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg);
+    // expand each double word of madd (dqcoeff - coeff) to quad word
+    exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_reg, zero_reg);
+    exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_reg, zero_reg);
+    // expand each double word of madd (coeff) to quad word
+    exp_coeff_lo = _mm256_unpacklo_epi32(coeff_reg, zero_reg);
+    exp_coeff_hi = _mm256_unpackhi_epi32(coeff_reg, zero_reg);
+    // add each quad word of madd (dqcoeff - coeff) and madd (coeff)
+    sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_lo);
+    ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_lo);
+    sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_hi);
+    ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_hi);
+  }
+  // save the higher 64 bit of each 128 bit lane
+  sse_reg_64hi = _mm256_srli_si256(sse_reg, 8);
+  ssz_reg_64hi = _mm256_srli_si256(ssz_reg, 8);
+  // add the higher 64 bit to the low 64 bit
+  sse_reg = _mm256_add_epi64(sse_reg, sse_reg_64hi);
+  ssz_reg = _mm256_add_epi64(ssz_reg, ssz_reg_64hi);
+
+  // add the 64 bit sums of the low and high 128 bit lanes of each 256 bit register
+  sse_reg128 = _mm_add_epi64(_mm256_castsi256_si128(sse_reg),
+                             _mm256_extractf128_si256(sse_reg, 1));
+
+  ssz_reg128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_reg),
+                             _mm256_extractf128_si256(ssz_reg, 1));
+
+  // store the results
+  _mm_storel_epi64((__m128i*)(&sse), sse_reg128);
+
+  _mm_storel_epi64((__m128i*)(ssz), ssz_reg128);
+  return sse;
+}
diff --git a/source/libvpx/vp9/vp9_common.mk b/source/libvpx/vp9/vp9_common.mk
index eaff60a..3b4d6b9 100644
--- a/source/libvpx/vp9/vp9_common.mk
+++ b/source/libvpx/vp9/vp9_common.mk
@@ -124,28 +124,28 @@ ifeq ($(ARCH_X86_64), yes)
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_idct_ssse3.asm
 endif
 
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_16_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct4x4_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct8x8_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_dc_only_idct_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct4x4_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct8x8_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct16x16_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_1_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_idct32x32_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht4x4_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_iht8x8_add_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_copy_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_reconintra_neon$(ASM)
 
 $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
diff --git a/source/libvpx/vp9/vp9_cx_iface.c b/source/libvpx/vp9/vp9_cx_iface.c
index 449e7d8..2a3964a 100644
--- a/source/libvpx/vp9/vp9_cx_iface.c
+++ b/source/libvpx/vp9/vp9_cx_iface.c
@@ -42,7 +42,7 @@ struct vp9_extracfg {
 };
 
 struct extraconfig_map {
-  int usage;
+  unsigned int usage;
   struct vp9_extracfg cfg;
 };
 
@@ -245,7 +245,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
         layer_id = (int)stats->spatial_layer_id;
 
         if (layer_id >= cfg->ss_number_layers
-            ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1)
+            ||(unsigned int)(stats->count + 0.5) !=
+               n_packets_per_layer[layer_id] - 1)
           ERROR("rc_twopass_stats_in missing EOS stats packet");
       }
     } else {
@@ -823,7 +824,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
         }
 
         // Add the frame packet to the list of returned packets.
-        round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1;
+        round = (vpx_codec_pts_t)10000000 * ctx->cfg.g_timebase.num / 2 - 1;
         delta = (dst_end_time_stamp - dst_time_stamp);
         pkt.kind = VPX_CODEC_CX_FRAME_PKT;
         pkt.data.frame.pts =
@@ -1003,7 +1004,8 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
   vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
 
   if (map) {
-    if (!vp9_set_active_map(ctx->cpi, map->active_map, map->rows, map->cols))
+    if (!vp9_set_active_map(ctx->cpi, map->active_map,
+                            (int)map->rows, (int)map->cols))
       return VPX_CODEC_OK;
     else
       return VPX_CODEC_INVALID_PARAM;
diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c
index 06b4823..1d29815 100644
--- a/source/libvpx/vp9/vp9_dx_iface.c
+++ b/source/libvpx/vp9/vp9_dx_iface.c
@@ -32,21 +32,12 @@ struct vpx_codec_alg_priv {
   vpx_codec_priv_t        base;
   vpx_codec_dec_cfg_t     cfg;
   vp9_stream_info_t       si;
-  int                     decoder_init;
   struct VP9Decoder *pbi;
   int                     postproc_cfg_set;
   vp8_postproc_cfg_t      postproc_cfg;
-#if CONFIG_POSTPROC_VISUALIZER
-  unsigned int            dbg_postproc_flag;
-  int                     dbg_color_ref_frame_flag;
-  int                     dbg_color_mb_modes_flag;
-  int                     dbg_color_b_modes_flag;
-  int                     dbg_display_mv_flag;
-#endif
   vpx_decrypt_cb          decrypt_cb;
   void                   *decrypt_state;
   vpx_image_t             img;
-  int                     img_setup;
   int                     img_avail;
   int                     invert_tile_order;
 
@@ -226,36 +217,20 @@ static void set_default_ppflags(vp8_postproc_cfg_t *cfg) {
 static void set_ppflags(const vpx_codec_alg_priv_t *ctx,
                         vp9_ppflags_t *flags) {
   flags->post_proc_flag =
-#if CONFIG_POSTPROC_VISUALIZER
-      (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) |
-      (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
-      (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
-      (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) |
-#endif
       ctx->postproc_cfg.post_proc_flag;
 
   flags->deblocking_level = ctx->postproc_cfg.deblocking_level;
   flags->noise_level = ctx->postproc_cfg.noise_level;
-#if CONFIG_POSTPROC_VISUALIZER
-  flags->display_ref_frame_flag = ctx->dbg_color_ref_frame_flag;
-  flags->display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
-  flags->display_b_modes_flag = ctx->dbg_color_b_modes_flag;
-  flags->display_mv_flag = ctx->dbg_display_mv_flag;
-#endif
 }
 
 static void init_decoder(vpx_codec_alg_priv_t *ctx) {
-  VP9DecoderConfig oxcf;
-  oxcf.width = ctx->si.w;
-  oxcf.height = ctx->si.h;
-  oxcf.version = 9;
-  oxcf.max_threads = ctx->cfg.threads;
-  oxcf.inv_tile_order = ctx->invert_tile_order;
-
-  ctx->pbi = vp9_decoder_create(&oxcf);
+  ctx->pbi = vp9_decoder_create();
   if (ctx->pbi == NULL)
     return;
 
+  ctx->pbi->max_threads = ctx->cfg.threads;
+  ctx->pbi->inv_tile_order = ctx->invert_tile_order;
+
   vp9_initialize_dec();
 
   // If postprocessing was enabled by the application and a
@@ -289,12 +264,10 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
   }
 
   // Initialize the decoder instance on the first frame
-  if (!ctx->decoder_init) {
+  if (ctx->pbi == NULL) {
     init_decoder(ctx);
     if (ctx->pbi == NULL)
       return VPX_CODEC_ERROR;
-
-    ctx->decoder_init = 1;
   }
 
   // Set these even if already initialized.  The caller may have changed the
@@ -375,80 +348,70 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz,
   }
 }
 
+static vpx_codec_err_t decode_one_iter(vpx_codec_alg_priv_t *ctx,
+                                       const uint8_t **data_start_ptr,
+                                       const uint8_t *data_end,
+                                       uint32_t frame_size, void *user_priv,
+                                       long deadline) {
+  const vpx_codec_err_t res = decode_one(ctx, data_start_ptr, frame_size,
+                                         user_priv, deadline);
+  if (res != VPX_CODEC_OK)
+    return res;
+
+  // Account for suboptimal termination by the encoder.
+  while (*data_start_ptr < data_end) {
+    const uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
+                                       *data_start_ptr);
+    if (marker)
+      break;
+    (*data_start_ptr)++;
+  }
+
+  return VPX_CODEC_OK;
+}
+
 static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
                                       const uint8_t *data, unsigned int data_sz,
                                       void *user_priv, long deadline) {
   const uint8_t *data_start = data;
-  const uint8_t *data_end = data + data_sz;
-  vpx_codec_err_t res = VPX_CODEC_OK;
-  uint32_t sizes[8];
-  int frames_this_pts, frame_count = 0;
+  const uint8_t *const data_end = data + data_sz;
+  vpx_codec_err_t res;
+  uint32_t frame_sizes[8];
+  int frame_count;
 
   if (data == NULL || data_sz == 0)
     return VPX_CODEC_INVALID_PARAM;
 
-  parse_superframe_index(data, data_sz, sizes, &frames_this_pts,
+  parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
                          ctx->decrypt_cb, ctx->decrypt_state);
 
-  do {
-    if (data_sz) {
-      uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
-                                   data_start);
-      // Skip over the superframe index, if present
-      if ((marker & 0xe0) == 0xc0) {
-        const uint32_t frames = (marker & 0x7) + 1;
-        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
-        const uint32_t index_sz = 2 + mag * frames;
-
-        if (data_sz >= index_sz) {
-          uint8_t marker2 = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
-                                        data_start + index_sz - 1);
-          if (marker2 == marker) {
-            data_start += index_sz;
-            data_sz -= index_sz;
-            if (data_start < data_end)
-              continue;
-            else
-              break;
-          }
-        }
-      }
-    }
-
-    // Use the correct size for this frame, if an index is present.
-    if (frames_this_pts) {
-      uint32_t this_sz = sizes[frame_count];
+  if (frame_count > 0) {
+    int i;
 
-      if (data_sz < this_sz) {
+    for (i = 0; i < frame_count; ++i) {
+      const uint32_t frame_size = frame_sizes[i];
+      if (data_start < data ||
+          frame_size > (uint32_t)(data_end - data_start)) {
         ctx->base.err_detail = "Invalid frame size in index";
         return VPX_CODEC_CORRUPT_FRAME;
       }
 
-      data_sz = this_sz;
-      frame_count++;
+      res = decode_one_iter(ctx, &data_start, data_end, frame_size,
+                            user_priv, deadline);
+      if (res != VPX_CODEC_OK)
+        return res;
     }
-
-    res = decode_one(ctx, &data_start, data_sz, user_priv, deadline);
-    assert(data_start >= data);
-    assert(data_start <= data_end);
-
-    // Early exit if there was a decode error
-    if (res)
-      break;
-
-    // Account for suboptimal termination by the encoder.
+  } else {
     while (data_start < data_end) {
-      uint8_t marker3 = read_marker(ctx->decrypt_cb, ctx->decrypt_state,
-                                    data_start);
-      if (marker3)
-        break;
-      data_start++;
+      res = decode_one_iter(ctx, &data_start, data_end,
+                            (uint32_t)(data_end - data_start),
+                            user_priv, deadline);
+      if (res != VPX_CODEC_OK)
+        return res;
     }
+  }
 
-    data_sz = (unsigned int)(data_end - data_start);
-  } while (data_start < data_end);
-
-  return res;
+  return VPX_CODEC_OK;
 }
 
 static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
@@ -553,22 +516,7 @@ static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
 
 static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
                                             int ctrl_id, va_list args) {
-#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
-  int data = va_arg(args, int);
-
-#define MAP(id, var) case id: var = data; break;
-
-  switch (ctrl_id) {
-      MAP(VP8_SET_DBG_COLOR_REF_FRAME,   ctx->dbg_color_ref_frame_flag);
-      MAP(VP8_SET_DBG_COLOR_MB_MODES,    ctx->dbg_color_mb_modes_flag);
-      MAP(VP8_SET_DBG_COLOR_B_MODES,     ctx->dbg_color_b_modes_flag);
-      MAP(VP8_SET_DBG_DISPLAY_MV,        ctx->dbg_display_mv_flag);
-  }
-
-  return VPX_CODEC_OK;
-#else
   return VPX_CODEC_INCAPABLE;
-#endif
 }
 
 static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
diff --git a/source/libvpx/vp9/vp9cx.mk b/source/libvpx/vp9/vp9cx.mk
index 5e88793..6e5c521 100644
--- a/source/libvpx/vp9/vp9cx.mk
+++ b/source/libvpx/vp9/vp9cx.mk
@@ -101,7 +101,9 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
 
 ifeq ($(CONFIG_USE_X86INC),yes)
+VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
diff --git a/source/libvpx/vpx/src/svc_encodeframe.c b/source/libvpx/vpx/src/svc_encodeframe.c
index 38c2d26..b874be7 100644
--- a/source/libvpx/vpx/src/svc_encodeframe.c
+++ b/source/libvpx/vpx/src/svc_encodeframe.c
@@ -234,7 +234,8 @@ static void svc_log_reset(SvcContext *svc_ctx) {
   si->message_buffer[0] = '\0';
 }
 
-static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) {
+static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level,
+                   const char *fmt, ...) {
   char buf[512];
   int retval = 0;
   va_list ap;
diff --git a/source/libvpx/vpx_scale/vpx_scale.mk b/source/libvpx/vpx_scale/vpx_scale.mk
index ded8e0b..95e7483 100644
--- a/source/libvpx/vpx_scale/vpx_scale.mk
+++ b/source/libvpx/vpx_scale/vpx_scale.mk
@@ -10,10 +10,10 @@ SCALE_SRCS-yes += vpx_scale_rtcd.c
 SCALE_SRCS-yes += vpx_scale_rtcd.pl
 
 #neon
-SCALE_SRCS-$(HAVE_NEON)  += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
-SCALE_SRCS-$(HAVE_NEON)  += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM)
-SCALE_SRCS-$(HAVE_NEON)  += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
-SCALE_SRCS-$(HAVE_NEON)  += arm/neon/yv12extend_arm.c
+SCALE_SRCS-$(HAVE_NEON_ASM)  += arm/neon/vp8_vpxyv12_copyframe_func_neon$(ASM)
+SCALE_SRCS-$(HAVE_NEON_ASM)  += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM)
+SCALE_SRCS-$(HAVE_NEON_ASM)  += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
+SCALE_SRCS-$(HAVE_NEON_ASM)  += arm/neon/yv12extend_arm.c
 
 #mips(dspr2)
 SCALE_SRCS-$(HAVE_DSPR2)  += mips/dspr2/yv12extend_dspr2.c
diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.pl b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl
index 8c92570..2e3f1ff 100644
--- a/source/libvpx/vpx_scale/vpx_scale_rtcd.pl
+++ b/source/libvpx/vpx_scale/vpx_scale_rtcd.pl
@@ -17,10 +17,12 @@ if (vpx_config("CONFIG_SPATIAL_RESAMPLING") eq "yes") {
 }
 
 add_proto qw/void vp8_yv12_extend_frame_borders/, "struct yv12_buffer_config *ybf";
-specialize qw/vp8_yv12_extend_frame_borders neon/;
+specialize qw/vp8_yv12_extend_frame_borders neon_asm/;
+$vp8_yv12_extend_frame_borders_neon_asm=vp8_yv12_extend_frame_borders_neon;
 
 add_proto qw/void vp8_yv12_copy_frame/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
-specialize qw/vp8_yv12_copy_frame neon/;
+specialize qw/vp8_yv12_copy_frame neon_asm/;
+$vp8_yv12_copy_frame_neon_asm=vp8_yv12_copy_frame_neon;
 
 add_proto qw/void vpx_yv12_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc";
 
diff --git a/source/libvpx/vpxdec.c b/source/libvpx/vpxdec.c
index 6356961..ed37c70 100644
--- a/source/libvpx/vpxdec.c
+++ b/source/libvpx/vpxdec.c
@@ -33,7 +33,9 @@
 #include "./md5_utils.h"
 
 #include "./tools_common.h"
+#if CONFIG_WEBM_IO
 #include "./webmdec.h"
+#endif
 #include "./y4menc.h"
 
 static const char *exec_name;
@@ -528,9 +530,11 @@ int main_loop(int argc, const char **argv_) {
 
   struct VpxDecInputContext input = {0};
   struct VpxInputContext vpx_input_ctx = {0};
+#if CONFIG_WEBM_IO
   struct WebmInputContext webm_ctx = {0};
-  input.vpx_input_ctx = &vpx_input_ctx;
   input.webm_ctx = &webm_ctx;
+#endif
+  input.vpx_input_ctx = &vpx_input_ctx;
 
   /* Parse command line */
   exec_name = argv_[0];
diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c
index 8e8ed23..96a7ab6 100644
--- a/source/libvpx/vpxenc.c
+++ b/source/libvpx/vpxenc.c
@@ -42,7 +42,9 @@
 #include "./rate_hist.h"
 #include "./vpxstats.h"
 #include "./warnings.h"
+#if CONFIG_WEBM_IO
 #include "./webmenc.h"
+#endif
 #include "./y4minput.h"
 
 /* Swallow warnings about unused results of fread/fwrite */
@@ -207,6 +209,7 @@ static const arg_def_t width            = ARG_DEF("w", "width", 1,
                                                   "Frame width");
 static const arg_def_t height           = ARG_DEF("h", "height", 1,
                                                   "Frame height");
+#if CONFIG_WEBM_IO
 static const struct arg_enum_list stereo_mode_enum[] = {
   {"mono", STEREO_FORMAT_MONO},
   {"left-right", STEREO_FORMAT_LEFT_RIGHT},
@@ -217,6 +220,7 @@ static const struct arg_enum_list stereo_mode_enum[] = {
 };
 static const arg_def_t stereo_mode      = ARG_DEF_ENUM(NULL, "stereo-mode", 1,
                                                        "Stereo 3D video format", stereo_mode_enum);
+#endif
 static const arg_def_t timebase         = ARG_DEF(NULL, "timebase", 1,
                                                   "Output timestamp precision (fractional seconds)");
 static const arg_def_t error_resilient  = ARG_DEF(NULL, "error-resilient", 1,
@@ -226,7 +230,11 @@ static const arg_def_t lag_in_frames    = ARG_DEF(NULL, "lag-in-frames", 1,
 
 static const arg_def_t *global_args[] = {
   &use_yv12, &use_i420, &usage, &threads, &profile,
-  &width, &height, &stereo_mode, &timebase, &framerate,
+  &width, &height,
+#if CONFIG_WEBM_IO
+  &stereo_mode,
+#endif
+  &timebase, &framerate,
   &error_resilient,
   &lag_in_frames, NULL
 };
@@ -554,6 +562,11 @@ static int compare_img(const vpx_image_t *const img1,
                              NELEMENTS(vp9_arg_ctrl_map))
 #endif
 
+#if !CONFIG_WEBM_IO
+typedef int stereo_format_t;
+struct EbmlGlobal { int debug; };
+#endif
+
 /* Per-stream configuration */
 struct stream_config {
   struct vpx_codec_enc_cfg  cfg;
@@ -792,9 +805,9 @@ static struct stream_state *new_stream(struct VpxEncoderConfig *global,
     stream->config.cfg.g_h = 0;
 
     /* Initialize remaining stream parameters */
-    stream->config.stereo_fmt = STEREO_FORMAT_MONO;
     stream->config.write_webm = 1;
 #if CONFIG_WEBM_IO
+    stream->config.stereo_fmt = STEREO_FORMAT_MONO;
     stream->ebml.last_pts_ns = -1;
     stream->ebml.writer = NULL;
     stream->ebml.segment = NULL;
@@ -869,8 +882,10 @@ static int parse_stream_params(struct VpxEncoderConfig *global,
       config->cfg.g_w = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &height, argi)) {
       config->cfg.g_h = arg_parse_uint(&arg);
+#if CONFIG_WEBM_IO
     } else if (arg_match(&arg, &stereo_mode, argi)) {
       config->stereo_fmt = arg_parse_enum_or_int(&arg);
+#endif
     } else if (arg_match(&arg, &timebase, argi)) {
       config->cfg.g_timebase = arg_parse_rational(&arg);
       validate_positive_rational(arg.name, &config->cfg.g_timebase);
diff --git a/unpack_lib_posix.gypi b/unpack_lib_posix.gypi
index 3716314..f26ea13 100644
--- a/unpack_lib_posix.gypi
+++ b/unpack_lib_posix.gypi
@@ -30,7 +30,7 @@
         'ar_cmd': [],
 	'conditions': [
           ['android_webview_build==1', {
-            'ar_cmd': ['-r', '$(realpath $($(GYP_VAR_PREFIX)TARGET_AR))'],
+            'ar_cmd': ['-r', '$(abspath $($(gyp_var_prefix)TARGET_AR))'],
           }],
         ],
       },
diff --git a/update_libvpx.sh b/update_libvpx.sh
index 2328add..c00b3b0 100755
--- a/update_libvpx.sh
+++ b/update_libvpx.sh
@@ -11,7 +11,7 @@
 
 # Usage:
 #
-# $ ./update_libvpx.sh [branch | revision | file containing a revision]
+# $ ./update_libvpx.sh [branch | revision | file or url containing a revision]
 # When specifying a branch it may be necessary to prefix with origin/
 
 # Tools required for running this tool:
@@ -33,6 +33,8 @@ if [ -n "$1" ]; then
   GIT_BRANCH="$1"
   if [ -f "$1"  ]; then
     GIT_BRANCH=$(<"$1")
+  elif [[ $1 = http* ]]; then
+    GIT_BRANCH=`curl $1`
   fi
 fi
author	Torne (Richard Coles) <torne@google.com>	2014-06-24 11:04:27 +0100
committer	Torne (Richard Coles) <torne@google.com>	2014-06-24 11:04:27 +0100
commit	c1633d58a5bb3344df388ccd1c12445a6dfd3098 (patch)
tree	2ae99634b3c1b3d9d69eb6e531455175a81dda6c
parent	d77dc4514a925c51ea9a72901526e45e361f55c8 (diff)
parent	db9ac6c76553d95d7eb35e2bcf84c16a7901c3c3 (diff)
download	libvpx-c1633d58a5bb3344df388ccd1c12445a6dfd3098.tar.gz