From 23da8622c04ac843f7912dd33b4ad55f41422119 Mon Sep 17 00:00:00 2001
From: "kma@webrtc.org" <kma@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Sat, 9 Mar 2013 00:38:14 +0000
Subject: Optimized EstCodeLpcCoef() for iSAC with intrinsics in Android-Neon
 platform.

Cycles of the whole iSAC codec was reduced by 7.9%, measured by offline file test, with time() function.

Bit exact.

** Code style cleanup is not considered in this CL. **
Review URL: https://webrtc-codereview.appspot.com/1069004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3643 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../codecs/isac/fix/source/entropy_coding.c        | 508 ++++++++++-----------
 .../codecs/isac/fix/source/entropy_coding.h        |  60 ++-
 .../codecs/isac/fix/source/entropy_coding_neon.c   | 220 +++++++++
 .../audio_coding/codecs/isac/fix/source/isacfix.c  |   6 +
 .../codecs/isac/fix/source/isacfix.gypi            |   1 +
 5 files changed, 530 insertions(+), 265 deletions(-)
 create mode 100644 webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_neon.c

(limited to 'webrtc')

diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.c
index 03fccac0d1..1132235e4a 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.c
@@ -27,6 +27,40 @@
 #include "settings.h"
 #include "signal_processing_library.h"
 
+/*
+ * Eenumerations for arguments to functions WebRtcIsacfix_MatrixProduct1()
+ * and WebRtcIsacfix_MatrixProduct2().
+*/
+
+enum matrix_index_factor {
+  kTIndexFactor1 = 1,
+  kTIndexFactor2 = 2,
+  kTIndexFactor3 = SUBFRAMES,
+  kTIndexFactor4 = LPC_SHAPE_ORDER
+};
+
+enum matrix_index_step {
+  kTIndexStep1 = 1,
+  kTIndexStep2 = SUBFRAMES,
+  kTIndexStep3 = LPC_SHAPE_ORDER
+};
+
+enum matrixprod_loop_count {
+  kTLoopCount1 = SUBFRAMES,
+  kTLoopCount2 = 2,
+  kTLoopCount3 = LPC_SHAPE_ORDER
+};
+
+enum matrix1_shift_value {
+  kTMatrix1_shift0 = 0,
+  kTMatrix1_shift1 = 1,
+  kTMatrix1_shift5 = 5
+};
+
+enum matrixprod_init_case {
+  kTInitCase0 = 0,
+  kTInitCase1 = 1
+};
 
 /*
   This function implements the fix-point correspondant function to lrint.
@@ -775,6 +809,115 @@ static void Lar2polyFix(WebRtc_Word32 *larsQ17,
   }
 }
 
+/*
+Function WebRtcIsacfix_MatrixProduct1C() does one form of matrix multiplication.
+It first shifts input data of one matrix, determines the right indexes for the
+two matrixes, multiply them, and write the results into an output buffer.
+
+Note that two factors (or, multipliers) determine the initialization values of
+the variable |matrix1_index| in the code. The relationship is
+|matrix1_index| = |matrix1_index_factor1| * |matrix1_index_factor2|, where
+|matrix1_index_factor1| is given by the argument while |matrix1_index_factor2|
+is determined by the value of argument |matrix1_index_init_case|;
+|matrix1_index_factor2| is the value of the outmost loop counter j (when
+|matrix1_index_init_case| is 0), or the value of the middle loop counter k (when
+|matrix1_index_init_case| is non-zero).
+
+|matrix0_index| is determined the same way.
+
+Arguments:
+  matrix0[]:                 matrix0 data in Q15 domain.
+  matrix1[]:                 matrix1 data.
+  matrix_product[]:          output data (matrix product).
+  matrix1_index_factor1:     The first of two factors determining the
+                             initialization value of matrix1_index.
+  matrix0_index_factor1:     The first of two factors determining the
+                             initialization value of matrix0_index.
+  matrix1_index_init_case:   Case number for selecting the second of two
+                             factors determining the initialization value
+                             of matrix1_index and matrix0_index.
+  matrix1_index_step:        Incremental step for matrix1_index.
+  matrix0_index_step:        Incremental step for matrix0_index.
+  inner_loop_count:          Maximum count of the inner loop.
+  mid_loop_count:            Maximum count of the intermediate loop.
+  shift:                     Left shift value for matrix1.
+*/
+void WebRtcIsacfix_MatrixProduct1C(const int16_t matrix0[],
+                                   const int32_t matrix1[],
+                                   int32_t matrix_product[],
+                                   const int matrix1_index_factor1,
+                                   const int matrix0_index_factor1,
+                                   const int matrix1_index_init_case,
+                                   const int matrix1_index_step,
+                                   const int matrix0_index_step,
+                                   const int inner_loop_count,
+                                   const int mid_loop_count,
+                                   const int shift) {
+  int j = 0, k = 0, n = 0;
+  int matrix0_index = 0, matrix1_index = 0, matrix_prod_index = 0;
+  int* matrix0_index_factor2 = &k;
+  int* matrix1_index_factor2 = &j;
+  if (matrix1_index_init_case != 0) {
+    matrix0_index_factor2 = &j;
+    matrix1_index_factor2 = &k;
+  }
+
+  for (j = 0; j < SUBFRAMES; j++) {
+    matrix_prod_index = mid_loop_count * j;
+    for (k = 0; k < mid_loop_count; k++) {
+      int32_t sum32 = 0;
+      matrix0_index = matrix0_index_factor1 * (*matrix0_index_factor2);
+      matrix1_index = matrix1_index_factor1 * (*matrix1_index_factor2);
+      for (n = 0; n < inner_loop_count; n++) {
+        sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
+                                              matrix1[matrix1_index] << shift));
+        matrix0_index += matrix0_index_step;
+        matrix1_index += matrix1_index_step;
+      }
+      matrix_product[matrix_prod_index] = sum32;
+      matrix_prod_index++;
+    }
+  }
+}
+
+/*
+Function WebRtcIsacfix_MatrixProduct2C() returns the product of two matrixes,
+one of which has two columns. It first has to determine the correct index of
+the first matrix before doing the actual element multiplication.
+
+Arguments:
+  matrix0[]:                 A matrix in Q15 domain.
+  matrix1[]:                 A matrix in Q21 domain.
+  matrix_product[]:          Output data in Q17 domain.
+  matrix0_index_factor:      A factor determining the initialization value
+                             of matrix0_index.
+  matrix0_index_step:        Incremental step for matrix0_index.
+*/
+void WebRtcIsacfix_MatrixProduct2C(const int16_t matrix0[],
+                                   const int32_t matrix1[],
+                                   int32_t matrix_product[],
+                                   const int matrix0_index_factor,
+                                   const int matrix0_index_step) {
+  int j = 0, n = 0;
+  int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0;
+  for (j = 0; j < SUBFRAMES; j++) {
+    int32_t sum32 = 0, sum32_2 = 0;
+    matrix1_index = 0;
+    matrix0_index = matrix0_index_factor * j;
+    for (n = SUBFRAMES; n > 0; n--) {
+      sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
+                                            matrix1[matrix1_index]));
+      sum32_2 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
+                                            matrix1[matrix1_index + 1]));
+      matrix1_index += 2;
+      matrix0_index += matrix0_index_step;
+    }
+    matrix_product[matrix_prod_index] = sum32 >> 3;
+    matrix_product[matrix_prod_index + 1] = sum32_2 >> 3;
+    matrix_prod_index += 2;
+  }
+}
+
 int WebRtcIsacfix_DecodeLpc(WebRtc_Word32 *gain_lo_hiQ17,
                             WebRtc_Word16 *LPCCoef_loQ15,
                             WebRtc_Word16 *LPCCoef_hiQ15,
@@ -801,7 +944,7 @@ int WebRtcIsacfix_DecodeLpcCoef(Bitstr_dec *streamdata,
 {
   int j, k, n;
   int err;
-  WebRtc_Word16 pos, pos2, posg, poss, offsg, offss, offs2;
+  WebRtc_Word16 pos, pos2, posg, poss;
   WebRtc_Word16 gainpos;
   WebRtc_Word16 model;
   WebRtc_Word16 index_QQ[KLT_ORDER_SHAPE];
@@ -842,31 +985,17 @@ int WebRtcIsacfix_DecodeLpcCoef(Bitstr_dec *streamdata,
   /* inverse KLT  */
 
   /* left transform */  // Transpose matrix!
-  offsg = 0;
-  offss = 0;
-  posg = 0;
+  WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT1GainQ15[model], tmpcoeffs_gQ17,
+                               tmpcoeffs2_gQ21, kTIndexFactor2, kTIndexFactor2,
+                               kTInitCase0, kTIndexStep1, kTIndexStep1,
+                               kTLoopCount2, kTLoopCount2, kTMatrix1_shift5);
+
   poss = 0;
   for (j=0; j<SUBFRAMES; j++) {
-    offs2 = 0;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = offsg;
-      pos2 = offs2;
-      for (n=0; n<2; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[model][pos2], tmpcoeffs_gQ17[pos]<<5)); // (Q15*Q17)>>(16-5) = Q21
-        pos++;
-        pos2++;
-      }
-      tmpcoeffs2_gQ21[posg] = sumQQ; //Q21
-      posg++;
-      offs2 += 2;
-    }
-    offs2 = 0;
-
     for (k=0; k<LPC_SHAPE_ORDER; k++) {
       sumQQ = 0;
-      pos = offss;
-      pos2 = offs2;
+      pos = LPC_SHAPE_ORDER * j;
+      pos2 = LPC_SHAPE_ORDER * k;
       for (n=0; n<LPC_SHAPE_ORDER; n++) {
         sumQQ += WEBRTC_SPL_MUL_16_16_RSFT(tmpcoeffs_sQ10[pos], WebRtcIsacfix_kT1ShapeQ15[model][pos2], 7); // (Q10*Q15)>>7 = Q18
         pos++;
@@ -874,48 +1003,16 @@ int WebRtcIsacfix_DecodeLpcCoef(Bitstr_dec *streamdata,
       }
       tmpcoeffs2_sQ18[poss] = sumQQ; //Q18
       poss++;
-      offs2 += LPC_SHAPE_ORDER;
     }
-    offsg += 2;
-    offss += LPC_SHAPE_ORDER;
   }
 
   /* right transform */ // Transpose matrix
-  offsg = 0;
-  offss = 0;
-  posg = 0;
-  poss = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = j;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[model][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
-        pos += 2;
-        pos2 += SUBFRAMES;
-
-      }
-      tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
-      posg++;
-    }
-    poss = offss;
-    for (k=0; k<LPC_SHAPE_ORDER; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = j;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2ShapeQ15[model][pos2], tmpcoeffs2_sQ18[pos])); // (Q15*Q18)>>16 = Q17
-        pos += LPC_SHAPE_ORDER;
-        pos2 += SUBFRAMES;
-      }
-      tmpcoeffs_sQ17[poss] = sumQQ;
-      poss++;
-    }
-    offsg += 2;
-    offss += LPC_SHAPE_ORDER;
-  }
+  WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
+                               tmpcoeffs_gQ17, kTIndexFactor1, kTIndexStep2);
+  WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT2ShapeQ15[model],
+      tmpcoeffs2_sQ18, tmpcoeffs_sQ17, kTIndexFactor1, kTIndexFactor1,
+      kTInitCase1, kTIndexStep3, kTIndexStep2, kTLoopCount1, kTLoopCount3,
+      kTMatrix1_shift0);
 
   /* scaling, mean addition, and gain restoration */
   gainpos = 0;
@@ -968,7 +1065,7 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
                           transcode_obj *transcodingParam) {
   int j, k, n;
   WebRtc_Word16 posQQ, pos2QQ, gainpos;
-  WebRtc_Word16  pos, pos2, poss, posg, offsg, offss, offs2;
+  WebRtc_Word16  pos, poss, posg, offsg;
   WebRtc_Word16 index_gQQ[KLT_ORDER_GAIN], index_sQQ[KLT_ORDER_SHAPE];
   WebRtc_Word16 index_ovr_gQQ[KLT_ORDER_GAIN], index_ovr_sQQ[KLT_ORDER_SHAPE];
   WebRtc_Word32 BitsQQ;
@@ -1034,73 +1131,38 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
 
   /* left transform */
   offsg = 0;
-  offss = 0;
+  posg = 0;
   for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = offsg;
-      pos2 = k;
-      for (n=0; n<2; n++) {
-        sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[pos], WebRtcIsacfix_kT1GainQ15[0][pos2]); //Q21 = Q6*Q15
-        pos++;
-        pos2 += 2;
-      }
-      tmpcoeffs2_gQ21[posg] = sumQQ;
-      posg++;
-    }
-    poss = offss;
-    for (k=0; k<LPC_SHAPE_ORDER; k++) {
-      sumQQ = 0;
-      pos = offss;
-      pos2 = k;
-      for (n=0; n<LPC_SHAPE_ORDER; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1ShapeQ15[0][pos2], tmpcoeffs_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
-        pos++;
-        pos2 += LPC_SHAPE_ORDER;
-      }
-      tmpcoeffs2_sQ17[poss] = sumQQ; //Q17
-      poss++;
-    }
+    // Q21 = Q6 * Q15
+    sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
+        WebRtcIsacfix_kT1GainQ15[0][0]);
+    sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
+        WebRtcIsacfix_kT1GainQ15[0][2]);
+    tmpcoeffs2_gQ21[posg] = sumQQ;
+    posg++;
+
+    // Q21 = Q6 * Q15
+    sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
+        WebRtcIsacfix_kT1GainQ15[0][1]);
+    sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
+        WebRtcIsacfix_kT1GainQ15[0][3]);
+    tmpcoeffs2_gQ21[posg] = sumQQ;
+    posg++;
+
     offsg += 2;
-    offss += LPC_SHAPE_ORDER;
   }
 
+  WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT1ShapeQ15[0], tmpcoeffs_sQ17,
+      tmpcoeffs2_sQ17, kTIndexFactor4, kTIndexFactor1, kTInitCase0,
+      kTIndexStep1, kTIndexStep3, kTLoopCount3, kTLoopCount3, kTMatrix1_shift1);
+
   /* right transform */
-  offsg = 0;
-  offss = 0;
-  offs2 = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = offs2;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
-        pos += 2;
-        pos2++;
-      }
-      tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
-      posg++;
-    }
-    poss = offss;
-    for (k=0; k<LPC_SHAPE_ORDER; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = offs2;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2ShapeQ15[0][pos2], tmpcoeffs2_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
-        pos += LPC_SHAPE_ORDER;
-        pos2++;
-      }
-      tmpcoeffs_sQ17[poss] = sumQQ;
-      poss++;
-    }
-    offs2 += SUBFRAMES;
-    offsg += 2;
-    offss += LPC_SHAPE_ORDER;
-  }
+  WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
+                               tmpcoeffs_gQ17, kTIndexFactor3, kTIndexStep1);
+
+  WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT2ShapeQ15[0], tmpcoeffs2_sQ17,
+      tmpcoeffs_sQ17, kTIndexFactor1, kTIndexFactor3, kTInitCase1, kTIndexStep3,
+      kTIndexStep1, kTLoopCount1, kTLoopCount3, kTMatrix1_shift1);
 
   /* quantize coefficients */
 
@@ -1191,47 +1253,14 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
   /* inverse KLT  */
 
   /* left transform */  // Transpose matrix!
-  offss = 0;
-  poss = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    offs2 = 0;
-    for (k=0; k<LPC_SHAPE_ORDER; k++) {
-      sumQQ = 0;
-      pos = offss;
-      pos2 = offs2;
-      for (n=0; n<LPC_SHAPE_ORDER; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1ShapeQ15[0][pos2], tmpcoeffs_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
-        pos++;
-        pos2++;
-      }
-      tmpcoeffs2_sQ17[poss] = sumQQ;
-
-      poss++;
-      offs2 += LPC_SHAPE_ORDER;
-    }
-    offss += LPC_SHAPE_ORDER;
-  }
-
+  WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT1ShapeQ15[0], tmpcoeffs_sQ17,
+      tmpcoeffs2_sQ17, kTIndexFactor4, kTIndexFactor4, kTInitCase0,
+      kTIndexStep1, kTIndexStep1, kTLoopCount3, kTLoopCount3, kTMatrix1_shift1);
 
   /* right transform */ // Transpose matrix
-  offss = 0;
-  poss = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    poss = offss;
-    for (k=0; k<LPC_SHAPE_ORDER; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = j;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2ShapeQ15[0][pos2], tmpcoeffs2_sQ17[pos]<<1)); // (Q15*Q17)>>(16-1) = Q17
-        pos += LPC_SHAPE_ORDER;
-        pos2 += SUBFRAMES;
-      }
-      tmpcoeffs_sQ17[poss] = sumQQ;
-      poss++;
-    }
-    offss += LPC_SHAPE_ORDER;
-  }
+  WebRtcIsacfix_MatrixProduct1(WebRtcIsacfix_kT2ShapeQ15[0], tmpcoeffs2_sQ17,
+      tmpcoeffs_sQ17, kTIndexFactor1, kTIndexFactor1, kTInitCase1, kTIndexStep3,
+      kTIndexStep2, kTLoopCount1, kTLoopCount3, kTMatrix1_shift1);
 
   /* scaling, mean addition, and gain restoration */
   poss = 0;pos=0;
@@ -1266,42 +1295,26 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
   offsg = 0;
   posg = 0;
   for (j=0; j<SUBFRAMES; j++) {
-    offs2 = 0;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = offsg;
-      pos2 = offs2;
-      for (n=0; n<2; n++) {
-        sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][pos2], tmpcoeffs_gQ17[pos])<<1); // (Q15*Q17)>>(16-1) = Q17
-        pos++;
-        pos2++;
-      }
-      tmpcoeffs2_gQ21[posg] = WEBRTC_SPL_LSHIFT_W32(sumQQ, 4); //Q17<<4 = Q21
-      posg++;
-      offs2 += 2;
-    }
+    // (Q15 * Q17) >> (16 - 1) = Q17; Q17 << 4 = Q21.
+    sumQQ = (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][0],
+                                         tmpcoeffs_gQ17[offsg]) << 1);
+    sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][1],
+                                          tmpcoeffs_gQ17[offsg + 1]) << 1);
+    tmpcoeffs2_gQ21[posg] = WEBRTC_SPL_LSHIFT_W32(sumQQ, 4);
+    posg++;
+
+    sumQQ = (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][2],
+                                         tmpcoeffs_gQ17[offsg]) << 1);
+    sumQQ += (WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT1GainQ15[0][3],
+                                          tmpcoeffs_gQ17[offsg + 1]) << 1);
+    tmpcoeffs2_gQ21[posg] = WEBRTC_SPL_LSHIFT_W32(sumQQ, 4);
+    posg++;
     offsg += 2;
   }
 
   /* right transform */ // Transpose matrix
-  offsg = 0;
-  posg = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = j;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
-        pos += 2;
-        pos2 += SUBFRAMES;
-      }
-      tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
-      posg++;
-    }
-    offsg += 2;
-  }
+  WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
+                               tmpcoeffs_gQ17, kTIndexFactor1, kTIndexStep2);
 
   /* scaling, mean addition, and gain restoration */
   posg = 0;
@@ -1323,9 +1336,9 @@ static int EstCodeLpcCoef(WebRtc_Word32 *LPCCoefQ17,
 int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
                                  Bitstr_enc *streamdata,
                                  ISAC_SaveEncData_t* encData) {
-  int j, k, n;
+  int j, k;
   WebRtc_Word16 posQQ, pos2QQ, gainpos;
-  WebRtc_Word16  pos, pos2, posg, offsg, offs2;
+  WebRtc_Word16 posg;
   WebRtc_Word16 index_gQQ[KLT_ORDER_GAIN];
 
   WebRtc_Word16 tmpcoeffs_gQ6[KLT_ORDER_GAIN];
@@ -1343,7 +1356,7 @@ int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
   }
 
   /* log gains, mean removal and scaling */
-  posg = 0; pos = 0; gainpos = 0;
+  posg = 0; gainpos = 0;
 
   for (k=0; k<SUBFRAMES; k++) {
     /* log gains */
@@ -1369,45 +1382,28 @@ int WebRtcIsacfix_EstCodeLpcGain(WebRtc_Word32 *gain_lo_hiQ17,
   /* KLT  */
 
   /* left transform */
-  offsg = 0;
+  posg = 0;
   for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = offsg;
-      pos2 = k;
-      for (n=0; n<2; n++) {
-        sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[pos], WebRtcIsacfix_kT1GainQ15[0][pos2]); //Q21 = Q6*Q15
-        pos++;
-        pos2 += 2;
-      }
+      // Q21 = Q6 * Q15
+      sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2],
+                                   WebRtcIsacfix_kT1GainQ15[0][0]);
+      sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2 + 1],
+                                    WebRtcIsacfix_kT1GainQ15[0][2]);
       tmpcoeffs2_gQ21[posg] = sumQQ;
       posg++;
-    }
-    offsg += 2;
-  }
 
-  /* right transform */
-  offsg = 0;
-  offs2 = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = offs2;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
-        pos += 2;
-        pos2++;
-      }
-      tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
+      sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2],
+                                   WebRtcIsacfix_kT1GainQ15[0][1]);
+      sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[j * 2 + 1],
+                                    WebRtcIsacfix_kT1GainQ15[0][3]);
+      tmpcoeffs2_gQ21[posg] = sumQQ;
       posg++;
-    }
-    offsg += 2;
-    offs2 += SUBFRAMES;
   }
 
+  /* right transform */
+  WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
+                               tmpcoeffs_gQ17, kTIndexFactor3, kTIndexStep1);
+
   /* quantize coefficients */
 
   for (k=0; k<KLT_ORDER_GAIN; k++) //ATTN: ok?
@@ -1454,7 +1450,8 @@ int WebRtcIsacfix_EncodeLpc(WebRtc_Word32 *gain_lo_hiQ17,
 
   Poly2LarFix(LPCCoef_loQ15, ORDERLO, LPCCoef_hiQ15, ORDERHI, SUBFRAMES, larsQ17);
 
-  status = EstCodeLpcCoef(larsQ17, gain_lo_hiQ17, model, sizeQ11, streamdata, encData, transcodeParam);
+  status = EstCodeLpcCoef(larsQ17, gain_lo_hiQ17, model, sizeQ11,
+                          streamdata, encData, transcodeParam);
   if (status < 0) {
     return (status);
   }
@@ -1978,9 +1975,9 @@ int WebRtcIsacfix_EncodeReceiveBandwidth(WebRtc_Word16 *BWno, Bitstr_enc *stream
 /* estimate codel length of LPC Coef */
 void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
                                     WebRtc_Word16 *index_gQQ) {
-  int j, k, n;
+  int j, k;
   WebRtc_Word16 posQQ, pos2QQ;
-  WebRtc_Word16  pos, pos2, posg, offsg, offs2, gainpos;
+  WebRtc_Word16 posg, offsg, gainpos;
   WebRtc_Word32 tmpcoeffs_gQ6[KLT_ORDER_GAIN];
   WebRtc_Word32 tmpcoeffs_gQ17[KLT_ORDER_GAIN];
   WebRtc_Word32 tmpcoeffs2_gQ21[KLT_ORDER_GAIN];
@@ -1988,7 +1985,7 @@ void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
 
 
   /* log gains, mean removal and scaling */
-  posg = 0;pos=0; gainpos=0;
+  posg = 0; gainpos=0;
 
   for (k=0; k<SUBFRAMES; k++) {
     /* log gains */
@@ -2017,43 +2014,26 @@ void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *gain_lo_hiQ17,
   /* left transform */
   offsg = 0;
   for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = offsg;
-      pos2 = k;
-      for (n=0; n<2; n++) {
-        sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[pos], WebRtcIsacfix_kT1GainQ15[0][pos2]); //Q21 = Q6*Q15
-        pos++;
-        pos2 += 2;
-      }
+      // Q21 = Q6 * Q15
+      sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
+                                   WebRtcIsacfix_kT1GainQ15[0][0]);
+      sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
+                                    WebRtcIsacfix_kT1GainQ15[0][2]);
       tmpcoeffs2_gQ21[posg] = sumQQ;
       posg++;
-    }
 
-    offsg += 2;
+      // Q21 = Q6 * Q15
+      sumQQ = WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg],
+                                   WebRtcIsacfix_kT1GainQ15[0][1]);
+      sumQQ += WEBRTC_SPL_MUL_16_16(tmpcoeffs_gQ6[offsg + 1],
+                                    WebRtcIsacfix_kT1GainQ15[0][3]);
+      tmpcoeffs2_gQ21[posg] = sumQQ;
+      posg++;
   }
 
   /* right transform */
-  offsg = 0;
-  offs2 = 0;
-  for (j=0; j<SUBFRAMES; j++) {
-    posg = offsg;
-    for (k=0; k<2; k++) {
-      sumQQ = 0;
-      pos = k;
-      pos2 = offs2;
-      for (n=0; n<SUBFRAMES; n++) {
-        sumQQ += WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_32_RSFT16(WebRtcIsacfix_kT2GainQ15[0][pos2], tmpcoeffs2_gQ21[pos]), 1); // (Q15*Q21)>>(16-1) = Q21
-        pos += 2;
-        pos2++;
-      }
-      tmpcoeffs_gQ17[posg] = WEBRTC_SPL_RSHIFT_W32(sumQQ, 4);
-      posg++;
-    }
-    offsg += 2;
-    offs2 += SUBFRAMES;
-  }
+  WebRtcIsacfix_MatrixProduct2(WebRtcIsacfix_kT2GainQ15[0], tmpcoeffs2_gQ21,
+                               tmpcoeffs_gQ17, kTIndexFactor3, kTIndexStep1);
 
   /* quantize coefficients */
   for (k=0; k<KLT_ORDER_GAIN; k++) //ATTN: ok?
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h
index 298ea223ac..941fd0b9cf 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding.h
@@ -108,4 +108,62 @@ int WebRtcIsacfix_EncodeReceiveBandwidth(WebRtc_Word16 *BWno,
 void WebRtcIsacfix_TranscodeLpcCoef(WebRtc_Word32 *tmpcoeffs_gQ6,
                                     WebRtc_Word16 *index_gQQ);
 
-#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_ENTROPY_CODING_H_ */
+// Pointer functions for LPC transforms.
+
+typedef void (*MatrixProduct1)(const int16_t matrix0[],
+                               const int32_t matrix1[],
+                               int32_t matrix_product[],
+                               const int matrix1_index_factor1,
+                               const int matrix0_index_factor1,
+                               const int matrix1_index_init_case,
+                               const int matrix1_index_step,
+                               const int matrix0_index_step,
+                               const int inner_loop_count,
+                               const int mid_loop_count,
+                               const int shift);
+typedef void (*MatrixProduct2)(const int16_t matrix0[],
+                               const int32_t matrix1[],
+                               int32_t matrix_product[],
+                               const int matrix0_index_factor,
+                               const int matrix0_index_step);
+
+extern MatrixProduct1 WebRtcIsacfix_MatrixProduct1;
+extern MatrixProduct2 WebRtcIsacfix_MatrixProduct2;
+
+void WebRtcIsacfix_MatrixProduct1C(const int16_t matrix0[],
+                                   const int32_t matrix1[],
+                                   int32_t matrix_product[],
+                                   const int matrix1_index_factor1,
+                                   const int matrix0_index_factor1,
+                                   const int matrix1_index_init_case,
+                                   const int matrix1_index_step,
+                                   const int matrix0_index_step,
+                                   const int inner_loop_count,
+                                   const int mid_loop_count,
+                                   const int shift);
+void WebRtcIsacfix_MatrixProduct2C(const int16_t matrix0[],
+                                   const int32_t matrix1[],
+                                   int32_t matrix_product[],
+                                   const int matrix0_index_factor,
+                                   const int matrix0_index_step);
+
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+void WebRtcIsacfix_MatrixProduct1Neon(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix1_index_factor1,
+                                      const int matrix0_index_factor1,
+                                      const int matrix1_index_init_case,
+                                      const int matrix1_index_step,
+                                      const int matrix0_index_step,
+                                      const int inner_loop_count,
+                                      const int mid_loop_count,
+                                      const int shift);
+void WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix0_index_factor,
+                                      const int matrix0_index_step);
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_ENTROPY_CODING_H_
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_neon.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_neon.c
new file mode 100644
index 0000000000..3cc1be8df8
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/entropy_coding_neon.c
@@ -0,0 +1,220 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* This file contains WebRtcIsacfix_MatrixProduct1Neon() and
+ * WebRtcIsacfix_MatrixProduct2Neon() for ARM Neon platform. API's are in
+ * entropy_coding.c. Results are bit exact with the c code for
+ * generic platforms.
+ */
+
+#include "entropy_coding.h"
+
+#include <arm_neon.h>
+#include <assert.h>
+#include <stddef.h>
+
+#include "signal_processing_library.h"
+
+void WebRtcIsacfix_MatrixProduct1Neon(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix1_index_factor1,
+                                      const int matrix0_index_factor1,
+                                      const int matrix1_index_init_case,
+                                      const int matrix1_index_step,
+                                      const int matrix0_index_step,
+                                      const int inner_loop_count,
+                                      const int mid_loop_count,
+                                      const int shift) {
+  int j = 0, k = 0, n = 0;
+  int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0;
+  int* matrix1_index_factor2 = &j;
+  int* matrix0_index_factor2 = &k;
+  if (matrix1_index_init_case != 0) {
+    matrix1_index_factor2 = &k;
+    matrix0_index_factor2 = &j;
+  }
+  int32x4_t shift32x4 = vdupq_n_s32(shift);
+  int32x2_t shift32x2 = vdup_n_s32(shift);
+
+  assert(inner_loop_count % 2 == 0);
+  assert(mid_loop_count % 2 == 0);
+
+  if (matrix1_index_init_case != 0 && matrix1_index_factor1 == 1) {
+    for (j = 0; j < SUBFRAMES; j++) {
+      matrix_prod_index = mid_loop_count * j;
+      for (k = 0; k < (mid_loop_count >> 2) << 2; k += 4) {
+        // Initialize sum_32x4 to zeros.
+        int32x4_t sum_32x4 = veorq_s32(sum_32x4, sum_32x4);
+        matrix1_index = k;
+        matrix0_index = matrix0_index_factor1 * j;
+        for (n = 0; n < inner_loop_count; n++) {
+          int32x4_t matrix0_32x4 =
+              vdupq_n_s32((int32_t)(matrix0[matrix0_index]) << 15);
+          int32x4_t matrix1_32x4 =
+              vshlq_s32(vld1q_s32(&matrix1[matrix1_index]), shift32x4);
+          int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4);
+          sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4);
+          matrix1_index += matrix1_index_step;
+          matrix0_index += matrix0_index_step;
+        }
+        vst1q_s32(&matrix_product[matrix_prod_index], sum_32x4);
+        matrix_prod_index += 4;
+      }
+      if (mid_loop_count % 4 > 1) {
+        // Initialize sum_32x2 to zeros.
+        int32x2_t sum_32x2 = veor_s32(sum_32x2, sum_32x2);
+        matrix1_index = k;
+        k += 2;
+        matrix0_index = matrix0_index_factor1 * j;
+        for (n = 0; n < inner_loop_count; n++) {
+          int32x2_t matrix0_32x2 =
+              vdup_n_s32((int32_t)(matrix0[matrix0_index]) << 15);
+          int32x2_t matrix1_32x2 =
+              vshl_s32(vld1_s32(&matrix1[matrix1_index]), shift32x2);
+          int32x2_t multi_32x2 = vqdmulh_s32(matrix0_32x2, matrix1_32x2);
+          sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
+          matrix1_index += matrix1_index_step;
+          matrix0_index += matrix0_index_step;
+        }
+        vst1_s32(&matrix_product[matrix_prod_index], sum_32x2);
+        matrix_prod_index += 2;
+      }
+    }
+  }
+  else if (matrix1_index_init_case == 0 && matrix0_index_factor1 == 1) {
+    for (j = 0; j < SUBFRAMES; j++) {
+      matrix_prod_index = mid_loop_count * j;
+      for (k = 0; k < (mid_loop_count >> 2) << 2; k += 4) {
+        // Initialize sum_32x4 to zeros.
+        int32x4_t sum_32x4 = veorq_s32(sum_32x4, sum_32x4);
+        matrix1_index = matrix1_index_factor1 * j;
+        matrix0_index = k;
+        for (n = 0; n < inner_loop_count; n++) {
+          int32x4_t matrix1_32x4 = vdupq_n_s32(matrix1[matrix1_index] << shift);
+          int32x4_t matrix0_32x4 =
+              vshll_n_s16(vld1_s16(&matrix0[matrix0_index]), 15);
+          int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4);
+          sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4);
+          matrix1_index += matrix1_index_step;
+          matrix0_index += matrix0_index_step;
+        }
+        vst1q_s32(&matrix_product[matrix_prod_index], sum_32x4);
+        matrix_prod_index += 4;
+      }
+      if (mid_loop_count % 4 > 1) {
+        // Initialize sum_32x2 to zeros.
+        int32x2_t sum_32x2 = veor_s32(sum_32x2, sum_32x2);
+        matrix1_index = matrix1_index_factor1 * j;
+        matrix0_index = k;
+        for (n = 0; n < inner_loop_count; n++) {
+          int32x2_t multi_32x2;
+          int32x2_t matrix1_32x2 = vdup_n_s32(matrix1[matrix1_index] << shift);
+          int32x2_t matrix0_32x2 =
+              vset_lane_s32((int32_t)matrix0[matrix0_index], matrix0_32x2, 0);
+          matrix0_32x2 = vset_lane_s32((int32_t)matrix0[matrix0_index + 1],
+                                     matrix0_32x2, 1);
+          matrix0_32x2 = vshl_n_s32(matrix0_32x2, 15);
+          multi_32x2 = vqdmulh_s32(matrix1_32x2, matrix0_32x2);
+          sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
+          matrix1_index += matrix1_index_step;
+          matrix0_index += matrix0_index_step;
+        }
+        vst1_s32(&matrix_product[matrix_prod_index], sum_32x2);
+        matrix_prod_index += 2;
+      }
+    }
+  }
+  else if (matrix1_index_init_case == 0 &&
+           matrix1_index_step == 1 &&
+           matrix0_index_step == 1) {
+    for (j = 0; j < SUBFRAMES; j++) {
+      matrix_prod_index = mid_loop_count * j;
+      for (k = 0; k < mid_loop_count; k++) {
+        int32x2_t sum_32x2;
+        // Initialize sum_32x4 to zeros.
+        int32x4_t sum_32x4 = veorq_s32(sum_32x4, sum_32x4);
+        matrix1_index = matrix1_index_factor1 * j;
+        matrix0_index = matrix0_index_factor1 * k;
+        for (n = 0; n < (inner_loop_count >> 2) << 2; n += 4) {
+          int32x4_t matrix1_32x4 =
+              vshlq_s32(vld1q_s32(&matrix1[matrix1_index]), shift32x4);
+          int32x4_t matrix0_32x4 =
+              vshll_n_s16(vld1_s16(&matrix0[matrix0_index]), 15);
+          int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4);
+          sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4);
+          matrix1_index += 4;
+          matrix0_index += 4;
+        }
+        sum_32x2 = vqadd_s32(vget_low_s32(sum_32x4), vget_high_s32(sum_32x4));
+        if (inner_loop_count % 4 > 1) {
+          int32x2_t multi_32x2;
+          int32x2_t matrix1_32x2 =
+              vshl_s32(vld1_s32(&matrix1[matrix1_index]), shift32x2);
+          int32x2_t matrix0_32x2 =
+              vset_lane_s32((int32_t)matrix0[matrix0_index], matrix0_32x2, 0);
+          matrix0_32x2 = vset_lane_s32((int32_t)matrix0[matrix0_index + 1],
+                                     matrix0_32x2, 1);
+          matrix0_32x2 = vshl_n_s32(matrix0_32x2, 15);
+          multi_32x2 = vqdmulh_s32(matrix1_32x2, matrix0_32x2);
+          sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
+        }
+        sum_32x2 = vpadd_s32(sum_32x2, sum_32x2);
+        vst1_lane_s32(&matrix_product[matrix_prod_index], sum_32x2, 0);
+        matrix_prod_index++;
+      }
+    }
+  }
+  else {
+    for (j = 0; j < SUBFRAMES; j++) {
+      matrix_prod_index = mid_loop_count * j;
+      for (k=0; k < mid_loop_count; k++) {
+        int32_t sum32 = 0;
+        matrix1_index = matrix1_index_factor1 * (*matrix1_index_factor2);
+        matrix0_index = matrix0_index_factor1 * (*matrix0_index_factor2);
+        for (n = 0; n < inner_loop_count; n++) {
+          sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index],
+              matrix1[matrix1_index] << shift));
+          matrix1_index += matrix1_index_step;
+          matrix0_index += matrix0_index_step;
+        }
+        matrix_product[matrix_prod_index] = sum32;
+        matrix_prod_index++;
+      }
+    }
+  }
+}
+
+void WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[],
+                                      const int32_t matrix1[],
+                                      int32_t matrix_product[],
+                                      const int matrix0_index_factor,
+                                      const int matrix0_index_step) {
+  int j = 0, n = 0;
+  int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0;
+  for (j = 0; j < SUBFRAMES; j++) {
+    // Initialize sum_32x2 to zeros.
+    int32x2_t sum_32x2 = veor_s32(sum_32x2, sum_32x2);
+    matrix1_index = 0;
+    matrix0_index = matrix0_index_factor * j;
+    for (n = SUBFRAMES; n > 0; n--) {
+      int32x2_t matrix0_32x2 =
+          vdup_n_s32((int32_t)(matrix0[matrix0_index]) << 15);
+      int32x2_t matrix1_32x2 = vld1_s32(&matrix1[matrix1_index]);
+      int32x2_t multi_32x2 = vqdmulh_s32(matrix0_32x2, matrix1_32x2);
+      sum_32x2 = vqadd_s32(sum_32x2, multi_32x2);
+      matrix1_index += 2;
+      matrix0_index += matrix0_index_step;
+    }
+    sum_32x2 = vshr_n_s32(sum_32x2, 3);
+    vst1_s32(&matrix_product[matrix_prod_index], sum_32x2);
+    matrix_prod_index += 2;
+  }
+}
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
index 68498de191..f9c481dea6 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c
@@ -31,6 +31,8 @@
 FilterMaLoopFix WebRtcIsacfix_FilterMaLoopFix;
 Spec2Time WebRtcIsacfix_Spec2Time;
 Time2Spec WebRtcIsacfix_Time2Spec;
+MatrixProduct1 WebRtcIsacfix_MatrixProduct1;
+MatrixProduct2 WebRtcIsacfix_MatrixProduct2;
 
 /**************************************************************************
  * WebRtcIsacfix_AssignSize(...)
@@ -192,6 +194,8 @@ static void WebRtcIsacfix_InitNeon(void) {
       WebRtcIsacfix_CalculateResidualEnergyNeon;
   WebRtcIsacfix_AllpassFilter2FixDec16 =
       WebRtcIsacfix_AllpassFilter2FixDec16Neon;
+  WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1Neon;
+  WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2Neon;
 }
 #endif
 
@@ -281,6 +285,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
   WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C;
   WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecC;
   WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeC;
+  WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1C;
+  WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2C ;
 
 #ifdef WEBRTC_DETECT_ARM_NEON
   if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
index 88a85da29b..cc93c3179c 100644
--- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
@@ -97,6 +97,7 @@
             '<(webrtc_root)/common_audio/common_audio.gyp:signal_processing',
           ],
           'sources': [
+            'entropy_coding_neon.c',
             'filterbanks_neon.S',
             'filters_neon.S',
             'lattice_neon.S',
-- 
cgit v1.2.3