diff options
author | Pierre Langlois <pierre.langlois@arm.com> | 2017-01-24 17:41:26 +0000 |
---|---|---|
committer | Pierre Langlois <pierre.langlois@arm.com> | 2017-01-25 11:10:13 +0000 |
commit | bde2e4b5ce376456d50a972b6f3aaee3475f8786 (patch) | |
tree | 8588d5fcbfe1d53b568ef1ae8d45252b215521e8 /test/aarch64 | |
parent | f4ba40fc419a9d484da9be1df051ad03327ce4f4 (diff) | |
download | vixl-bde2e4b5ce376456d50a972b6f3aaee3475f8786.tar.gz |
Enable clang-format in the tests
Extend the clang_format.py script to format everything but trace files in
the `test/` directory. Also, we have to add "on/off" markers in
`test-simulator-inputs-aarch64.h` as clang-format is getting confused
there.
Change-Id: I1159498072bda1bfd049082aeccb347ec55e7825
Diffstat (limited to 'test/aarch64')
-rw-r--r-- | test/aarch64/examples/test-examples.cc | 370 | ||||
-rw-r--r-- | test/aarch64/test-abi.cc | 73 | ||||
-rw-r--r-- | test/aarch64/test-assembler-aarch64.cc | 2880 | ||||
-rw-r--r-- | test/aarch64/test-disasm-aarch64.cc | 3066 | ||||
-rw-r--r-- | test/aarch64/test-fuzz-aarch64.cc | 6 | ||||
-rw-r--r-- | test/aarch64/test-simulator-aarch64.cc | 3236 | ||||
-rw-r--r-- | test/aarch64/test-simulator-inputs-aarch64.h | 139 | ||||
-rw-r--r-- | test/aarch64/test-simulator-traces-aarch64.h | 4 | ||||
-rw-r--r-- | test/aarch64/test-trace-aarch64.cc | 420 | ||||
-rw-r--r-- | test/aarch64/test-utils-aarch64.cc | 113 | ||||
-rw-r--r-- | test/aarch64/test-utils-aarch64.h | 49 |
11 files changed, 5746 insertions, 4610 deletions
diff --git a/test/aarch64/examples/test-examples.cc b/test/aarch64/examples/test-examples.cc index ec1ce7b8..7817536a 100644 --- a/test/aarch64/examples/test-examples.cc +++ b/test/aarch64/examples/test-examples.cc @@ -39,9 +39,7 @@ using namespace vixl; using namespace vixl::aarch64; -TEST(custom_disassembler) { - TestCustomDisassembler(); -} +TEST(custom_disassembler) { TestCustomDisassembler(); } // The tests below only work with the simulator. @@ -63,30 +61,28 @@ uint64_t FactorialC(uint64_t n) { // Multiply two column-major 4x4 matrices of 32 bit floating point values. // Return a column-major 4x4 matrix of 32 bit floating point values in 'C'. void MatrixMultiplyC(float C[16], float A[16], float B[16]) { - C[ 0] = A[ 0]*B[ 0] + A[ 4]*B[ 1] + A[ 8]*B[ 2] + A[12]*B[ 3]; - C[ 1] = A[ 1]*B[ 0] + A[ 5]*B[ 1] + A[ 9]*B[ 2] + A[13]*B[ 3]; - C[ 2] = A[ 2]*B[ 0] + A[ 6]*B[ 1] + A[10]*B[ 2] + A[14]*B[ 3]; - C[ 3] = A[ 3]*B[ 0] + A[ 7]*B[ 1] + A[11]*B[ 2] + A[15]*B[ 3]; - - C[ 4] = A[ 0]*B[ 4] + A[ 4]*B[ 5] + A[ 8]*B[ 6] + A[12]*B[ 7]; - C[ 5] = A[ 1]*B[ 4] + A[ 5]*B[ 5] + A[ 9]*B[ 6] + A[13]*B[ 7]; - C[ 6] = A[ 2]*B[ 4] + A[ 6]*B[ 5] + A[10]*B[ 6] + A[14]*B[ 7]; - C[ 7] = A[ 3]*B[ 4] + A[ 7]*B[ 5] + A[11]*B[ 6] + A[15]*B[ 7]; - - C[ 8] = A[ 0]*B[ 8] + A[ 4]*B[ 9] + A[ 8]*B[10] + A[12]*B[11]; - C[ 9] = A[ 1]*B[ 8] + A[ 5]*B[ 9] + A[ 9]*B[10] + A[13]*B[11]; - C[10] = A[ 2]*B[ 8] + A[ 6]*B[ 9] + A[10]*B[10] + A[14]*B[11]; - C[11] = A[ 3]*B[ 8] + A[ 7]*B[ 9] + A[11]*B[10] + A[15]*B[11]; - - C[12] = A[ 0]*B[12] + A[ 4]*B[13] + A[ 8]*B[14] + A[12]*B[15]; - C[13] = A[ 1]*B[12] + A[ 5]*B[13] + A[ 9]*B[14] + A[13]*B[15]; - C[14] = A[ 2]*B[12] + A[ 6]*B[13] + A[10]*B[14] + A[14]*B[15]; - C[15] = A[ 3]*B[12] + A[ 7]*B[13] + A[11]*B[14] + A[15]*B[15]; + C[0] = A[0] * B[0] + A[4] * B[1] + A[8] * B[2] + A[12] * B[3]; + C[1] = A[1] * B[0] + A[5] * B[1] + A[9] * B[2] + A[13] * B[3]; + C[2] = A[2] * B[0] + A[6] * B[1] + A[10] * B[2] + A[14] * B[3]; + C[3] = A[3] * B[0] + A[7] * B[1] + 
A[11] * B[2] + A[15] * B[3]; + + C[4] = A[0] * B[4] + A[4] * B[5] + A[8] * B[6] + A[12] * B[7]; + C[5] = A[1] * B[4] + A[5] * B[5] + A[9] * B[6] + A[13] * B[7]; + C[6] = A[2] * B[4] + A[6] * B[5] + A[10] * B[6] + A[14] * B[7]; + C[7] = A[3] * B[4] + A[7] * B[5] + A[11] * B[6] + A[15] * B[7]; + + C[8] = A[0] * B[8] + A[4] * B[9] + A[8] * B[10] + A[12] * B[11]; + C[9] = A[1] * B[8] + A[5] * B[9] + A[9] * B[10] + A[13] * B[11]; + C[10] = A[2] * B[8] + A[6] * B[9] + A[10] * B[10] + A[14] * B[11]; + C[11] = A[3] * B[8] + A[7] * B[9] + A[11] * B[10] + A[15] * B[11]; + + C[12] = A[0] * B[12] + A[4] * B[13] + A[8] * B[14] + A[12] * B[15]; + C[13] = A[1] * B[12] + A[5] * B[13] + A[9] * B[14] + A[13] * B[15]; + C[14] = A[2] * B[12] + A[6] * B[13] + A[10] * B[14] + A[14] * B[15]; + C[15] = A[3] * B[12] + A[7] * B[13] + A[11] * B[14] + A[15] * B[15]; } -double Add3DoubleC(double x, double y, double z) { - return x + y + z; -} +double Add3DoubleC(double x, double y, double z) { return x + y + z; } double Add4DoubleC(uint64_t a, double b, uint64_t c, double d) { return static_cast<double>(a) + b + static_cast<double>(c) + d; @@ -103,7 +99,7 @@ uint32_t SumArrayC(uint8_t* array, uint32_t size) { } -void GenerateTestWrapper(MacroAssembler* masm, RegisterDump *regs) { +void GenerateTestWrapper(MacroAssembler* masm, RegisterDump* regs) { __ Push(xzr, lr); __ Blr(x15); regs->Dump(masm); @@ -112,92 +108,91 @@ void GenerateTestWrapper(MacroAssembler* masm, RegisterDump *regs) { } -#define TEST_FUNCTION(Func) \ - do { \ - int64_t saved_xregs[13]; \ - saved_xregs[0] = simulator.ReadXRegister(19); \ - saved_xregs[1] = simulator.ReadXRegister(20); \ - saved_xregs[2] = simulator.ReadXRegister(21); \ - saved_xregs[3] = simulator.ReadXRegister(22); \ - saved_xregs[4] = simulator.ReadXRegister(23); \ - saved_xregs[5] = simulator.ReadXRegister(24); \ - saved_xregs[6] = simulator.ReadXRegister(25); \ - saved_xregs[7] = simulator.ReadXRegister(26); \ - saved_xregs[8] = 
simulator.ReadXRegister(27); \ - saved_xregs[9] = simulator.ReadXRegister(28); \ - saved_xregs[10] = simulator.ReadXRegister(29); \ - saved_xregs[11] = simulator.ReadXRegister(30); \ - saved_xregs[12] = simulator.ReadXRegister(31); \ - \ - uint64_t saved_dregs[8]; \ - saved_dregs[0] = simulator.ReadDRegisterBits(8); \ - saved_dregs[1] = simulator.ReadDRegisterBits(9); \ - saved_dregs[2] = simulator.ReadDRegisterBits(10); \ - saved_dregs[3] = simulator.ReadDRegisterBits(11); \ - saved_dregs[4] = simulator.ReadDRegisterBits(12); \ - saved_dregs[5] = simulator.ReadDRegisterBits(13); \ - saved_dregs[6] = simulator.ReadDRegisterBits(14); \ - saved_dregs[7] = simulator.ReadDRegisterBits(15); \ - \ - simulator.WriteXRegister(15, masm.GetLabelAddress<uint64_t>(&Func));\ - simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&test)); \ - \ - VIXL_CHECK(saved_xregs[0] == simulator.ReadXRegister(19)); \ - VIXL_CHECK(saved_xregs[1] == simulator.ReadXRegister(20)); \ - VIXL_CHECK(saved_xregs[2] == simulator.ReadXRegister(21)); \ - VIXL_CHECK(saved_xregs[3] == simulator.ReadXRegister(22)); \ - VIXL_CHECK(saved_xregs[4] == simulator.ReadXRegister(23)); \ - VIXL_CHECK(saved_xregs[5] == simulator.ReadXRegister(24)); \ - VIXL_CHECK(saved_xregs[6] == simulator.ReadXRegister(25)); \ - VIXL_CHECK(saved_xregs[7] == simulator.ReadXRegister(26)); \ - VIXL_CHECK(saved_xregs[8] == simulator.ReadXRegister(27)); \ - VIXL_CHECK(saved_xregs[9] == simulator.ReadXRegister(28)); \ - VIXL_CHECK(saved_xregs[10] == simulator.ReadXRegister(29)); \ - VIXL_CHECK(saved_xregs[11] == simulator.ReadXRegister(30)); \ - VIXL_CHECK(saved_xregs[12] == simulator.ReadXRegister(31)); \ - \ - VIXL_CHECK(saved_dregs[0] == simulator.ReadDRegisterBits(8)); \ - VIXL_CHECK(saved_dregs[1] == simulator.ReadDRegisterBits(9)); \ - VIXL_CHECK(saved_dregs[2] == simulator.ReadDRegisterBits(10)); \ - VIXL_CHECK(saved_dregs[3] == simulator.ReadDRegisterBits(11)); \ - VIXL_CHECK(saved_dregs[4] == simulator.ReadDRegisterBits(12)); 
\ - VIXL_CHECK(saved_dregs[5] == simulator.ReadDRegisterBits(13)); \ - VIXL_CHECK(saved_dregs[6] == simulator.ReadDRegisterBits(14)); \ - VIXL_CHECK(saved_dregs[7] == simulator.ReadDRegisterBits(15)); \ - \ +#define TEST_FUNCTION(Func) \ + do { \ + int64_t saved_xregs[13]; \ + saved_xregs[0] = simulator.ReadXRegister(19); \ + saved_xregs[1] = simulator.ReadXRegister(20); \ + saved_xregs[2] = simulator.ReadXRegister(21); \ + saved_xregs[3] = simulator.ReadXRegister(22); \ + saved_xregs[4] = simulator.ReadXRegister(23); \ + saved_xregs[5] = simulator.ReadXRegister(24); \ + saved_xregs[6] = simulator.ReadXRegister(25); \ + saved_xregs[7] = simulator.ReadXRegister(26); \ + saved_xregs[8] = simulator.ReadXRegister(27); \ + saved_xregs[9] = simulator.ReadXRegister(28); \ + saved_xregs[10] = simulator.ReadXRegister(29); \ + saved_xregs[11] = simulator.ReadXRegister(30); \ + saved_xregs[12] = simulator.ReadXRegister(31); \ + \ + uint64_t saved_dregs[8]; \ + saved_dregs[0] = simulator.ReadDRegisterBits(8); \ + saved_dregs[1] = simulator.ReadDRegisterBits(9); \ + saved_dregs[2] = simulator.ReadDRegisterBits(10); \ + saved_dregs[3] = simulator.ReadDRegisterBits(11); \ + saved_dregs[4] = simulator.ReadDRegisterBits(12); \ + saved_dregs[5] = simulator.ReadDRegisterBits(13); \ + saved_dregs[6] = simulator.ReadDRegisterBits(14); \ + saved_dregs[7] = simulator.ReadDRegisterBits(15); \ + \ + simulator.WriteXRegister(15, masm.GetLabelAddress<uint64_t>(&Func)); \ + simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&test)); \ + \ + VIXL_CHECK(saved_xregs[0] == simulator.ReadXRegister(19)); \ + VIXL_CHECK(saved_xregs[1] == simulator.ReadXRegister(20)); \ + VIXL_CHECK(saved_xregs[2] == simulator.ReadXRegister(21)); \ + VIXL_CHECK(saved_xregs[3] == simulator.ReadXRegister(22)); \ + VIXL_CHECK(saved_xregs[4] == simulator.ReadXRegister(23)); \ + VIXL_CHECK(saved_xregs[5] == simulator.ReadXRegister(24)); \ + VIXL_CHECK(saved_xregs[6] == simulator.ReadXRegister(25)); \ + 
VIXL_CHECK(saved_xregs[7] == simulator.ReadXRegister(26)); \ + VIXL_CHECK(saved_xregs[8] == simulator.ReadXRegister(27)); \ + VIXL_CHECK(saved_xregs[9] == simulator.ReadXRegister(28)); \ + VIXL_CHECK(saved_xregs[10] == simulator.ReadXRegister(29)); \ + VIXL_CHECK(saved_xregs[11] == simulator.ReadXRegister(30)); \ + VIXL_CHECK(saved_xregs[12] == simulator.ReadXRegister(31)); \ + \ + VIXL_CHECK(saved_dregs[0] == simulator.ReadDRegisterBits(8)); \ + VIXL_CHECK(saved_dregs[1] == simulator.ReadDRegisterBits(9)); \ + VIXL_CHECK(saved_dregs[2] == simulator.ReadDRegisterBits(10)); \ + VIXL_CHECK(saved_dregs[3] == simulator.ReadDRegisterBits(11)); \ + VIXL_CHECK(saved_dregs[4] == simulator.ReadDRegisterBits(12)); \ + VIXL_CHECK(saved_dregs[5] == simulator.ReadDRegisterBits(13)); \ + VIXL_CHECK(saved_dregs[6] == simulator.ReadDRegisterBits(14)); \ + VIXL_CHECK(saved_dregs[7] == simulator.ReadDRegisterBits(15)); \ + \ } while (0) -#define START() \ - MacroAssembler masm; \ - Decoder decoder; \ - Debugger simulator(&decoder); \ - simulator.SetColouredTrace(Test::coloured_trace()); \ - PrintDisassembler* pdis = NULL; \ - Instrument* inst = NULL; \ - if (Test::trace_sim()) { \ - pdis = new PrintDisassembler(stdout); \ - decoder.PrependVisitor(pdis); \ - } \ - if (Test::instruction_stats()) { \ - inst = new Instrument("vixl_stats.csv", 10); \ - inst->Enable(); \ - decoder.AppendVisitor(inst); \ - } \ - RegisterDump regs; \ - \ - Label test; \ - masm.Bind(&test); \ - GenerateTestWrapper(&masm, ®s); \ +#define START() \ + MacroAssembler masm; \ + Decoder decoder; \ + Debugger simulator(&decoder); \ + simulator.SetColouredTrace(Test::coloured_trace()); \ + PrintDisassembler* pdis = NULL; \ + Instrument* inst = NULL; \ + if (Test::trace_sim()) { \ + pdis = new PrintDisassembler(stdout); \ + decoder.PrependVisitor(pdis); \ + } \ + if (Test::instruction_stats()) { \ + inst = new Instrument("vixl_stats.csv", 10); \ + inst->Enable(); \ + decoder.AppendVisitor(inst); \ + } \ + 
RegisterDump regs; \ + \ + Label test; \ + masm.Bind(&test); \ + GenerateTestWrapper(&masm, ®s); \ masm.FinalizeCode() - -#define FACTORIAL_DOTEST(N) \ - do { \ - simulator.ResetState(); \ - simulator.WriteXRegister(0, N); \ - TEST_FUNCTION(factorial); \ - VIXL_CHECK(static_cast<uint64_t>(regs.xreg(0)) == FactorialC(N)); \ +#define FACTORIAL_DOTEST(N) \ + do { \ + simulator.ResetState(); \ + simulator.WriteXRegister(0, N); \ + TEST_FUNCTION(factorial); \ + VIXL_CHECK(static_cast<uint64_t>(regs.xreg(0)) == FactorialC(N)); \ } while (0) TEST(factorial) { @@ -217,12 +212,12 @@ TEST(factorial) { } -#define FACTORIAL_REC_DOTEST(N) \ - do { \ - simulator.ResetState(); \ - simulator.WriteXRegister(0, N); \ - TEST_FUNCTION(factorial_rec); \ - VIXL_CHECK(static_cast<uint64_t>(regs.xreg(0)) == FactorialC(N)); \ +#define FACTORIAL_REC_DOTEST(N) \ + do { \ + simulator.ResetState(); \ + simulator.WriteXRegister(0, N); \ + TEST_FUNCTION(factorial_rec); \ + VIXL_CHECK(static_cast<uint64_t>(regs.xreg(0)) == FactorialC(N)); \ } while (0) TEST(factorial_rec) { @@ -258,15 +253,39 @@ TEST(neon_matrix_multiply) { // Fill the two input matrices with some 32 bit floating point values. 
- mat1[0] = 1.0f; mat1[4] = 2.0f; mat1[ 8] = 3.0f; mat1[12] = 4.0f; - mat1[1] = 52.03f; mat1[5] = 12.24f; mat1[ 9] = 53.56f; mat1[13] = 22.22f; - mat1[2] = 4.43f; mat1[6] = 5.00f; mat1[10] = 7.00f; mat1[14] = 3.11f; - mat1[3] = 43.47f; mat1[7] = 10.97f; mat1[11] = 37.78f; mat1[15] = 90.91f; - - mat2[0] = 1.0f; mat2[4] = 11.24f; mat2[ 8] = 21.00f; mat2[12] = 21.31f; - mat2[1] = 2.0f; mat2[5] = 2.24f; mat2[ 9] = 8.56f; mat2[13] = 52.03f; - mat2[2] = 3.0f; mat2[6] = 51.00f; mat2[10] = 21.00f; mat2[14] = 33.11f; - mat2[3] = 4.0f; mat2[7] = 0.00f; mat2[11] = 84.00f; mat2[15] = 1.97f; + mat1[0] = 1.0f; + mat1[4] = 2.0f; + mat1[8] = 3.0f; + mat1[12] = 4.0f; + mat1[1] = 52.03f; + mat1[5] = 12.24f; + mat1[9] = 53.56f; + mat1[13] = 22.22f; + mat1[2] = 4.43f; + mat1[6] = 5.00f; + mat1[10] = 7.00f; + mat1[14] = 3.11f; + mat1[3] = 43.47f; + mat1[7] = 10.97f; + mat1[11] = 37.78f; + mat1[15] = 90.91f; + + mat2[0] = 1.0f; + mat2[4] = 11.24f; + mat2[8] = 21.00f; + mat2[12] = 21.31f; + mat2[1] = 2.0f; + mat2[5] = 2.24f; + mat2[9] = 8.56f; + mat2[13] = 52.03f; + mat2[2] = 3.0f; + mat2[6] = 51.00f; + mat2[10] = 21.00f; + mat2[14] = 33.11f; + mat2[3] = 4.0f; + mat2[7] = 0.00f; + mat2[11] = 84.00f; + mat2[15] = 1.97f; MatrixMultiplyC(expected, mat1, mat2); @@ -294,8 +313,8 @@ TEST(add2_vectors) { // Initialize input data for the example function. 
uint8_t A[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 200}; - uint8_t B[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, \ - 30, 31, 50}; + uint8_t B[] = + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 50}; uint8_t D[ARRAY_SIZE(A)]; uintptr_t A_addr = reinterpret_cast<uintptr_t>(A); uintptr_t B_addr = reinterpret_cast<uintptr_t>(B); @@ -322,14 +341,14 @@ TEST(add2_vectors) { } } -#define ADD3_DOUBLE_DOTEST(A, B, C) \ - do { \ - simulator.ResetState(); \ - simulator.WriteDRegister(0, A); \ - simulator.WriteDRegister(1, B); \ - simulator.WriteDRegister(2, C); \ - TEST_FUNCTION(add3_double); \ - VIXL_CHECK(regs.dreg(0) == Add3DoubleC(A, B, C)); \ +#define ADD3_DOUBLE_DOTEST(A, B, C) \ + do { \ + simulator.ResetState(); \ + simulator.WriteDRegister(0, A); \ + simulator.WriteDRegister(1, B); \ + simulator.WriteDRegister(2, C); \ + TEST_FUNCTION(add3_double); \ + VIXL_CHECK(regs.dreg(0) == Add3DoubleC(A, B, C)); \ } while (0) TEST(add3_double) { @@ -347,15 +366,15 @@ TEST(add3_double) { } -#define ADD4_DOUBLE_DOTEST(A, B, C, D) \ - do { \ - simulator.ResetState(); \ - simulator.WriteXRegister(0, A); \ - simulator.WriteDRegister(0, B); \ - simulator.WriteXRegister(1, C); \ - simulator.WriteDRegister(1, D); \ - TEST_FUNCTION(add4_double); \ - VIXL_CHECK(regs.dreg(0) == Add4DoubleC(A, B, C, D)); \ +#define ADD4_DOUBLE_DOTEST(A, B, C, D) \ + do { \ + simulator.ResetState(); \ + simulator.WriteXRegister(0, A); \ + simulator.WriteDRegister(0, B); \ + simulator.WriteXRegister(1, C); \ + simulator.WriteDRegister(1, D); \ + TEST_FUNCTION(add4_double); \ + VIXL_CHECK(regs.dreg(0) == Add4DoubleC(A, B, C, D)); \ } while (0) TEST(add4_double) { @@ -374,14 +393,14 @@ TEST(add4_double) { } -#define SUM_ARRAY_DOTEST(Array) \ - do { \ - simulator.ResetState(); \ - uintptr_t addr = reinterpret_cast<uintptr_t>(Array); \ - simulator.WriteXRegister(0, addr); \ - simulator.WriteXRegister(1, ARRAY_SIZE(Array)); \ - TEST_FUNCTION(sum_array); \ - 
VIXL_CHECK(regs.xreg(0) == SumArrayC(Array, ARRAY_SIZE(Array))); \ +#define SUM_ARRAY_DOTEST(Array) \ + do { \ + simulator.ResetState(); \ + uintptr_t addr = reinterpret_cast<uintptr_t>(Array); \ + simulator.WriteXRegister(0, addr); \ + simulator.WriteXRegister(1, ARRAY_SIZE(Array)); \ + TEST_FUNCTION(sum_array); \ + VIXL_CHECK(regs.xreg(0) == SumArrayC(Array, ARRAY_SIZE(Array))); \ } while (0) TEST(sum_array) { @@ -392,25 +411,24 @@ TEST(sum_array) { GenerateSumArray(&masm); masm.FinalizeCode(); - uint8_t data1[] = { 4, 9, 13, 3, 2, 6, 5 }; + uint8_t data1[] = {4, 9, 13, 3, 2, 6, 5}; SUM_ARRAY_DOTEST(data1); - uint8_t data2[] = { 42 }; + uint8_t data2[] = {42}; SUM_ARRAY_DOTEST(data2); uint8_t data3[1000]; - for (unsigned int i = 0; i < ARRAY_SIZE(data3); ++i) - data3[i] = 255; + for (unsigned int i = 0; i < ARRAY_SIZE(data3); ++i) data3[i] = 255; SUM_ARRAY_DOTEST(data3); } -#define ABS_DOTEST(X) \ - do { \ - simulator.ResetState(); \ - simulator.WriteXRegister(0, X); \ - TEST_FUNCTION(func_abs); \ - VIXL_CHECK(regs.xreg(0) == abs(X)); \ +#define ABS_DOTEST(X) \ + do { \ + simulator.ResetState(); \ + simulator.WriteXRegister(0, X); \ + TEST_FUNCTION(func_abs); \ + VIXL_CHECK(regs.xreg(0) == abs(X)); \ } while (0) TEST(abs) { @@ -436,7 +454,7 @@ TEST(crc32) { GenerateCrc32(&masm); masm.FinalizeCode(); - const char *msg = "Hello World!"; + const char* msg = "Hello World!"; uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg); size_t msg_size = strlen(msg); int64_t chksum = INT64_C(0xe3d6e35c); @@ -490,14 +508,14 @@ TEST(swap_int32) { } -#define CHECKBOUNDS_DOTEST(Value, Low, High) \ - do { \ - simulator.ResetState(); \ - simulator.WriteXRegister(0, Value); \ - simulator.WriteXRegister(1, Low); \ - simulator.WriteXRegister(2, High); \ - TEST_FUNCTION(check_bounds); \ - VIXL_CHECK(regs.xreg(0) == ((Low <= Value) && (Value <= High))); \ +#define CHECKBOUNDS_DOTEST(Value, Low, High) \ + do { \ + simulator.ResetState(); \ + simulator.WriteXRegister(0, Value); \ + 
simulator.WriteXRegister(1, Low); \ + simulator.WriteXRegister(2, High); \ + TEST_FUNCTION(check_bounds); \ + VIXL_CHECK(regs.xreg(0) == ((Low <= Value) && (Value <= High))); \ } while (0) TEST(check_bounds) { @@ -520,12 +538,12 @@ TEST(check_bounds) { } -#define GETTING_STARTED_DOTEST(Value) \ - do { \ - simulator.ResetState(); \ - simulator.WriteXRegister(0, Value); \ - TEST_FUNCTION(demo_function); \ - VIXL_CHECK(regs.xreg(0) == (Value & 0x1122334455667788)); \ +#define GETTING_STARTED_DOTEST(Value) \ + do { \ + simulator.ResetState(); \ + simulator.WriteXRegister(0, Value); \ + TEST_FUNCTION(demo_function); \ + VIXL_CHECK(regs.xreg(0) == (Value & 0x1122334455667788)); \ } while (0) TEST(getting_started) { @@ -566,8 +584,8 @@ TEST(non_const_visitor) { TEST(literal_example) { VIXL_ASSERT(LiteralExample(1, 2) == 3); - VIXL_ASSERT( - LiteralExample(INT64_C(0x100000000), 0x1) == INT64_C(0x100000001)); + VIXL_ASSERT(LiteralExample(INT64_C(0x100000000), 0x1) == + INT64_C(0x100000001)); } diff --git a/test/aarch64/test-abi.cc b/test/aarch64/test-abi.cc index aa7e65b9..e823c7ea 100644 --- a/test/aarch64/test-abi.cc +++ b/test/aarch64/test-abi.cc @@ -35,7 +35,7 @@ #ifdef VIXL_HAS_ABI_SUPPORT -#define TEST(name) TEST_(AARCH64_ABI_##name) +#define TEST(name) TEST_(AARCH64_ABI_##name) namespace vixl { namespace aarch64 { @@ -51,7 +51,9 @@ TEST(abi) { VIXL_CHECK(abi.GetReturnGenericOperand<char>().Equals(GenericOperand(w0))); VIXL_CHECK(abi.GetReturnGenericOperand<int8_t>().Equals(GenericOperand(w0))); VIXL_CHECK(abi.GetReturnGenericOperand<uint8_t>().Equals(GenericOperand(w0))); - VIXL_CHECK(abi.GetReturnGenericOperand<short>().Equals(GenericOperand(w0))); // NOLINT(runtime/int) + VIXL_CHECK( + abi.GetReturnGenericOperand<short>().Equals( // NOLINT(runtime/int) + GenericOperand(w0))); VIXL_CHECK(abi.GetReturnGenericOperand<int16_t>().Equals(GenericOperand(w0))); VIXL_CHECK( abi.GetReturnGenericOperand<uint16_t>().Equals(GenericOperand(w0))); @@ -68,45 +70,46 @@ TEST(abi) { 
GenericOperand found(NoReg); GenericOperand expected(NoReg); -#define CHECK_NEXT_PARAMETER_REG(type, reg) \ - found = abi.GetNextParameterGenericOperand<type>(); \ - expected = GenericOperand(reg); \ - VIXL_CHECK(found.Equals(expected)) - // Slots on the stack are always 8 bytes. -#define CHECK_NEXT_PARAMETER_MEM(type, mem_op, size) \ - found = abi.GetNextParameterGenericOperand<type>(); \ - expected = GenericOperand(mem_op, size); \ - VIXL_CHECK(found.Equals(expected)) +#define CHECK_NEXT_PARAMETER_REG(type, reg) \ + found = abi.GetNextParameterGenericOperand<type>(); \ + expected = GenericOperand(reg); \ + VIXL_CHECK(found.Equals(expected)) +// Slots on the stack are always 8 bytes. +#define CHECK_NEXT_PARAMETER_MEM(type, mem_op, size) \ + found = abi.GetNextParameterGenericOperand<type>(); \ + expected = GenericOperand(mem_op, size); \ + VIXL_CHECK(found.Equals(expected)) abi.Reset(); - CHECK_NEXT_PARAMETER_REG(int, w0); - CHECK_NEXT_PARAMETER_REG(char, w1); - CHECK_NEXT_PARAMETER_REG(bool, w2); - CHECK_NEXT_PARAMETER_REG(float, s0); - CHECK_NEXT_PARAMETER_REG(double, d1); - CHECK_NEXT_PARAMETER_REG(double, d2); - CHECK_NEXT_PARAMETER_REG(float, s3); - CHECK_NEXT_PARAMETER_REG(int64_t, x3); + CHECK_NEXT_PARAMETER_REG(int, w0); + CHECK_NEXT_PARAMETER_REG(char, w1); + CHECK_NEXT_PARAMETER_REG(bool, w2); + CHECK_NEXT_PARAMETER_REG(float, s0); + CHECK_NEXT_PARAMETER_REG(double, d1); + CHECK_NEXT_PARAMETER_REG(double, d2); + CHECK_NEXT_PARAMETER_REG(float, s3); + CHECK_NEXT_PARAMETER_REG(int64_t, x3); CHECK_NEXT_PARAMETER_REG(uint64_t, x4); - CHECK_NEXT_PARAMETER_REG(void*, x5); + CHECK_NEXT_PARAMETER_REG(void*, x5); CHECK_NEXT_PARAMETER_REG(uint32_t, w6); typedef short my_type; // NOLINT(runtime/int) - CHECK_NEXT_PARAMETER_REG(my_type, w7); - CHECK_NEXT_PARAMETER_MEM(int, MemOperand(sp, 0), kWRegSizeInBytes); - CHECK_NEXT_PARAMETER_MEM(int, MemOperand(sp, 8), kWRegSizeInBytes); - CHECK_NEXT_PARAMETER_REG(double, d4); - CHECK_NEXT_PARAMETER_REG(double, d5); - 
CHECK_NEXT_PARAMETER_REG(double, d6); - CHECK_NEXT_PARAMETER_REG(double, d7); - CHECK_NEXT_PARAMETER_MEM(double, MemOperand(sp, 16), kDRegSizeInBytes); - CHECK_NEXT_PARAMETER_MEM(bool, MemOperand(sp, 24), kWRegSizeInBytes); - CHECK_NEXT_PARAMETER_MEM(short, MemOperand(sp, 32), kWRegSizeInBytes); // NOLINT(runtime/int) - CHECK_NEXT_PARAMETER_MEM(float, MemOperand(sp, 40), kSRegSizeInBytes); - CHECK_NEXT_PARAMETER_MEM(float, MemOperand(sp, 48), kSRegSizeInBytes); + CHECK_NEXT_PARAMETER_REG(my_type, w7); + CHECK_NEXT_PARAMETER_MEM(int, MemOperand(sp, 0), kWRegSizeInBytes); + CHECK_NEXT_PARAMETER_MEM(int, MemOperand(sp, 8), kWRegSizeInBytes); + CHECK_NEXT_PARAMETER_REG(double, d4); + CHECK_NEXT_PARAMETER_REG(double, d5); + CHECK_NEXT_PARAMETER_REG(double, d6); + CHECK_NEXT_PARAMETER_REG(double, d7); + CHECK_NEXT_PARAMETER_MEM(double, MemOperand(sp, 16), kDRegSizeInBytes); + CHECK_NEXT_PARAMETER_MEM(bool, MemOperand(sp, 24), kWRegSizeInBytes); + CHECK_NEXT_PARAMETER_MEM(short, // NOLINT(runtime/int) + MemOperand(sp, 32), + kWRegSizeInBytes); + CHECK_NEXT_PARAMETER_MEM(float, MemOperand(sp, 40), kSRegSizeInBytes); + CHECK_NEXT_PARAMETER_MEM(float, MemOperand(sp, 48), kSRegSizeInBytes); VIXL_CHECK(abi.GetStackSpaceRequired() == 56); } - - -}} // namespace vixl::aarch64 +} +} // namespace vixl::aarch64 #endif // VIXL_ABI_SUPORT diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc index 6a8e6526..be936f06 100644 --- a/test/aarch64/test-assembler-aarch64.cc +++ b/test/aarch64/test-assembler-aarch64.cc @@ -94,132 +94,134 @@ namespace aarch64 { #define __ masm. -#define TEST(name) TEST_(AARCH64_ASM_##name) +#define TEST(name) TEST_(AARCH64_ASM_##name) #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 // Run tests with the simulator. 
-#define SETUP() \ - MacroAssembler masm; \ +#define SETUP() \ + MacroAssembler masm; \ SETUP_COMMON() -#define SETUP_CUSTOM(size, pic) \ - byte* buf = new byte[size + CodeBuffer::kDefaultCapacity]; \ - MacroAssembler masm(buf, size + CodeBuffer::kDefaultCapacity, pic); \ +#define SETUP_CUSTOM(size, pic) \ + byte* buf = new byte[size + CodeBuffer::kDefaultCapacity]; \ + MacroAssembler masm(buf, size + CodeBuffer::kDefaultCapacity, pic); \ SETUP_COMMON() -#define SETUP_COMMON() \ - masm.SetGenerateSimulatorCode(true); \ - Decoder simulator_decoder; \ - Simulator* simulator = \ - Test::run_debugger() ? new Debugger(&simulator_decoder) \ - : new Simulator(&simulator_decoder); \ - simulator->SetColouredTrace(Test::coloured_trace()); \ - simulator->SetInstructionStats(Test::instruction_stats()); \ - Disassembler disasm; \ - Decoder disassembler_decoder; \ - disassembler_decoder.AppendVisitor(&disasm); \ +#define SETUP_COMMON() \ + masm.SetGenerateSimulatorCode(true); \ + Decoder simulator_decoder; \ + Simulator* simulator = Test::run_debugger() \ + ? new Debugger(&simulator_decoder) \ + : new Simulator(&simulator_decoder); \ + simulator->SetColouredTrace(Test::coloured_trace()); \ + simulator->SetInstructionStats(Test::instruction_stats()); \ + Disassembler disasm; \ + Decoder disassembler_decoder; \ + disassembler_decoder.AppendVisitor(&disasm); \ RegisterDump core // This is a convenience macro to avoid creating a scope for every assembler // function called. It will still assert the buffer hasn't been exceeded. 
-#define ALLOW_ASM() \ +#define ALLOW_ASM() \ CodeBufferCheckScope guard(&masm, masm.GetBuffer()->GetCapacity()) -#define START() \ - masm.Reset(); \ - simulator->ResetState(); \ - __ PushCalleeSavedRegisters(); \ - { \ - int trace_parameters = 0; \ - if (Test::trace_reg()) trace_parameters |= LOG_STATE; \ - if (Test::trace_write()) trace_parameters |= LOG_WRITE; \ - if (Test::trace_sim()) trace_parameters |= LOG_DISASM; \ - if (Test::trace_branch()) trace_parameters |= LOG_BRANCH; \ - if (trace_parameters != 0) { \ - __ Trace(static_cast<TraceParameters>(trace_parameters), TRACE_ENABLE); \ - } \ - } \ - if (Test::instruction_stats()) { \ - __ EnableInstrumentation(); \ +#define START() \ + masm.Reset(); \ + simulator->ResetState(); \ + __ PushCalleeSavedRegisters(); \ + { \ + int trace_parameters = 0; \ + if (Test::trace_reg()) trace_parameters |= LOG_STATE; \ + if (Test::trace_write()) trace_parameters |= LOG_WRITE; \ + if (Test::trace_sim()) trace_parameters |= LOG_DISASM; \ + if (Test::trace_branch()) trace_parameters |= LOG_BRANCH; \ + if (trace_parameters != 0) { \ + __ Trace(static_cast<TraceParameters>(trace_parameters), TRACE_ENABLE); \ + } \ + } \ + if (Test::instruction_stats()) { \ + __ EnableInstrumentation(); \ } -#define END() \ - if (Test::instruction_stats()) { \ - __ DisableInstrumentation(); \ - } \ - __ Trace(LOG_ALL, TRACE_DISABLE); \ - core.Dump(&masm); \ - __ PopCalleeSavedRegisters(); \ - __ Ret(); \ +#define END() \ + if (Test::instruction_stats()) { \ + __ DisableInstrumentation(); \ + } \ + __ Trace(LOG_ALL, TRACE_DISABLE); \ + core.Dump(&masm); \ + __ PopCalleeSavedRegisters(); \ + __ Ret(); \ masm.FinalizeCode() -#define RUN() \ - DISASSEMBLE(); \ +#define RUN() \ + DISASSEMBLE(); \ simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()) #define RUN_CUSTOM() RUN() #define TEARDOWN() TEARDOWN_COMMON() -#define TEARDOWN_CUSTOM() \ - delete[] buf; \ +#define TEARDOWN_CUSTOM() \ + delete[] buf; \ TEARDOWN_COMMON() -#define 
TEARDOWN_COMMON() \ - delete simulator; +#define TEARDOWN_COMMON() delete simulator; #else // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64. // Run the test on real hardware or models. -#define SETUP() \ - MacroAssembler masm; \ +#define SETUP() \ + MacroAssembler masm; \ SETUP_COMMON() -#define SETUP_CUSTOM(size, pic) \ - byte *buffer = reinterpret_cast<byte*>( \ - mmap(NULL, size + CodeBuffer::kDefaultCapacity, \ - PROT_READ | PROT_WRITE, \ - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); \ - size_t buffer_size = size + CodeBuffer::kDefaultCapacity; \ - MacroAssembler masm(buffer, buffer_size, pic); \ +#define SETUP_CUSTOM(size, pic) \ + byte* buffer = \ + reinterpret_cast<byte*>(mmap(NULL, \ + size + CodeBuffer::kDefaultCapacity, \ + PROT_READ | PROT_WRITE, \ + MAP_PRIVATE | MAP_ANONYMOUS, \ + -1, \ + 0)); \ + size_t buffer_size = size + CodeBuffer::kDefaultCapacity; \ + MacroAssembler masm(buffer, buffer_size, pic); \ SETUP_COMMON() -#define SETUP_COMMON() \ - Disassembler disasm; \ - Decoder disassembler_decoder; \ - disassembler_decoder.AppendVisitor(&disasm); \ - masm.SetGenerateSimulatorCode(false); \ - RegisterDump core; \ +#define SETUP_COMMON() \ + Disassembler disasm; \ + Decoder disassembler_decoder; \ + disassembler_decoder.AppendVisitor(&disasm); \ + masm.SetGenerateSimulatorCode(false); \ + RegisterDump core; \ CPU::SetUp() // This is a convenience macro to avoid creating a scope for every assembler // function called. It will still assert the buffer hasn't been exceeded. -#define ALLOW_ASM() \ +#define ALLOW_ASM() \ CodeBufferCheckScope guard(&masm, masm.GetBuffer()->GetCapacity()) -#define START() \ - masm.Reset(); \ +#define START() \ + masm.Reset(); \ __ PushCalleeSavedRegisters() -#define END() \ - core.Dump(&masm); \ - __ PopCalleeSavedRegisters(); \ - __ Ret(); \ +#define END() \ + core.Dump(&masm); \ + __ PopCalleeSavedRegisters(); \ + __ Ret(); \ masm.FinalizeCode() // Execute the generated code from the memory area. 
-#define RUN() \ - DISASSEMBLE(); \ - masm.GetBuffer()->SetExecutable(); \ - ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \ - masm.GetSizeOfCodeGenerated()); \ +#define RUN() \ + DISASSEMBLE(); \ + masm.GetBuffer()->SetExecutable(); \ + ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \ + masm.GetSizeOfCodeGenerated()); \ masm.GetBuffer()->SetWritable() // The generated code was written directly into `buffer`, execute it directly. -#define RUN_CUSTOM() \ - DISASSEMBLE(); \ - mprotect(buffer, buffer_size, PROT_READ | PROT_EXEC); \ - ExecuteMemory(buffer, buffer_size); \ +#define RUN_CUSTOM() \ + DISASSEMBLE(); \ + mprotect(buffer, buffer_size, PROT_READ | PROT_EXEC); \ + ExecuteMemory(buffer, buffer_size); \ mprotect(buffer, buffer_size, PROT_READ | PROT_WRITE) #define TEARDOWN() @@ -228,42 +230,42 @@ namespace aarch64 { #endif // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64. -#define DISASSEMBLE() \ - if (Test::disassemble()) { \ - Instruction* instruction = \ - masm.GetBuffer()->GetStartAddress<Instruction*>(); \ - Instruction* end = masm.GetBuffer()->GetOffsetAddress<Instruction*>( \ - masm.GetSizeOfCodeGenerated()); \ - while (instruction != end) { \ - disassembler_decoder.Decode(instruction); \ - uint32_t encoding = *reinterpret_cast<uint32_t*>(instruction); \ - printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ - instruction += kInstructionSize; \ - } \ +#define DISASSEMBLE() \ + if (Test::disassemble()) { \ + Instruction* instruction = \ + masm.GetBuffer()->GetStartAddress<Instruction*>(); \ + Instruction* end = masm.GetBuffer()->GetOffsetAddress<Instruction*>( \ + masm.GetSizeOfCodeGenerated()); \ + while (instruction != end) { \ + disassembler_decoder.Decode(instruction); \ + uint32_t encoding = *reinterpret_cast<uint32_t*>(instruction); \ + printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ + instruction += kInstructionSize; \ + } \ } -#define ASSERT_EQUAL_NZCV(expected) \ +#define ASSERT_EQUAL_NZCV(expected) \ 
VIXL_CHECK(EqualNzcv(expected, core.flags_nzcv())) -#define ASSERT_EQUAL_REGISTERS(expected) \ +#define ASSERT_EQUAL_REGISTERS(expected) \ VIXL_CHECK(EqualRegisters(&expected, &core)) -#define ASSERT_EQUAL_32(expected, result) \ +#define ASSERT_EQUAL_32(expected, result) \ VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result)) -#define ASSERT_EQUAL_FP32(expected, result) \ +#define ASSERT_EQUAL_FP32(expected, result) \ VIXL_CHECK(EqualFP32(expected, &core, result)) -#define ASSERT_EQUAL_64(expected, result) \ +#define ASSERT_EQUAL_64(expected, result) \ VIXL_CHECK(Equal64(expected, &core, result)) -#define ASSERT_EQUAL_FP64(expected, result) \ +#define ASSERT_EQUAL_FP64(expected, result) \ VIXL_CHECK(EqualFP64(expected, &core, result)) -#define ASSERT_EQUAL_128(expected_h, expected_l, result) \ +#define ASSERT_EQUAL_128(expected_h, expected_l, result) \ VIXL_CHECK(Equal128(expected_h, expected_l, &core, result)) -#define ASSERT_LITERAL_POOL_SIZE(expected) \ +#define ASSERT_LITERAL_POOL_SIZE(expected) \ VIXL_CHECK((expected + kInstructionSize) == (masm.GetLiteralPoolSize())) @@ -1698,17 +1700,17 @@ TEST(label) { START(); __ Mov(x0, 0x1); __ Mov(x1, 0x0); - __ Mov(x22, lr); // Save lr. + __ Mov(x22, lr); // Save lr. __ B(&label_1); __ B(&label_1); - __ B(&label_1); // Multiple branches to the same label. + __ B(&label_1); // Multiple branches to the same label. __ Mov(x0, 0x0); __ Bind(&label_2); - __ B(&label_3); // Forward branch. + __ B(&label_3); // Forward branch. __ Mov(x0, 0x0); __ Bind(&label_1); - __ B(&label_2); // Backward branch. + __ B(&label_2); // Backward branch. __ Mov(x0, 0x0); __ Bind(&label_3); __ Bl(&label_4); @@ -1774,10 +1776,10 @@ TEST(adr) { Label label_1, label_2, label_3, label_4; START(); - __ Mov(x0, 0x0); // Set to non-zero to indicate failure. - __ Adr(x1, &label_3); // Set to zero to indicate success. + __ Mov(x0, 0x0); // Set to non-zero to indicate failure. + __ Adr(x1, &label_3); // Set to zero to indicate success. 
- __ Adr(x2, &label_1); // Multiple forward references to the same label. + __ Adr(x2, &label_1); // Multiple forward references to the same label. __ Adr(x3, &label_1); __ Adr(x4, &label_1); @@ -1789,17 +1791,17 @@ TEST(adr) { __ Br(x2); // label_1, label_3 __ Bind(&label_3); - __ Adr(x2, &label_3); // Self-reference (offset 0). + __ Adr(x2, &label_3); // Self-reference (offset 0). __ Eor(x1, x1, Operand(x2)); - __ Adr(x2, &label_4); // Simple forward reference. - __ Br(x2); // label_4 + __ Adr(x2, &label_4); // Simple forward reference. + __ Br(x2); // label_4 __ Bind(&label_1); - __ Adr(x2, &label_3); // Multiple reverse references to the same label. + __ Adr(x2, &label_3); // Multiple reverse references to the same label. __ Adr(x3, &label_3); __ Adr(x4, &label_3); - __ Adr(x5, &label_2); // Simple reverse reference. - __ Br(x5); // label_2 + __ Adr(x5, &label_2); // Simple reverse reference. + __ Br(x5); // label_2 __ Bind(&label_4); END(); @@ -2559,7 +2561,7 @@ TEST(ldr_str_largeindex) { // This value won't fit in the immediate offset field of ldr/str instructions. 
int largeoffset = 0xabcdef; - int64_t data[3] = { 0x1122334455667788, 0, 0 }; + int64_t data[3] = {0x1122334455667788, 0, 0}; uint64_t base_addr = reinterpret_cast<uintptr_t>(data); uint64_t drifted_addr = base_addr - largeoffset; @@ -2853,11 +2855,10 @@ TEST(load_store_h) { TEST(load_store_q) { SETUP(); - uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, - 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, - 0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f, - 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, - 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02, + uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23, + 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87, + 0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, + 0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02, 0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20}; uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; @@ -3057,11 +3058,20 @@ TEST(neon_ld1_d_postindex) { __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex)); __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex)); __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex)); - __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), + __ Ld1(v16.V2S(), + v17.V2S(), + v18.V2S(), + v19.V2S(), MemOperand(x20, 32, PostIndex)); - __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), + __ Ld1(v30.V2S(), + v31.V2S(), + v0.V2S(), + v1.V2S(), MemOperand(x21, 32, PostIndex)); - __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), + __ Ld1(v20.V1D(), + v21.V1D(), + v22.V1D(), + v23.V1D(), MemOperand(x22, 32, PostIndex)); END(); @@ -3158,9 +3168,15 @@ TEST(neon_ld1_q_postindex) { __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex)); __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex)); __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex)); - __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), + __ Ld1(v16.V4S(), + v17.V4S(), + v18.V4S(), + v19.V4S(), MemOperand(x20, 64, PostIndex)); - __ 
Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), + __ Ld1(v30.V2D(), + v31.V2D(), + v0.V2D(), + v1.V2D(), MemOperand(x21, 64, PostIndex)); END(); @@ -3408,7 +3424,6 @@ TEST(neon_ld2_q_postindex) { ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0); - ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 32, x18); ASSERT_EQUAL_64(src_base + 2 + 32, x19); @@ -3582,8 +3597,6 @@ TEST(neon_ld2_lane_postindex) { ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15); - - ASSERT_EQUAL_64(src_base + 32, x17); ASSERT_EQUAL_64(src_base + 32, x18); ASSERT_EQUAL_64(src_base + 32, x19); @@ -4233,15 +4246,30 @@ TEST(neon_ld4_d_postindex) { __ Mov(x20, src_base + 3); __ Mov(x21, src_base + 4); __ Mov(x22, 1); - __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), + __ Ld4(v2.V8B(), + v3.V8B(), + v4.V8B(), + v5.V8B(), MemOperand(x17, x22, PostIndex)); - __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), + __ Ld4(v6.V8B(), + v7.V8B(), + v8.V8B(), + v9.V8B(), MemOperand(x18, 32, PostIndex)); - __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), + __ Ld4(v10.V4H(), + v11.V4H(), + v12.V4H(), + v13.V4H(), MemOperand(x19, 32, PostIndex)); - __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(), + __ Ld4(v14.V2S(), + v15.V2S(), + v16.V2S(), + v17.V2S(), MemOperand(x20, 32, PostIndex)); - __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), + __ Ld4(v30.V2S(), + v31.V2S(), + v0.V2S(), + v1.V2S(), MemOperand(x21, 32, PostIndex)); END(); @@ -4343,15 +4371,30 @@ TEST(neon_ld4_q_postindex) { __ Mov(x21, src_base + 4); __ Mov(x22, 1); - __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), + __ Ld4(v2.V16B(), + v3.V16B(), + v4.V16B(), + v5.V16B(), MemOperand(x17, x22, PostIndex)); - __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), + __ Ld4(v6.V16B(), + v7.V16B(), + v8.V16B(), + v9.V16B(), MemOperand(x18, 64, PostIndex)); - __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), + __ Ld4(v10.V8H(), + v11.V8H(), + v12.V8H(), + v13.V8H(), MemOperand(x19, 64, PostIndex)); - __ Ld4(v14.V4S(), 
v15.V4S(), v16.V4S(), v17.V4S(), + __ Ld4(v14.V4S(), + v15.V4S(), + v16.V4S(), + v17.V4S(), MemOperand(x20, 64, PostIndex)); - __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), + __ Ld4(v30.V2D(), + v31.V2D(), + v0.V2D(), + v1.V2D(), MemOperand(x21, 64, PostIndex)); END(); @@ -4379,7 +4422,6 @@ TEST(neon_ld4_q_postindex) { ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1); - ASSERT_EQUAL_64(src_base + 1, x17); ASSERT_EQUAL_64(src_base + 1 + 64, x18); ASSERT_EQUAL_64(src_base + 2 + 64, x19); @@ -4497,7 +4539,6 @@ TEST(neon_ld4_lane) { } - TEST(neon_ld4_lane_postindex) { SETUP(); @@ -4512,25 +4553,26 @@ TEST(neon_ld4_lane_postindex) { // Test loading whole register by element. __ Mov(x17, src_base); for (int i = 15; i >= 0; i--) { - __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, - MemOperand(x17, 4, PostIndex)); + __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex)); } __ Mov(x18, src_base); for (int i = 7; i >= 0; i--) { - __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, - MemOperand(x18, 8, PostIndex)); + __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex)); } __ Mov(x19, src_base); for (int i = 3; i >= 0; i--) { - __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, - MemOperand(x19, 16, PostIndex)); + __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex)); } __ Mov(x20, src_base); for (int i = 1; i >= 0; i--) { - __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, + __ Ld4(v12.D(), + v13.D(), + v14.D(), + v15.D(), + i, MemOperand(x20, 32, PostIndex)); } @@ -4546,7 +4588,11 @@ TEST(neon_ld4_lane_postindex) { __ Ldr(q17, MemOperand(x4, 16, PostIndex)); __ Ldr(q18, MemOperand(x4, 16, PostIndex)); __ Ldr(q19, MemOperand(x4)); - __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, + __ Ld4(v16.B(), + v17.B(), + v18.B(), + v19.B(), + 4, MemOperand(x21, x25, PostIndex)); __ Add(x25, x25, 1); @@ -4555,7 +4601,11 @@ TEST(neon_ld4_lane_postindex) { __ Ldr(q21, MemOperand(x5, 16, PostIndex)); __ Ldr(q22, MemOperand(x5, 16, PostIndex)); 
__ Ldr(q23, MemOperand(x5)); - __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, + __ Ld4(v20.H(), + v21.H(), + v22.H(), + v23.H(), + 3, MemOperand(x22, x25, PostIndex)); __ Add(x25, x25, 1); @@ -4564,7 +4614,11 @@ TEST(neon_ld4_lane_postindex) { __ Ldr(q25, MemOperand(x6, 16, PostIndex)); __ Ldr(q26, MemOperand(x6, 16, PostIndex)); __ Ldr(q27, MemOperand(x6)); - __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, + __ Ld4(v24.S(), + v25.S(), + v26.S(), + v27.S(), + 2, MemOperand(x23, x25, PostIndex)); __ Add(x25, x25, 1); @@ -4573,7 +4627,11 @@ TEST(neon_ld4_lane_postindex) { __ Ldr(q29, MemOperand(x7, 16, PostIndex)); __ Ldr(q30, MemOperand(x7, 16, PostIndex)); __ Ldr(q31, MemOperand(x7)); - __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, + __ Ld4(v28.D(), + v29.D(), + v30.D(), + v31.D(), + 1, MemOperand(x24, x25, PostIndex)); END(); @@ -4704,19 +4762,40 @@ TEST(neon_ld4_alllanes_postindex) { START(); __ Mov(x17, src_base + 1); __ Mov(x18, 1); - __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), + __ Ld4r(v0.V8B(), + v1.V8B(), + v2.V8B(), + v3.V8B(), MemOperand(x17, 4, PostIndex)); - __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), + __ Ld4r(v4.V16B(), + v5.V16B(), + v6.V16B(), + v7.V16B(), MemOperand(x17, x18, PostIndex)); - __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), + __ Ld4r(v8.V4H(), + v9.V4H(), + v10.V4H(), + v11.V4H(), MemOperand(x17, x18, PostIndex)); - __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), + __ Ld4r(v12.V8H(), + v13.V8H(), + v14.V8H(), + v15.V8H(), MemOperand(x17, 8, PostIndex)); - __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), + __ Ld4r(v16.V2S(), + v17.V2S(), + v18.V2S(), + v19.V2S(), MemOperand(x17, x18, PostIndex)); - __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), + __ Ld4r(v20.V4S(), + v21.V4S(), + v22.V4S(), + v23.V4S(), MemOperand(x17, 16, PostIndex)); - __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), + __ Ld4r(v24.V2D(), + v25.V2D(), + v26.V2D(), + v27.V2D(), MemOperand(x17, 32, PostIndex)); END(); @@ -5375,12 +5454,18 @@ 
TEST(neon_st1_d_postindex) { __ Ldr(d19, MemOperand(x17, x19)); __ Ldr(d20, MemOperand(x17, x18)); - __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), + __ St1(v0.V2S(), + v1.V2S(), + v2.V2S(), + v3.V2S(), MemOperand(x17, 32, PostIndex)); __ Ldr(q21, MemOperand(x17, x21)); __ Ldr(q22, MemOperand(x17, x19)); - __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), + __ St1(v0.V1D(), + v1.V1D(), + v2.V1D(), + v3.V1D(), MemOperand(x17, 32, PostIndex)); __ Ldr(q23, MemOperand(x17, x21)); __ Ldr(q24, MemOperand(x17, x19)); @@ -5486,7 +5571,10 @@ TEST(neon_st1_q_postindex) { __ Ldr(q20, MemOperand(x17, x19)); __ Ldr(q21, MemOperand(x17, x18)); - __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), + __ St1(v0.V2D(), + v1.V2D(), + v2.V2D(), + v3.V2D(), MemOperand(x17, 64, PostIndex)); __ Ldr(q22, MemOperand(x17, x21)); __ Ldr(q23, MemOperand(x17, x20)); @@ -5515,7 +5603,7 @@ TEST(neon_st1_q_postindex) { TEST(neon_st2_d) { SETUP(); - uint8_t src[4*16]; + uint8_t src[4 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5555,7 +5643,7 @@ TEST(neon_st2_d) { TEST(neon_st2_d_postindex) { SETUP(); - uint8_t src[4*16]; + uint8_t src[4 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5593,7 +5681,7 @@ TEST(neon_st2_d_postindex) { TEST(neon_st2_q) { SETUP(); - uint8_t src[5*16]; + uint8_t src[5 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5634,7 +5722,7 @@ TEST(neon_st2_q) { TEST(neon_st2_q_postindex) { SETUP(); - uint8_t src[5*16]; + uint8_t src[5 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5676,7 +5764,7 @@ TEST(neon_st2_q_postindex) { TEST(neon_st3_d) { SETUP(); - uint8_t src[3*16]; + uint8_t src[3 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5714,7 +5802,7 @@ TEST(neon_st3_d) { TEST(neon_st3_d_postindex) { SETUP(); - uint8_t src[4*16]; + uint8_t src[4 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5755,7 +5843,7 @@ TEST(neon_st3_d_postindex) { 
TEST(neon_st3_q) { SETUP(); - uint8_t src[6*16]; + uint8_t src[6 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5802,7 +5890,7 @@ TEST(neon_st3_q) { TEST(neon_st3_q_postindex) { SETUP(); - uint8_t src[7*16]; + uint8_t src[7 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5849,7 +5937,7 @@ TEST(neon_st3_q_postindex) { TEST(neon_st4_d) { SETUP(); - uint8_t src[4*16]; + uint8_t src[4 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5892,7 +5980,7 @@ TEST(neon_st4_d) { TEST(neon_st4_d_postindex) { SETUP(); - uint8_t src[5*16]; + uint8_t src[5 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5907,12 +5995,17 @@ TEST(neon_st4_d_postindex) { __ Ldr(q2, MemOperand(x17, 16, PostIndex)); __ Ldr(q3, MemOperand(x17, 16, PostIndex)); - __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), + __ St4(v0.V8B(), + v1.V8B(), + v2.V8B(), + v3.V8B(), MemOperand(x18, x22, PostIndex)); - __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), + __ St4(v0.V4H(), + v1.V4H(), + v2.V4H(), + v3.V4H(), MemOperand(x18, 32, PostIndex)); - __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), - MemOperand(x18)); + __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18)); __ Mov(x19, src_base); @@ -5939,7 +6032,7 @@ TEST(neon_st4_d_postindex) { TEST(neon_st4_q) { SETUP(); - uint8_t src[7*16]; + uint8_t src[7 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -5990,7 +6083,7 @@ TEST(neon_st4_q) { TEST(neon_st4_q_postindex) { SETUP(); - uint8_t src[9*16]; + uint8_t src[9 * 16]; for (unsigned i = 0; i < sizeof(src); i++) { src[i] = i; } @@ -6005,14 +6098,22 @@ TEST(neon_st4_q_postindex) { __ Ldr(q2, MemOperand(x17, 16, PostIndex)); __ Ldr(q3, MemOperand(x17, 16, PostIndex)); - __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), + __ St4(v0.V16B(), + v1.V16B(), + v2.V16B(), + v3.V16B(), MemOperand(x18, x22, PostIndex)); - __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), + __ St4(v0.V8H(), + v1.V8H(), + v2.V8H(), + 
v3.V8H(), MemOperand(x18, 64, PostIndex)); - __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), + __ St4(v0.V4S(), + v1.V4S(), + v2.V4S(), + v3.V4S(), MemOperand(x18, x22, PostIndex)); - __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), - MemOperand(x18)); + __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18)); __ Mov(x19, src_base); __ Ldr(q0, MemOperand(x19, 16, PostIndex)); @@ -6141,7 +6242,12 @@ TEST(neon_destructive_tbl) { __ Mov(v27, v1); __ Mov(v28, v2); __ Mov(v29, v3); - __ Tbl(v26.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B(), v26.V16B()); + __ Tbl(v26.V16B(), + v26.V16B(), + v27.V16B(), + v28.V16B(), + v29.V16B(), + v26.V16B()); END(); RUN(); @@ -6192,7 +6298,12 @@ TEST(neon_destructive_tbx) { __ Mov(v27, v1); __ Mov(v28, v2); __ Mov(v29, v3); - __ Tbx(v26.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B(), v26.V16B()); + __ Tbx(v26.V16B(), + v26.V16B(), + v27.V16B(), + v28.V16B(), + v29.V16B(), + v26.V16B()); END(); RUN(); @@ -6310,8 +6421,10 @@ TEST(ldp_stp_double) { TEST(ldp_stp_quad) { SETUP(); - uint64_t src[4] = {0x0123456789abcdef, 0xaaaaaaaa55555555, - 0xfedcba9876543210, 0x55555555aaaaaaaa}; + uint64_t src[4] = {0x0123456789abcdef, + 0xaaaaaaaa55555555, + 0xfedcba9876543210, + 0x55555555aaaaaaaa}; uint64_t dst[6] = {0, 0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); @@ -6343,7 +6456,8 @@ TEST(ldp_stp_quad) { TEST(ldp_stp_offset) { SETUP(); - uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff, + uint64_t src[3] = {0x0011223344556677, + 0x8899aabbccddeeff, 0xffeeddccbbaa9988}; uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); @@ -6397,7 +6511,8 @@ TEST(ldp_stp_offset) { TEST(ldp_stp_offset_wide) { SETUP(); - uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff, + uint64_t src[3] = {0x0011223344556677, + 0x8899aabbccddeeff, 0xffeeddccbbaa9988}; uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0}; uintptr_t 
src_base = reinterpret_cast<uintptr_t>(src); @@ -6454,8 +6569,10 @@ TEST(ldp_stp_offset_wide) { TEST(ldnp_stnp_offset) { SETUP(); - uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff, - 0xffeeddccbbaa9988, 0x7766554433221100}; + uint64_t src[4] = {0x0011223344556677, + 0x8899aabbccddeeff, + 0xffeeddccbbaa9988, + 0x7766554433221100}; uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); @@ -6626,7 +6743,8 @@ TEST(ldnp_stnp_offset_double) { TEST(ldp_stp_preindex) { SETUP(); - uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff, + uint64_t src[3] = {0x0011223344556677, + 0x8899aabbccddeeff, 0xffeeddccbbaa9988}; uint64_t dst[5] = {0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); @@ -6680,7 +6798,8 @@ TEST(ldp_stp_preindex) { TEST(ldp_stp_preindex_wide) { SETUP(); - uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff, + uint64_t src[3] = {0x0011223344556677, + 0x8899aabbccddeeff, 0xffeeddccbbaa9988}; uint64_t dst[5] = {0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); @@ -6697,7 +6816,7 @@ TEST(ldp_stp_preindex_wide) { __ Mov(x19, x24); __ Mov(x24, src_base - base_offset + 4); __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex)); - __ Stp(w2, w3, MemOperand(x25, 4 - base_offset , PreIndex)); + __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex)); __ Mov(x20, x25); __ Mov(x25, dst_base + base_offset + 4); __ Mov(x24, src_base - base_offset); @@ -6742,8 +6861,10 @@ TEST(ldp_stp_preindex_wide) { TEST(ldp_stp_postindex) { SETUP(); - uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff, - 0xffeeddccbbaa9988, 0x7766554433221100}; + uint64_t src[4] = {0x0011223344556677, + 0x8899aabbccddeeff, + 0xffeeddccbbaa9988, + 0x7766554433221100}; uint64_t dst[5] = {0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = 
reinterpret_cast<uintptr_t>(dst); @@ -6796,8 +6917,10 @@ TEST(ldp_stp_postindex) { TEST(ldp_stp_postindex_wide) { SETUP(); - uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff, - 0xffeeddccbbaa9988, 0x7766554433221100}; + uint64_t src[4] = {0x0011223344556677, + 0x8899aabbccddeeff, + 0xffeeddccbbaa9988, + 0x7766554433221100}; uint64_t dst[5] = {0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); @@ -6922,8 +7045,7 @@ TEST(ldur_stur) { TEST(ldur_stur_fp) { SETUP(); - int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef, - 0x0123456789abcdef}; + int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef, 0x0123456789abcdef}; int64_t dst[5] = {0, 0, 0, 0, 0}; uintptr_t src_base = reinterpret_cast<uintptr_t>(src); uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst); @@ -7050,10 +7172,12 @@ TEST(ldr_literal_range) { TEST(ldr_literal_values_q) { SETUP(); - static const uint64_t kHalfValues[] = { - 0x8000000000000000, 0x7fffffffffffffff, 0x0000000000000000, - 0xffffffffffffffff, 0x00ff00ff00ff00ff, 0x1234567890abcdef - }; + static const uint64_t kHalfValues[] = {0x8000000000000000, + 0x7fffffffffffffff, + 0x0000000000000000, + 0xffffffffffffffff, + 0x00ff00ff00ff00ff, + 0x1234567890abcdef}; const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]); const Register& ref_low64 = x1; const Register& ref_high64 = x2; @@ -7117,20 +7241,25 @@ void LoadIntValueHelper(T values[], int card) { TEST(ldr_literal_values_x) { - static const uint64_t kValues[] = { - 0x8000000000000000, 0x7fffffffffffffff, 0x0000000000000000, - 0xffffffffffffffff, 0x00ff00ff00ff00ff, 0x1234567890abcdef - }; + static const uint64_t kValues[] = {0x8000000000000000, + 0x7fffffffffffffff, + 0x0000000000000000, + 0xffffffffffffffff, + 0x00ff00ff00ff00ff, + 0x1234567890abcdef}; LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0])); } TEST(ldr_literal_values_w) { - static const uint32_t kValues[] = { - 
0x80000000, 0x7fffffff, 0x00000000, 0xffffffff, 0x00ff00ff, 0x12345678, - 0x90abcdef - }; + static const uint32_t kValues[] = {0x80000000, + 0x7fffffff, + 0x00000000, + 0xffffffff, + 0x00ff00ff, + 0x12345678, + 0x90abcdef}; LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0])); } @@ -7150,8 +7279,8 @@ void LoadFPValueHelper(T values[], int card) { // If one of the values differ then x0 will be one. for (int i = 0; i < card; ++i) { - __ Mov(tgt1, is_32bits ? FloatToRawbits(values[i]) - : DoubleToRawbits(values[i])); + __ Mov(tgt1, + is_32bits ? FloatToRawbits(values[i]) : DoubleToRawbits(values[i])); __ Ldr(fp_tgt, values[i]); __ Fmov(tgt2, fp_tgt); __ Cmp(tgt1, tgt2); @@ -7168,18 +7297,14 @@ void LoadFPValueHelper(T values[], int card) { } TEST(ldr_literal_values_d) { - static const double kValues[] = { - -0.0, 0.0, -1.0, 1.0, -1e10, 1e10 - }; + static const double kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10}; LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0])); } TEST(ldr_literal_values_s) { - static const float kValues[] = { - -0.0, 0.0, -1.0, 1.0, -1e10, 1e10 - }; + static const float kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10}; LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0])); } @@ -7294,14 +7419,14 @@ TEST(ldr_literal_custom_shared) { for (int i = 0; i < 50; i++) { __ ldr(x2, &before_x); __ ldr(w3, &before_w); - __ ldrsw(x5, &before_w); // Re-use before_w. + __ ldrsw(x5, &before_w); // Re-use before_w. __ ldr(q11, &before_q); __ ldr(d13, &before_d); __ ldr(s25, &before_s); __ ldr(x6, &after_x); __ ldr(w7, &after_w); - __ ldrsw(x8, &after_w); // Re-use after_w. + __ ldrsw(x8, &after_w); // Re-use after_w. 
__ ldr(q18, &after_q); __ ldr(d14, &after_d); __ ldr(s26, &after_s); @@ -7954,9 +8079,9 @@ TEST(neg) { } -template<typename T, typename Op> -static void AdcsSbcsHelper(Op op, T left, T right, int carry, - T expected, StatusFlags expected_flags) { +template <typename T, typename Op> +static void AdcsSbcsHelper( + Op op, T left, T right, int carry, T expected, StatusFlags expected_flags) { int reg_size = sizeof(T) * 8; Register left_reg(0, reg_size); Register right_reg(1, reg_size); @@ -7986,10 +8111,14 @@ static void AdcsSbcsHelper(Op op, T left, T right, int carry, TEST(adcs_sbcs_x) { uint64_t inputs[] = { - 0x0000000000000000, 0x0000000000000001, - 0x7ffffffffffffffe, 0x7fffffffffffffff, - 0x8000000000000000, 0x8000000000000001, - 0xfffffffffffffffe, 0xffffffffffffffff, + 0x0000000000000000, + 0x0000000000000001, + 0x7ffffffffffffffe, + 0x7fffffffffffffff, + 0x8000000000000000, + 0x8000000000000001, + 0xfffffffffffffffe, + 0xffffffffffffffff, }; static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]); @@ -8000,157 +8129,171 @@ TEST(adcs_sbcs_x) { StatusFlags carry1_flags; }; - static const Expected expected_adcs_x[input_count][input_count] = { - {{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}, - {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, - {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}, - {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, - {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0x8000000000000002, NFlag, 0x8000000000000003, 
NFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}}, - {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, - {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}}, - {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, - {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, - {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}}, - {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, - {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, - {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}}, - {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, - {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0x8000000000000000, NCFlag, 
0x8000000000000001, NCFlag}}, - {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, - {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}}, - {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, - {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, - {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}} - }; - - static const Expected expected_sbcs_x[input_count][input_count] = { - {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, - {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, - {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}}, - {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, - {0x0000000000000001, NoFlag, 0x0000000000000002, 
NoFlag}}, - {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, - {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, - {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}}, - {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, - {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, - {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, - {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}}, - {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, - {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, - {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, - {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}}, - {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, - {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, - {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}}, - {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, - {0xfffffffffffffffc, 
NCFlag, 0xfffffffffffffffd, NCFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, - {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}}, - {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}, - {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, - {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, - {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, - {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, - {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, - {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, - {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}} - }; + static const Expected expected_adcs_x[input_count][input_count] = + {{{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}, + {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, + {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, + {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, + {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}, + {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, + {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, + {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, + {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}}, + {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, + {0x7fffffffffffffff, NoFlag, 
0x8000000000000000, NVFlag}, + {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, + {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, + {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, + {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}}, + {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, + {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, + {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, + {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, + {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}}, + {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, + {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, + {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}}, + {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, + {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}}, + {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, + 
{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, + {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, + {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}}, + {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, + {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, + {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, + {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}}}; + + static const Expected expected_sbcs_x[input_count][input_count] = + {{{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}, + {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, + {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}, + {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}, + {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}}, + {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, + {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, + {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag}, + {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}}, + {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, + {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0xfffffffffffffffe, 
NFlag, 0xffffffffffffffff, NFlag}, + {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, + {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag}, + {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}, + {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}}, + {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, + {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag}, + {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag}, + {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag}, + {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}}, + {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, + {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, + {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}, + {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}}, + {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag}, + {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0x8000000000000002, NFlag, 0x8000000000000003, NFlag}, + {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}}, + {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, + {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag}, + {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, 
CFlag}, + {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}, + {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}}, + {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}, + {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}, + {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}, + {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}, + {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}, + {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}, + {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}, + {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}}; for (size_t left = 0; left < input_count; left++) { for (size_t right = 0; right < input_count; right++) { - const Expected & expected = expected_adcs_x[left][right]; - AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 0, - expected.carry0_result, expected.carry0_flags); - AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 1, - expected.carry1_result, expected.carry1_flags); + const Expected& expected = expected_adcs_x[left][right]; + AdcsSbcsHelper(&MacroAssembler::Adcs, + inputs[left], + inputs[right], + 0, + expected.carry0_result, + expected.carry0_flags); + AdcsSbcsHelper(&MacroAssembler::Adcs, + inputs[left], + inputs[right], + 1, + expected.carry1_result, + expected.carry1_flags); } } for (size_t left = 0; left < input_count; left++) { for (size_t right = 0; right < input_count; right++) { - const Expected & expected = expected_sbcs_x[left][right]; - AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 0, - expected.carry0_result, expected.carry0_flags); - AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 1, - expected.carry1_result, expected.carry1_flags); + const Expected& expected = expected_sbcs_x[left][right]; + AdcsSbcsHelper(&MacroAssembler::Sbcs, + inputs[left], + inputs[right], + 0, + expected.carry0_result, + expected.carry0_flags); + 
AdcsSbcsHelper(&MacroAssembler::Sbcs, + inputs[left], + inputs[right], + 1, + expected.carry1_result, + expected.carry1_flags); } } } @@ -8158,8 +8301,14 @@ TEST(adcs_sbcs_x) { TEST(adcs_sbcs_w) { uint32_t inputs[] = { - 0x00000000, 0x00000001, 0x7ffffffe, 0x7fffffff, - 0x80000000, 0x80000001, 0xfffffffe, 0xffffffff, + 0x00000000, + 0x00000001, + 0x7ffffffe, + 0x7fffffff, + 0x80000000, + 0x80000001, + 0xfffffffe, + 0xffffffff, }; static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]); @@ -8170,157 +8319,171 @@ TEST(adcs_sbcs_w) { StatusFlags carry1_flags; }; - static const Expected expected_adcs_w[input_count][input_count] = { - {{0x00000000, ZFlag, 0x00000001, NoFlag}, - {0x00000001, NoFlag, 0x00000002, NoFlag}, - {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0x80000000, NFlag, 0x80000001, NFlag}, - {0x80000001, NFlag, 0x80000002, NFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}}, - {{0x00000001, NoFlag, 0x00000002, NoFlag}, - {0x00000002, NoFlag, 0x00000003, NoFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0x80000000, NVFlag, 0x80000001, NVFlag}, - {0x80000001, NFlag, 0x80000002, NFlag}, - {0x80000002, NFlag, 0x80000003, NFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}}, - {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, - {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}}, - {{0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0x80000000, NVFlag, 0x80000001, NVFlag}, - {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, - {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - 
{0x7ffffffe, CFlag, 0x7fffffff, CFlag}}, - {{0x80000000, NFlag, 0x80000001, NFlag}, - {0x80000001, NFlag, 0x80000002, NFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x00000000, ZCVFlag, 0x00000001, CVFlag}, - {0x00000001, CVFlag, 0x00000002, CVFlag}, - {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}}, - {{0x80000001, NFlag, 0x80000002, NFlag}, - {0x80000002, NFlag, 0x80000003, NFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}, - {0x00000001, CVFlag, 0x00000002, CVFlag}, - {0x00000002, CVFlag, 0x00000003, CVFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0x80000000, NCFlag, 0x80000001, NCFlag}}, - {{0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, - {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}}, - {{0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0x80000000, NCFlag, 0x80000001, NCFlag}, - {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, - {0xfffffffe, NCFlag, 0xffffffff, NCFlag}} - }; - - static const Expected expected_sbcs_w[input_count][input_count] = { - {{0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0x80000001, NFlag, 0x80000002, NFlag}, - {0x80000000, NFlag, 0x80000001, NFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, - {0x00000001, NoFlag, 0x00000002, NoFlag}, - {0x00000000, ZFlag, 0x00000001, NoFlag}}, - {{0x00000000, ZCFlag, 0x00000001, CFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x80000002, NFlag, 0x80000003, NFlag}, - {0x80000001, 
NFlag, 0x80000002, NFlag}, - {0x80000000, NVFlag, 0x80000001, NVFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0x00000002, NoFlag, 0x00000003, NoFlag}, - {0x00000001, NoFlag, 0x00000002, NoFlag}}, - {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, - {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}, - {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}}, - {{0x7ffffffe, CFlag, 0x7fffffff, CFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, - {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, - {0x80000000, NVFlag, 0x80000001, NVFlag}, - {0x7fffffff, NoFlag, 0x80000000, NVFlag}}, - {{0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, - {0x00000001, CVFlag, 0x00000002, CVFlag}, - {0x00000000, ZCVFlag, 0x00000001, CVFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}, - {0x80000001, NFlag, 0x80000002, NFlag}, - {0x80000000, NFlag, 0x80000001, NFlag}}, - {{0x80000000, NCFlag, 0x80000001, NCFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0x00000002, CVFlag, 0x00000003, CVFlag}, - {0x00000001, CVFlag, 0x00000002, CVFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0x80000002, NFlag, 0x80000003, NFlag}, - {0x80000001, NFlag, 0x80000002, NFlag}}, - {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, - {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}, - {0xfffffffe, NFlag, 0xffffffff, NFlag}}, - {{0xfffffffe, NCFlag, 0xffffffff, NCFlag}, - 
{0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, - {0x80000000, NCFlag, 0x80000001, NCFlag}, - {0x7fffffff, CVFlag, 0x80000000, NCFlag}, - {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, - {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, - {0x00000000, ZCFlag, 0x00000001, CFlag}, - {0xffffffff, NFlag, 0x00000000, ZCFlag}} - }; + static const Expected expected_adcs_w[input_count][input_count] = + {{{0x00000000, ZFlag, 0x00000001, NoFlag}, + {0x00000001, NoFlag, 0x00000002, NoFlag}, + {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0x80000000, NFlag, 0x80000001, NFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}}, + {{0x00000001, NoFlag, 0x00000002, NoFlag}, + {0x00000002, NoFlag, 0x00000003, NoFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0x80000000, NVFlag, 0x80000001, NVFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}, + {0x80000002, NFlag, 0x80000003, NFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}}, + {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, + {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}}, + {{0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0x80000000, NVFlag, 0x80000001, NVFlag}, + {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, + {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x7ffffffe, CFlag, 0x7fffffff, CFlag}}, + {{0x80000000, NFlag, 0x80000001, NFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x00000000, ZCVFlag, 0x00000001, CVFlag}, + {0x00000001, 
CVFlag, 0x00000002, CVFlag}, + {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}}, + {{0x80000001, NFlag, 0x80000002, NFlag}, + {0x80000002, NFlag, 0x80000003, NFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}, + {0x00000001, CVFlag, 0x00000002, CVFlag}, + {0x00000002, CVFlag, 0x00000003, CVFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0x80000000, NCFlag, 0x80000001, NCFlag}}, + {{0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, + {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}}, + {{0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0x80000000, NCFlag, 0x80000001, NCFlag}, + {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, + {0xfffffffe, NCFlag, 0xffffffff, NCFlag}}}; + + static const Expected expected_sbcs_w[input_count][input_count] = + {{{0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}, + {0x80000000, NFlag, 0x80000001, NFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}, + {0x00000001, NoFlag, 0x00000002, NoFlag}, + {0x00000000, ZFlag, 0x00000001, NoFlag}}, + {{0x00000000, ZCFlag, 0x00000001, CFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x80000002, NFlag, 0x80000003, NFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}, + {0x80000000, NVFlag, 0x80000001, NVFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0x00000002, NoFlag, 0x00000003, NoFlag}, + {0x00000001, NoFlag, 0x00000002, NoFlag}}, + {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x7ffffffc, CFlag, 
0x7ffffffd, CFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, + {0xfffffffc, NVFlag, 0xfffffffd, NVFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}, + {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}}, + {{0x7ffffffe, CFlag, 0x7fffffff, CFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0xfffffffe, NVFlag, 0xffffffff, NVFlag}, + {0xfffffffd, NVFlag, 0xfffffffe, NVFlag}, + {0x80000000, NVFlag, 0x80000001, NVFlag}, + {0x7fffffff, NoFlag, 0x80000000, NVFlag}}, + {{0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, + {0x00000001, CVFlag, 0x00000002, CVFlag}, + {0x00000000, ZCVFlag, 0x00000001, CVFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}, + {0x80000000, NFlag, 0x80000001, NFlag}}, + {{0x80000000, NCFlag, 0x80000001, NCFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0x00000002, CVFlag, 0x00000003, CVFlag}, + {0x00000001, CVFlag, 0x00000002, CVFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0x80000002, NFlag, 0x80000003, NFlag}, + {0x80000001, NFlag, 0x80000002, NFlag}}, + {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, + {0xfffffffc, NCFlag, 0xfffffffd, NCFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x7ffffffc, CFlag, 0x7ffffffd, CFlag}, + {0xffffffff, NFlag, 0x00000000, ZCFlag}, + {0xfffffffe, NFlag, 0xffffffff, NFlag}}, + {{0xfffffffe, NCFlag, 0xffffffff, NCFlag}, + {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}, + {0x80000000, NCFlag, 0x80000001, NCFlag}, + {0x7fffffff, CVFlag, 0x80000000, NCFlag}, + {0x7ffffffe, CFlag, 0x7fffffff, CFlag}, + {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}, + {0x00000000, ZCFlag, 0x00000001, CFlag}, + {0xffffffff, NFlag, 
0x00000000, ZCFlag}}}; for (size_t left = 0; left < input_count; left++) { for (size_t right = 0; right < input_count; right++) { - const Expected & expected = expected_adcs_w[left][right]; - AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 0, - expected.carry0_result, expected.carry0_flags); - AdcsSbcsHelper(&MacroAssembler::Adcs, inputs[left], inputs[right], 1, - expected.carry1_result, expected.carry1_flags); + const Expected& expected = expected_adcs_w[left][right]; + AdcsSbcsHelper(&MacroAssembler::Adcs, + inputs[left], + inputs[right], + 0, + expected.carry0_result, + expected.carry0_flags); + AdcsSbcsHelper(&MacroAssembler::Adcs, + inputs[left], + inputs[right], + 1, + expected.carry1_result, + expected.carry1_flags); } } for (size_t left = 0; left < input_count; left++) { for (size_t right = 0; right < input_count; right++) { - const Expected & expected = expected_sbcs_w[left][right]; - AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 0, - expected.carry0_result, expected.carry0_flags); - AdcsSbcsHelper(&MacroAssembler::Sbcs, inputs[left], inputs[right], 1, - expected.carry1_result, expected.carry1_flags); + const Expected& expected = expected_sbcs_w[left][right]; + AdcsSbcsHelper(&MacroAssembler::Sbcs, + inputs[left], + inputs[right], + 0, + expected.carry0_result, + expected.carry0_flags); + AdcsSbcsHelper(&MacroAssembler::Sbcs, + inputs[left], + inputs[right], + 1, + expected.carry1_result, + expected.carry1_flags); } } } @@ -9706,9 +9869,13 @@ TEST(fmul) { } -static void FmaddFmsubHelper(double n, double m, double a, - double fmadd, double fmsub, - double fnmadd, double fnmsub) { +static void FmaddFmsubHelper(double n, + double m, + double a, + double fmadd, + double fmsub, + double fnmadd, + double fnmsub) { SETUP(); START(); @@ -9742,28 +9909,40 @@ TEST(fmadd_fmsub_double) { // Check the sign of exact zeroes. 
// n m a fmadd fmsub fnmadd fnmsub - FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0, +0.0, +0.0, +0.0); - FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0, -0.0, +0.0, +0.0); - FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0, +0.0, -0.0, +0.0); - FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0, +0.0, +0.0, -0.0); - FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0, +0.0, +0.0, +0.0); - FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0, -0.0, +0.0, +0.0); - FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0, +0.0, -0.0, +0.0); - FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0, +0.0, +0.0, -0.0); + FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0, +0.0, +0.0, +0.0); + FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0, -0.0, +0.0, +0.0); + FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0, +0.0, -0.0, +0.0); + FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0, +0.0, +0.0, -0.0); + FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0, +0.0, +0.0, +0.0); + FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0, -0.0, +0.0, +0.0); + FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0, +0.0, -0.0, +0.0); + FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0, +0.0, +0.0, -0.0); // Check NaN generation. 
- FmaddFmsubHelper(kFP64PositiveInfinity, 0.0, 42.0, - kFP64DefaultNaN, kFP64DefaultNaN, - kFP64DefaultNaN, kFP64DefaultNaN); - FmaddFmsubHelper(0.0, kFP64PositiveInfinity, 42.0, - kFP64DefaultNaN, kFP64DefaultNaN, - kFP64DefaultNaN, kFP64DefaultNaN); - FmaddFmsubHelper(kFP64PositiveInfinity, 1.0, kFP64PositiveInfinity, - kFP64PositiveInfinity, // inf + ( inf * 1) = inf - kFP64DefaultNaN, // inf + (-inf * 1) = NaN - kFP64NegativeInfinity, // -inf + (-inf * 1) = -inf - kFP64DefaultNaN); // -inf + ( inf * 1) = NaN - FmaddFmsubHelper(kFP64NegativeInfinity, 1.0, kFP64PositiveInfinity, + FmaddFmsubHelper(kFP64PositiveInfinity, + 0.0, + 42.0, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN); + FmaddFmsubHelper(0.0, + kFP64PositiveInfinity, + 42.0, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN); + FmaddFmsubHelper(kFP64PositiveInfinity, + 1.0, + kFP64PositiveInfinity, + kFP64PositiveInfinity, // inf + ( inf * 1) = inf + kFP64DefaultNaN, // inf + (-inf * 1) = NaN + kFP64NegativeInfinity, // -inf + (-inf * 1) = -inf + kFP64DefaultNaN); // -inf + ( inf * 1) = NaN + FmaddFmsubHelper(kFP64NegativeInfinity, + 1.0, + kFP64PositiveInfinity, kFP64DefaultNaN, // inf + (-inf * 1) = NaN kFP64PositiveInfinity, // inf + ( inf * 1) = inf kFP64DefaultNaN, // -inf + ( inf * 1) = NaN @@ -9771,9 +9950,13 @@ TEST(fmadd_fmsub_double) { } -static void FmaddFmsubHelper(float n, float m, float a, - float fmadd, float fmsub, - float fnmadd, float fnmsub) { +static void FmaddFmsubHelper(float n, + float m, + float a, + float fmadd, + float fmsub, + float fnmadd, + float fnmsub) { SETUP(); START(); @@ -9817,18 +10000,30 @@ TEST(fmadd_fmsub_float) { FmaddFmsubHelper(+0.0f, -0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f); // Check NaN generation. 
- FmaddFmsubHelper(kFP32PositiveInfinity, 0.0f, 42.0f, - kFP32DefaultNaN, kFP32DefaultNaN, - kFP32DefaultNaN, kFP32DefaultNaN); - FmaddFmsubHelper(0.0f, kFP32PositiveInfinity, 42.0f, - kFP32DefaultNaN, kFP32DefaultNaN, - kFP32DefaultNaN, kFP32DefaultNaN); - FmaddFmsubHelper(kFP32PositiveInfinity, 1.0f, kFP32PositiveInfinity, - kFP32PositiveInfinity, // inf + ( inf * 1) = inf - kFP32DefaultNaN, // inf + (-inf * 1) = NaN - kFP32NegativeInfinity, // -inf + (-inf * 1) = -inf - kFP32DefaultNaN); // -inf + ( inf * 1) = NaN - FmaddFmsubHelper(kFP32NegativeInfinity, 1.0f, kFP32PositiveInfinity, + FmaddFmsubHelper(kFP32PositiveInfinity, + 0.0f, + 42.0f, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN); + FmaddFmsubHelper(0.0f, + kFP32PositiveInfinity, + 42.0f, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN); + FmaddFmsubHelper(kFP32PositiveInfinity, + 1.0f, + kFP32PositiveInfinity, + kFP32PositiveInfinity, // inf + ( inf * 1) = inf + kFP32DefaultNaN, // inf + (-inf * 1) = NaN + kFP32NegativeInfinity, // -inf + (-inf * 1) = -inf + kFP32DefaultNaN); // -inf + ( inf * 1) = NaN + FmaddFmsubHelper(kFP32NegativeInfinity, + 1.0f, + kFP32PositiveInfinity, kFP32DefaultNaN, // inf + (-inf * 1) = NaN kFP32PositiveInfinity, // inf + ( inf * 1) = inf kFP32DefaultNaN, // -inf + ( inf * 1) = NaN @@ -9903,18 +10098,34 @@ TEST(fmadd_fmsub_double_nans) { FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a. 
- FmaddFmsubHelper(0, kFP64PositiveInfinity, qa, - kFP64DefaultNaN, kFP64DefaultNaN, - kFP64DefaultNaN, kFP64DefaultNaN); - FmaddFmsubHelper(kFP64PositiveInfinity, 0, qa, - kFP64DefaultNaN, kFP64DefaultNaN, - kFP64DefaultNaN, kFP64DefaultNaN); - FmaddFmsubHelper(0, kFP64NegativeInfinity, qa, - kFP64DefaultNaN, kFP64DefaultNaN, - kFP64DefaultNaN, kFP64DefaultNaN); - FmaddFmsubHelper(kFP64NegativeInfinity, 0, qa, - kFP64DefaultNaN, kFP64DefaultNaN, - kFP64DefaultNaN, kFP64DefaultNaN); + FmaddFmsubHelper(0, + kFP64PositiveInfinity, + qa, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN); + FmaddFmsubHelper(kFP64PositiveInfinity, + 0, + qa, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN); + FmaddFmsubHelper(0, + kFP64NegativeInfinity, + qa, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN); + FmaddFmsubHelper(kFP64NegativeInfinity, + 0, + qa, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN, + kFP64DefaultNaN); } @@ -9985,18 +10196,34 @@ TEST(fmadd_fmsub_float_nans) { FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg); // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a. 
- FmaddFmsubHelper(0, kFP32PositiveInfinity, qa, - kFP32DefaultNaN, kFP32DefaultNaN, - kFP32DefaultNaN, kFP32DefaultNaN); - FmaddFmsubHelper(kFP32PositiveInfinity, 0, qa, - kFP32DefaultNaN, kFP32DefaultNaN, - kFP32DefaultNaN, kFP32DefaultNaN); - FmaddFmsubHelper(0, kFP32NegativeInfinity, qa, - kFP32DefaultNaN, kFP32DefaultNaN, - kFP32DefaultNaN, kFP32DefaultNaN); - FmaddFmsubHelper(kFP32NegativeInfinity, 0, qa, - kFP32DefaultNaN, kFP32DefaultNaN, - kFP32DefaultNaN, kFP32DefaultNaN); + FmaddFmsubHelper(0, + kFP32PositiveInfinity, + qa, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN); + FmaddFmsubHelper(kFP32PositiveInfinity, + 0, + qa, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN); + FmaddFmsubHelper(0, + kFP32NegativeInfinity, + qa, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN); + FmaddFmsubHelper(kFP32NegativeInfinity, + 0, + qa, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN, + kFP32DefaultNaN); } @@ -10090,8 +10317,7 @@ static float MinMaxHelper(float n, } } - if ((n == 0.0) && (m == 0.0) && - (copysign(1.0, n) != copysign(1.0, m))) { + if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) { return min ? -0.0 : 0.0; } @@ -10132,8 +10358,7 @@ static double MinMaxHelper(double n, } } - if ((n == 0.0) && (m == 0.0) && - (copysign(1.0, n) != copysign(1.0, m))) { + if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) { return min ? -0.0 : 0.0; } @@ -10141,8 +10366,8 @@ static double MinMaxHelper(double n, } -static void FminFmaxDoubleHelper(double n, double m, double min, double max, - double minnm, double maxnm) { +static void FminFmaxDoubleHelper( + double n, double m, double min, double max, double minnm, double maxnm) { SETUP(); START(); @@ -10181,33 +10406,52 @@ TEST(fmax_fmin_d) { // Bootstrap tests. 
FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0); FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1); - FminFmaxDoubleHelper(kFP64PositiveInfinity, kFP64NegativeInfinity, - kFP64NegativeInfinity, kFP64PositiveInfinity, - kFP64NegativeInfinity, kFP64PositiveInfinity); - FminFmaxDoubleHelper(snan, 0, - snan_processed, snan_processed, - snan_processed, snan_processed); - FminFmaxDoubleHelper(0, snan, - snan_processed, snan_processed, - snan_processed, snan_processed); - FminFmaxDoubleHelper(qnan, 0, - qnan_processed, qnan_processed, - 0, 0); - FminFmaxDoubleHelper(0, qnan, - qnan_processed, qnan_processed, - 0, 0); - FminFmaxDoubleHelper(qnan, snan, - snan_processed, snan_processed, - snan_processed, snan_processed); - FminFmaxDoubleHelper(snan, qnan, - snan_processed, snan_processed, - snan_processed, snan_processed); + FminFmaxDoubleHelper(kFP64PositiveInfinity, + kFP64NegativeInfinity, + kFP64NegativeInfinity, + kFP64PositiveInfinity, + kFP64NegativeInfinity, + kFP64PositiveInfinity); + FminFmaxDoubleHelper(snan, + 0, + snan_processed, + snan_processed, + snan_processed, + snan_processed); + FminFmaxDoubleHelper(0, + snan, + snan_processed, + snan_processed, + snan_processed, + snan_processed); + FminFmaxDoubleHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0); + FminFmaxDoubleHelper(0, qnan, qnan_processed, qnan_processed, 0, 0); + FminFmaxDoubleHelper(qnan, + snan, + snan_processed, + snan_processed, + snan_processed, + snan_processed); + FminFmaxDoubleHelper(snan, + qnan, + snan_processed, + snan_processed, + snan_processed, + snan_processed); // Iterate over all combinations of inputs. 
- double inputs[] = { DBL_MAX, DBL_MIN, 1.0, 0.0, - -DBL_MAX, -DBL_MIN, -1.0, -0.0, - kFP64PositiveInfinity, kFP64NegativeInfinity, - kFP64QuietNaN, kFP64SignallingNaN }; + double inputs[] = {DBL_MAX, + DBL_MIN, + 1.0, + 0.0, + -DBL_MAX, + -DBL_MIN, + -1.0, + -0.0, + kFP64PositiveInfinity, + kFP64NegativeInfinity, + kFP64QuietNaN, + kFP64SignallingNaN}; const int count = sizeof(inputs) / sizeof(inputs[0]); @@ -10215,7 +10459,8 @@ TEST(fmax_fmin_d) { double n = inputs[in]; for (int im = 0; im < count; im++) { double m = inputs[im]; - FminFmaxDoubleHelper(n, m, + FminFmaxDoubleHelper(n, + m, MinMaxHelper(n, m, true), MinMaxHelper(n, m, false), MinMaxHelper(n, m, true, kFP64PositiveInfinity), @@ -10225,8 +10470,8 @@ TEST(fmax_fmin_d) { } -static void FminFmaxFloatHelper(float n, float m, float min, float max, - float minnm, float maxnm) { +static void FminFmaxFloatHelper( + float n, float m, float min, float max, float minnm, float maxnm) { SETUP(); START(); @@ -10265,33 +10510,52 @@ TEST(fmax_fmin_s) { // Bootstrap tests. 
FminFmaxFloatHelper(0, 0, 0, 0, 0, 0); FminFmaxFloatHelper(0, 1, 0, 1, 0, 1); - FminFmaxFloatHelper(kFP32PositiveInfinity, kFP32NegativeInfinity, - kFP32NegativeInfinity, kFP32PositiveInfinity, - kFP32NegativeInfinity, kFP32PositiveInfinity); - FminFmaxFloatHelper(snan, 0, - snan_processed, snan_processed, - snan_processed, snan_processed); - FminFmaxFloatHelper(0, snan, - snan_processed, snan_processed, - snan_processed, snan_processed); - FminFmaxFloatHelper(qnan, 0, - qnan_processed, qnan_processed, - 0, 0); - FminFmaxFloatHelper(0, qnan, - qnan_processed, qnan_processed, - 0, 0); - FminFmaxFloatHelper(qnan, snan, - snan_processed, snan_processed, - snan_processed, snan_processed); - FminFmaxFloatHelper(snan, qnan, - snan_processed, snan_processed, - snan_processed, snan_processed); + FminFmaxFloatHelper(kFP32PositiveInfinity, + kFP32NegativeInfinity, + kFP32NegativeInfinity, + kFP32PositiveInfinity, + kFP32NegativeInfinity, + kFP32PositiveInfinity); + FminFmaxFloatHelper(snan, + 0, + snan_processed, + snan_processed, + snan_processed, + snan_processed); + FminFmaxFloatHelper(0, + snan, + snan_processed, + snan_processed, + snan_processed, + snan_processed); + FminFmaxFloatHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0); + FminFmaxFloatHelper(0, qnan, qnan_processed, qnan_processed, 0, 0); + FminFmaxFloatHelper(qnan, + snan, + snan_processed, + snan_processed, + snan_processed, + snan_processed); + FminFmaxFloatHelper(snan, + qnan, + snan_processed, + snan_processed, + snan_processed, + snan_processed); // Iterate over all combinations of inputs. 
- float inputs[] = { FLT_MAX, FLT_MIN, 1.0, 0.0, - -FLT_MAX, -FLT_MIN, -1.0, -0.0, - kFP32PositiveInfinity, kFP32NegativeInfinity, - kFP32QuietNaN, kFP32SignallingNaN }; + float inputs[] = {FLT_MAX, + FLT_MIN, + 1.0, + 0.0, + -FLT_MAX, + -FLT_MIN, + -1.0, + -0.0, + kFP32PositiveInfinity, + kFP32NegativeInfinity, + kFP32QuietNaN, + kFP32SignallingNaN}; const int count = sizeof(inputs) / sizeof(inputs[0]); @@ -10299,7 +10563,8 @@ TEST(fmax_fmin_s) { float n = inputs[in]; for (int im = 0; im < count; im++) { float m = inputs[im]; - FminFmaxFloatHelper(n, m, + FminFmaxFloatHelper(n, + m, MinMaxHelper(n, m, true), MinMaxHelper(n, m, false), MinMaxHelper(n, m, true, kFP32PositiveInfinity), @@ -11301,8 +11566,8 @@ TEST(fcvt_ds) { __ Fmov(s26, -0.0); __ Fmov(s27, FLT_MAX); __ Fmov(s28, FLT_MIN); - __ Fmov(s29, RawbitsToFloat(0x7fc12345)); // Quiet NaN. - __ Fmov(s30, RawbitsToFloat(0x7f812345)); // Signalling NaN. + __ Fmov(s29, RawbitsToFloat(0x7fc12345)); // Quiet NaN. + __ Fmov(s30, RawbitsToFloat(0x7f812345)); // Signalling NaN. __ Fcvt(d0, s16); __ Fcvt(d1, s17); @@ -11370,8 +11635,8 @@ TEST(fcvt_sd) { __ Fmov(d26, -0.0); __ Fmov(d27, FLT_MAX); __ Fmov(d28, FLT_MIN); - __ Fmov(d29, RawbitsToDouble(0x7ff82468a0000000)); // Quiet NaN. - __ Fmov(d30, RawbitsToDouble(0x7ff02468a0000000)); // Signalling NaN. + __ Fmov(d29, RawbitsToDouble(0x7ff82468a0000000)); // Quiet NaN. + __ Fmov(d30, RawbitsToDouble(0x7ff02468a0000000)); // Signalling NaN. __ Fcvt(s0, d16); __ Fcvt(s1, d17); @@ -11548,15 +11813,15 @@ TEST(fcvtas) { __ Fmov(s19, -2.5); __ Fmov(s20, kFP32PositiveInfinity); __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. - __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. 
__ Fmov(d24, 1.1); __ Fmov(d25, 2.5); __ Fmov(d26, -2.5); __ Fmov(d27, kFP64PositiveInfinity); __ Fmov(d28, kFP64NegativeInfinity); - __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. - __ Fneg(d30, d29); // Smallest double > INT64_MIN. + __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. __ Fcvtas(w0, s0); __ Fcvtas(w1, s1); @@ -11736,8 +12001,8 @@ TEST(fcvtms) { __ Fmov(s3, -1.5); __ Fmov(s4, kFP32PositiveInfinity); __ Fmov(s5, kFP32NegativeInfinity); - __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. - __ Fneg(s7, s6); // Smallest float > INT32_MIN. + __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. + __ Fneg(s7, s6); // Smallest float > INT32_MIN. __ Fmov(d8, 1.0); __ Fmov(d9, 1.1); __ Fmov(d10, 1.5); @@ -11751,15 +12016,15 @@ TEST(fcvtms) { __ Fmov(s19, -1.5); __ Fmov(s20, kFP32PositiveInfinity); __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. - __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. __ Fmov(d24, 1.1); __ Fmov(d25, 1.5); __ Fmov(d26, -1.5); __ Fmov(d27, kFP64PositiveInfinity); __ Fmov(d28, kFP64NegativeInfinity); - __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. - __ Fneg(d30, d29); // Smallest double > INT64_MIN. + __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. __ Fcvtms(w0, s0); __ Fcvtms(w1, s1); @@ -11840,8 +12105,8 @@ TEST(fcvtmu) { __ Fmov(s3, -1.5); __ Fmov(s4, kFP32PositiveInfinity); __ Fmov(s5, kFP32NegativeInfinity); - __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. - __ Fneg(s7, s6); // Smallest float > INT32_MIN. + __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. + __ Fneg(s7, s6); // Smallest float > INT32_MIN. 
__ Fmov(d8, 1.0); __ Fmov(d9, 1.1); __ Fmov(d10, 1.5); @@ -11855,15 +12120,15 @@ TEST(fcvtmu) { __ Fmov(s19, -1.5); __ Fmov(s20, kFP32PositiveInfinity); __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. - __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. __ Fmov(d24, 1.1); __ Fmov(d25, 1.5); __ Fmov(d26, -1.5); __ Fmov(d27, kFP64PositiveInfinity); __ Fmov(d28, kFP64NegativeInfinity); - __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. - __ Fneg(d30, d29); // Smallest double > INT64_MIN. + __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. __ Fcvtmu(w0, s0); __ Fcvtmu(w1, s1); @@ -11942,8 +12207,8 @@ TEST(fcvtns) { __ Fmov(s3, -1.5); __ Fmov(s4, kFP32PositiveInfinity); __ Fmov(s5, kFP32NegativeInfinity); - __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. - __ Fneg(s7, s6); // Smallest float > INT32_MIN. + __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. + __ Fneg(s7, s6); // Smallest float > INT32_MIN. __ Fmov(d8, 1.0); __ Fmov(d9, 1.1); __ Fmov(d10, 1.5); @@ -11957,15 +12222,15 @@ TEST(fcvtns) { __ Fmov(s19, -1.5); __ Fmov(s20, kFP32PositiveInfinity); __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. - __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. __ Fmov(d24, 1.1); __ Fmov(d25, 1.5); __ Fmov(d26, -1.5); __ Fmov(d27, kFP64PositiveInfinity); __ Fmov(d28, kFP64NegativeInfinity); - __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. - __ Fneg(d30, d29); // Smallest double > INT64_MIN. + __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. 
__ Fcvtns(w0, s0); __ Fcvtns(w1, s1); @@ -12145,8 +12410,8 @@ TEST(fcvtzs) { __ Fmov(s3, -1.5); __ Fmov(s4, kFP32PositiveInfinity); __ Fmov(s5, kFP32NegativeInfinity); - __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. - __ Fneg(s7, s6); // Smallest float > INT32_MIN. + __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. + __ Fneg(s7, s6); // Smallest float > INT32_MIN. __ Fmov(d8, 1.0); __ Fmov(d9, 1.1); __ Fmov(d10, 1.5); @@ -12160,15 +12425,15 @@ TEST(fcvtzs) { __ Fmov(s19, -1.5); __ Fmov(s20, kFP32PositiveInfinity); __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. - __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. __ Fmov(d24, 1.1); __ Fmov(d25, 1.5); __ Fmov(d26, -1.5); __ Fmov(d27, kFP64PositiveInfinity); __ Fmov(d28, kFP64NegativeInfinity); - __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. - __ Fneg(d30, d29); // Smallest double > INT64_MIN. + __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. __ Fcvtzs(w0, s0); __ Fcvtzs(w1, s1); @@ -12248,8 +12513,8 @@ TEST(fcvtzu) { __ Fmov(s3, -1.5); __ Fmov(s4, kFP32PositiveInfinity); __ Fmov(s5, kFP32NegativeInfinity); - __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. - __ Fneg(s7, s6); // Smallest float > INT32_MIN. + __ Fmov(s6, 0x7fffff80); // Largest float < INT32_MAX. + __ Fneg(s7, s6); // Smallest float > INT32_MIN. __ Fmov(d8, 1.0); __ Fmov(d9, 1.1); __ Fmov(d10, 1.5); @@ -12263,15 +12528,15 @@ TEST(fcvtzu) { __ Fmov(s19, -1.5); __ Fmov(s20, kFP32PositiveInfinity); __ Fmov(s21, kFP32NegativeInfinity); - __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. - __ Fneg(s23, s22); // Smallest float > INT64_MIN. + __ Fmov(s22, 0x7fffff8000000000); // Largest float < INT64_MAX. + __ Fneg(s23, s22); // Smallest float > INT64_MIN. 
__ Fmov(d24, 1.1); __ Fmov(d25, 1.5); __ Fmov(d26, -1.5); __ Fmov(d27, kFP64PositiveInfinity); __ Fmov(d28, kFP64NegativeInfinity); - __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. - __ Fneg(d30, d29); // Smallest double > INT64_MIN. + __ Fmov(d29, 0x7ffffffffffffc00); // Largest double < INT64_MAX. + __ Fneg(d30, d29); // Smallest double > INT64_MIN. __ Fcvtzu(w0, s0); __ Fcvtzu(w1, s1); @@ -12914,7 +13179,7 @@ TEST(zero_dest) { __ Mov(x0, 0); __ Mov(x1, literal_base); for (unsigned i = 2; i < x30.GetCode(); i++) { - __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i-1), x1); + __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1); } before.Dump(&masm); @@ -12981,7 +13246,7 @@ TEST(zero_dest_setflags) { __ Mov(x0, 0); __ Mov(x1, literal_base); for (int i = 2; i < 30; i++) { - __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i-1), x1); + __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1); } before.Dump(&masm); @@ -13264,8 +13529,8 @@ TEST(peek_poke_endianness) { uint64_t x0_expected = literal_base * 1; uint64_t x1_expected = literal_base * 2; uint64_t x4_expected = (x0_expected << 32) | (x0_expected >> 32); - uint64_t x5_expected = ((x1_expected << 16) & 0xffff0000) | - ((x1_expected >> 16) & 0x0000ffff); + uint64_t x5_expected = + ((x1_expected << 16) & 0xffff0000) | ((x1_expected >> 16) & 0x0000ffff); ASSERT_EQUAL_64(x0_expected, x0); ASSERT_EQUAL_64(x1_expected, x1); @@ -13311,13 +13576,13 @@ TEST(peek_poke_mixed) { __ Mov(x4, __ StackPointer()); __ SetStackPointer(x4); - __ Poke(wzr, 0); // Clobber the space we're about to drop. + __ Poke(wzr, 0); // Clobber the space we're about to drop. __ Drop(4); __ Peek(x6, 0); __ Claim(8); __ Peek(w7, 10); __ Poke(x3, 28); - __ Poke(xzr, 0); // Clobber the space we're about to drop. + __ Poke(xzr, 0); // Clobber the space we're about to drop. 
__ Drop(8); __ Poke(x2, 12); __ Push(w0); @@ -13336,8 +13601,8 @@ TEST(peek_poke_mixed) { uint64_t x2_expected = literal_base * 3; uint64_t x3_expected = literal_base * 4; uint64_t x6_expected = (x1_expected << 32) | (x0_expected >> 32); - uint64_t x7_expected = ((x1_expected << 16) & 0xffff0000) | - ((x0_expected >> 48) & 0x0000ffff); + uint64_t x7_expected = + ((x1_expected << 16) & 0xffff0000) | ((x0_expected >> 48) & 0x0000ffff); ASSERT_EQUAL_64(x0_expected, x0); ASSERT_EQUAL_64(x1_expected, x1); @@ -13429,9 +13694,9 @@ TEST(peek_poke_reglist) { ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base_d), d12); ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base_d), d13); ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14); - ASSERT_EQUAL_FP64( - RawbitsToDouble((base_d >> kSRegSize) | ((2 * base_d) << kSRegSize)), - d15); + ASSERT_EQUAL_FP64(RawbitsToDouble((base_d >> kSRegSize) | + ((2 * base_d) << kSRegSize)), + d15); ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14); ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) & kSRegMask), s16); ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) >> kSRegSize), s17); @@ -13450,7 +13715,7 @@ TEST(load_store_reglist) { // * The value is not formed from repeating fixed-size smaller values, so it // can be used to detect endianness-related errors. uint64_t high_base = UINT32_C(0x01000010); - uint64_t low_base = UINT32_C(0x00100101); + uint64_t low_base = UINT32_C(0x00100101); uint64_t base = (high_base << 32) | low_base; uint64_t array[21]; memset(array, 0, sizeof(array)); @@ -13597,8 +13862,8 @@ static void PushPopXRegSimpleHelper(int reg_count, // Work out which registers to use, based on reg_size. Register r[kNumberOfRegisters]; Register x[kNumberOfRegisters]; - RegList list = PopulateRegisterArray(NULL, x, r, reg_size, reg_count, - allowed); + RegList list = + PopulateRegisterArray(NULL, x, r, reg_size, reg_count, allowed); // Acquire all temps from the MacroAssembler. They are used arbitrarily below. 
UseScratchRegisterScope temps(&masm); @@ -13632,14 +13897,22 @@ static void PushPopXRegSimpleHelper(int reg_count, case PushPopByFour: // Push high-numbered registers first (to the highest addresses). for (i = reg_count; i >= 4; i -= 4) { - __ Push(r[i-1], r[i-2], r[i-3], r[i-4]); + __ Push(r[i - 1], r[i - 2], r[i - 3], r[i - 4]); } // Finish off the leftovers. switch (i) { - case 3: __ Push(r[2], r[1], r[0]); break; - case 2: __ Push(r[1], r[0]); break; - case 1: __ Push(r[0]); break; - default: VIXL_ASSERT(i == 0); break; + case 3: + __ Push(r[2], r[1], r[0]); + break; + case 2: + __ Push(r[1], r[0]); + break; + case 1: + __ Push(r[0]); + break; + default: + VIXL_ASSERT(i == 0); + break; } break; case PushPopRegList: @@ -13653,15 +13926,23 @@ static void PushPopXRegSimpleHelper(int reg_count, switch (pop_method) { case PushPopByFour: // Pop low-numbered registers first (from the lowest addresses). - for (i = 0; i <= (reg_count-4); i += 4) { - __ Pop(r[i], r[i+1], r[i+2], r[i+3]); + for (i = 0; i <= (reg_count - 4); i += 4) { + __ Pop(r[i], r[i + 1], r[i + 2], r[i + 3]); } // Finish off the leftovers. switch (reg_count - i) { - case 3: __ Pop(r[i], r[i+1], r[i+2]); break; - case 2: __ Pop(r[i], r[i+1]); break; - case 1: __ Pop(r[i]); break; - default: VIXL_ASSERT(i == reg_count); break; + case 3: + __ Pop(r[i], r[i + 1], r[i + 2]); + break; + case 2: + __ Pop(r[i], r[i + 1]); + break; + case 1: + __ Pop(r[i]); + break; + default: + VIXL_ASSERT(i == reg_count); + break; } break; case PushPopRegList: @@ -13683,7 +13964,7 @@ static void PushPopXRegSimpleHelper(int reg_count, // Check that the register contents were preserved. // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test // that the upper word was properly cleared by Pop. 
- literal_base &= (0xffffffffffffffff >> (64-reg_size)); + literal_base &= (0xffffffffffffffff >> (64 - reg_size)); for (int i = 0; i < reg_count; i++) { if (x[i].Is(xzr)) { ASSERT_EQUAL_64(0, x[i]); @@ -13699,24 +13980,48 @@ static void PushPopXRegSimpleHelper(int reg_count, TEST(push_pop_xreg_simple_32) { for (int claim = 0; claim <= 8; claim++) { for (int count = 0; count <= 8; count++) { - PushPopXRegSimpleHelper(count, claim, kWRegSize, - PushPopByFour, PushPopByFour); - PushPopXRegSimpleHelper(count, claim, kWRegSize, - PushPopByFour, PushPopRegList); - PushPopXRegSimpleHelper(count, claim, kWRegSize, - PushPopRegList, PushPopByFour); - PushPopXRegSimpleHelper(count, claim, kWRegSize, - PushPopRegList, PushPopRegList); + PushPopXRegSimpleHelper(count, + claim, + kWRegSize, + PushPopByFour, + PushPopByFour); + PushPopXRegSimpleHelper(count, + claim, + kWRegSize, + PushPopByFour, + PushPopRegList); + PushPopXRegSimpleHelper(count, + claim, + kWRegSize, + PushPopRegList, + PushPopByFour); + PushPopXRegSimpleHelper(count, + claim, + kWRegSize, + PushPopRegList, + PushPopRegList); } // Test with the maximum number of registers. 
PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kWRegSize, PushPopByFour, PushPopByFour); + claim, + kWRegSize, + PushPopByFour, + PushPopByFour); PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kWRegSize, PushPopByFour, PushPopRegList); + claim, + kWRegSize, + PushPopByFour, + PushPopRegList); PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kWRegSize, PushPopRegList, PushPopByFour); + claim, + kWRegSize, + PushPopRegList, + PushPopByFour); PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kWRegSize, PushPopRegList, PushPopRegList); + claim, + kWRegSize, + PushPopRegList, + PushPopRegList); } } @@ -13724,24 +14029,48 @@ TEST(push_pop_xreg_simple_32) { TEST(push_pop_xreg_simple_64) { for (int claim = 0; claim <= 8; claim++) { for (int count = 0; count <= 8; count++) { - PushPopXRegSimpleHelper(count, claim, kXRegSize, - PushPopByFour, PushPopByFour); - PushPopXRegSimpleHelper(count, claim, kXRegSize, - PushPopByFour, PushPopRegList); - PushPopXRegSimpleHelper(count, claim, kXRegSize, - PushPopRegList, PushPopByFour); - PushPopXRegSimpleHelper(count, claim, kXRegSize, - PushPopRegList, PushPopRegList); + PushPopXRegSimpleHelper(count, + claim, + kXRegSize, + PushPopByFour, + PushPopByFour); + PushPopXRegSimpleHelper(count, + claim, + kXRegSize, + PushPopByFour, + PushPopRegList); + PushPopXRegSimpleHelper(count, + claim, + kXRegSize, + PushPopRegList, + PushPopByFour); + PushPopXRegSimpleHelper(count, + claim, + kXRegSize, + PushPopRegList, + PushPopRegList); } // Test with the maximum number of registers. 
PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kXRegSize, PushPopByFour, PushPopByFour); + claim, + kXRegSize, + PushPopByFour, + PushPopByFour); PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kXRegSize, PushPopByFour, PushPopRegList); + claim, + kXRegSize, + PushPopByFour, + PushPopRegList); PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kXRegSize, PushPopRegList, PushPopByFour); + claim, + kXRegSize, + PushPopRegList, + PushPopByFour); PushPopXRegSimpleHelper(kPushPopXRegMaxRegCount, - claim, kXRegSize, PushPopRegList, PushPopRegList); + claim, + kXRegSize, + PushPopRegList, + PushPopRegList); } } @@ -13777,8 +14106,8 @@ static void PushPopFPXRegSimpleHelper(int reg_count, // Work out which registers to use, based on reg_size. FPRegister v[kNumberOfRegisters]; FPRegister d[kNumberOfRegisters]; - RegList list = PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count, - allowed); + RegList list = + PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count, allowed); // Arbitrarily pick a register to use as a stack pointer. const Register& stack_pointer = x10; @@ -13821,14 +14150,22 @@ static void PushPopFPXRegSimpleHelper(int reg_count, case PushPopByFour: // Push high-numbered registers first (to the highest addresses). for (i = reg_count; i >= 4; i -= 4) { - __ Push(v[i-1], v[i-2], v[i-3], v[i-4]); + __ Push(v[i - 1], v[i - 2], v[i - 3], v[i - 4]); } // Finish off the leftovers. switch (i) { - case 3: __ Push(v[2], v[1], v[0]); break; - case 2: __ Push(v[1], v[0]); break; - case 1: __ Push(v[0]); break; - default: VIXL_ASSERT(i == 0); break; + case 3: + __ Push(v[2], v[1], v[0]); + break; + case 2: + __ Push(v[1], v[0]); + break; + case 1: + __ Push(v[0]); + break; + default: + VIXL_ASSERT(i == 0); + break; } break; case PushPopRegList: @@ -13842,15 +14179,23 @@ static void PushPopFPXRegSimpleHelper(int reg_count, switch (pop_method) { case PushPopByFour: // Pop low-numbered registers first (from the lowest addresses). 
- for (i = 0; i <= (reg_count-4); i += 4) { - __ Pop(v[i], v[i+1], v[i+2], v[i+3]); + for (i = 0; i <= (reg_count - 4); i += 4) { + __ Pop(v[i], v[i + 1], v[i + 2], v[i + 3]); } // Finish off the leftovers. switch (reg_count - i) { - case 3: __ Pop(v[i], v[i+1], v[i+2]); break; - case 2: __ Pop(v[i], v[i+1]); break; - case 1: __ Pop(v[i]); break; - default: VIXL_ASSERT(i == reg_count); break; + case 3: + __ Pop(v[i], v[i + 1], v[i + 2]); + break; + case 2: + __ Pop(v[i], v[i + 1]); + break; + case 1: + __ Pop(v[i]); + break; + default: + VIXL_ASSERT(i == reg_count); + break; } break; case PushPopRegList: @@ -13872,7 +14217,7 @@ static void PushPopFPXRegSimpleHelper(int reg_count, // Check that the register contents were preserved. // Always use ASSERT_EQUAL_FP64, even when testing S registers, so we can // test that the upper word was properly cleared by Pop. - literal_base &= (0xffffffffffffffff >> (64-reg_size)); + literal_base &= (0xffffffffffffffff >> (64 - reg_size)); for (int i = 0; i < reg_count; i++) { uint64_t literal = literal_base * i; double expected; @@ -13887,24 +14232,48 @@ static void PushPopFPXRegSimpleHelper(int reg_count, TEST(push_pop_fp_xreg_simple_32) { for (int claim = 0; claim <= 8; claim++) { for (int count = 0; count <= 8; count++) { - PushPopFPXRegSimpleHelper(count, claim, kSRegSize, - PushPopByFour, PushPopByFour); - PushPopFPXRegSimpleHelper(count, claim, kSRegSize, - PushPopByFour, PushPopRegList); - PushPopFPXRegSimpleHelper(count, claim, kSRegSize, - PushPopRegList, PushPopByFour); - PushPopFPXRegSimpleHelper(count, claim, kSRegSize, - PushPopRegList, PushPopRegList); + PushPopFPXRegSimpleHelper(count, + claim, + kSRegSize, + PushPopByFour, + PushPopByFour); + PushPopFPXRegSimpleHelper(count, + claim, + kSRegSize, + PushPopByFour, + PushPopRegList); + PushPopFPXRegSimpleHelper(count, + claim, + kSRegSize, + PushPopRegList, + PushPopByFour); + PushPopFPXRegSimpleHelper(count, + claim, + kSRegSize, + PushPopRegList, + PushPopRegList); 
} // Test with the maximum number of registers. - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize, - PushPopByFour, PushPopByFour); - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize, - PushPopByFour, PushPopRegList); - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize, - PushPopRegList, PushPopByFour); - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kSRegSize, - PushPopRegList, PushPopRegList); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kSRegSize, + PushPopByFour, + PushPopByFour); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kSRegSize, + PushPopByFour, + PushPopRegList); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kSRegSize, + PushPopRegList, + PushPopByFour); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kSRegSize, + PushPopRegList, + PushPopRegList); } } @@ -13912,24 +14281,48 @@ TEST(push_pop_fp_xreg_simple_32) { TEST(push_pop_fp_xreg_simple_64) { for (int claim = 0; claim <= 8; claim++) { for (int count = 0; count <= 8; count++) { - PushPopFPXRegSimpleHelper(count, claim, kDRegSize, - PushPopByFour, PushPopByFour); - PushPopFPXRegSimpleHelper(count, claim, kDRegSize, - PushPopByFour, PushPopRegList); - PushPopFPXRegSimpleHelper(count, claim, kDRegSize, - PushPopRegList, PushPopByFour); - PushPopFPXRegSimpleHelper(count, claim, kDRegSize, - PushPopRegList, PushPopRegList); + PushPopFPXRegSimpleHelper(count, + claim, + kDRegSize, + PushPopByFour, + PushPopByFour); + PushPopFPXRegSimpleHelper(count, + claim, + kDRegSize, + PushPopByFour, + PushPopRegList); + PushPopFPXRegSimpleHelper(count, + claim, + kDRegSize, + PushPopRegList, + PushPopByFour); + PushPopFPXRegSimpleHelper(count, + claim, + kDRegSize, + PushPopRegList, + PushPopRegList); } // Test with the maximum number of registers. 
- PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize, - PushPopByFour, PushPopByFour); - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize, - PushPopByFour, PushPopRegList); - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize, - PushPopRegList, PushPopByFour); - PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, claim, kDRegSize, - PushPopRegList, PushPopRegList); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kDRegSize, + PushPopByFour, + PushPopByFour); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kDRegSize, + PushPopByFour, + PushPopRegList); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kDRegSize, + PushPopRegList, + PushPopByFour); + PushPopFPXRegSimpleHelper(kPushPopFPXRegMaxRegCount, + claim, + kDRegSize, + PushPopRegList, + PushPopRegList); } } @@ -14012,7 +14405,7 @@ static void PushPopXRegMixedMethodsHelper(int claim, int reg_size) { // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test // that the upper word was properly cleared by Pop. - literal_base &= (0xffffffffffffffff >> (64-reg_size)); + literal_base &= (0xffffffffffffffff >> (64 - reg_size)); ASSERT_EQUAL_64(literal_base * 3, x[9]); ASSERT_EQUAL_64(literal_base * 2, x[8]); @@ -14178,7 +14571,7 @@ static void PushPopXRegWXOverlapHelper(int reg_count, int claim) { // If popping an even number of registers, the first one will be X-sized. // Otherwise, the first one will be W-sized. 
bool next_is_64 = !(reg_count & 1); - for (int i = reg_count-1; i >= 0; i--) { + for (int i = reg_count - 1; i >= 0; i--) { if (next_is_64) { __ Pop(x[i]); active_w_slots -= 2; @@ -14491,8 +14884,8 @@ TEST(printf) { SETUP(); START(); - char const * test_plain_string = "Printf with no arguments.\n"; - char const * test_substring = "'This is a substring.'"; + char const* test_plain_string = "Printf with no arguments.\n"; + char const* test_substring = "'This is a substring.'"; RegisterDump before; // Initialize x29 to the value of the stack pointer. We will use x29 as a @@ -14535,14 +14928,18 @@ TEST(printf) { // Check that we don't clobber any registers. before.Dump(&masm); - __ Printf(test_plain_string); // NOLINT(runtime/printf) + __ Printf(test_plain_string); // NOLINT(runtime/printf) __ Printf("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1); - __ Printf("w5: %" PRId32 ", x5: %" PRId64"\n", w5, x5); + __ Printf("w5: %" PRId32 ", x5: %" PRId64 "\n", w5, x5); __ Printf("d0: %f\n", d0); __ Printf("Test %%s: %s\n", x2); - __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 "\n" + __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 + "\n" "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n", - w3, w4, x5, x6); + w3, + w4, + x5, + x6); __ Printf("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4); __ Printf("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28); __ Printf("%g\n", d10); @@ -14550,7 +14947,8 @@ TEST(printf) { // Print the stack pointer (sp). __ Printf("StackPointer(sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", - __ StackPointer(), __ StackPointer().W()); + __ StackPointer(), + __ StackPointer().W()); // Test with a different stack pointer. const Register old_stack_pointer = __ StackPointer(); @@ -14558,7 +14956,8 @@ TEST(printf) { __ SetStackPointer(x29); // Print the stack pointer (not sp). 
__ Printf("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", - __ StackPointer(), __ StackPointer().W()); + __ StackPointer(), + __ StackPointer().W()); __ Mov(old_stack_pointer, __ StackPointer()); __ SetStackPointer(old_stack_pointer); @@ -14567,9 +14966,15 @@ TEST(printf) { // Mixed argument types. __ Printf("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n", - w3, s1, x5, d3); + w3, + s1, + x5, + d3); __ Printf("s1: %f, d3: %f, w3: %" PRId32 ", x5: %" PRId64 "\n", - s1, d3, w3, x5); + s1, + d3, + w3, + x5); END(); RUN(); @@ -14588,8 +14993,8 @@ TEST(printf_no_preserve) { SETUP(); START(); - char const * test_plain_string = "Printf with no arguments.\n"; - char const * test_substring = "'This is a substring.'"; + char const* test_plain_string = "Printf with no arguments.\n"; + char const* test_substring = "'This is a substring.'"; __ PrintfNoPreserve(test_plain_string); __ Mov(x19, x0); @@ -14597,7 +15002,7 @@ TEST(printf_no_preserve) { // Test simple integer arguments. __ Mov(x0, 1234); __ Mov(x1, 0x1234); - __ PrintfNoPreserve("x0: %" PRId64", x1: 0x%08" PRIx64 "\n", x0, x1); + __ PrintfNoPreserve("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1); __ Mov(x20, x0); // Test simple floating-point arguments. @@ -14615,9 +15020,13 @@ TEST(printf_no_preserve) { __ Mov(w4, 0xffffffff); __ Mov(x5, 0xffffffffffffffff); __ Mov(x6, 0xffffffffffffffff); - __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 "\n" + __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32 + "\n" "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n", - w3, w4, x5, x6); + w3, + w4, + x5, + x6); __ Mov(x23, x0); __ Fmov(s1, 1.234); @@ -14641,9 +15050,10 @@ TEST(printf_no_preserve) { __ Mov(x29, old_stack_pointer); __ SetStackPointer(x29); // Print the stack pointer (not sp). 
- __ PrintfNoPreserve( - "StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n", - __ StackPointer(), __ StackPointer().W()); + __ PrintfNoPreserve("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 + "\n", + __ StackPointer(), + __ StackPointer().W()); __ Mov(x27, x0); __ Mov(old_stack_pointer, __ StackPointer()); __ SetStackPointer(old_stack_pointer); @@ -14661,7 +15071,10 @@ TEST(printf_no_preserve) { __ Mov(x5, 0xffffffffffffffff); __ Fmov(d3, 3.456); __ PrintfNoPreserve("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n", - w3, s1, x5, d3); + w3, + s1, + x5, + d3); __ Mov(x29, x0); END(); @@ -15439,12 +15852,12 @@ TEST(ldxr_stxr) { // As above, but get suitably-aligned values for ldxp and stxp. uint32_t wp_data[] = {0, 0, 0, 0, 0}; - uint32_t * wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; - wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. + uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; + wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. wp[2] = 0x87654321; uint64_t xp_data[] = {0, 0, 0, 0, 0}; - uint64_t * xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; - xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. + uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; + xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. xp[2] = 0x0fedcba987654321; SETUP(); @@ -15540,12 +15953,12 @@ TEST(ldaxr_stlxr) { // As above, but get suitably-aligned values for ldxp and stxp. uint32_t wp_data[] = {0, 0, 0, 0, 0}; - uint32_t * wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; - wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. + uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1; + wp[1] = 0x12345678; // wp[1] is 64-bit-aligned. wp[2] = 0x87654321; uint64_t xp_data[] = {0, 0, 0, 0, 0}; - uint64_t * xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; - xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. 
+ uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1; + xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned. xp[2] = 0x0fedcba987654321; SETUP(); @@ -15634,7 +16047,7 @@ TEST(ldaxr_stlxr) { TEST(clrex) { // This data should never be written. uint64_t data[] = {0, 0, 0}; - uint64_t * data_aligned = AlignUp(data, kXRegSizeInBytes * 2); + uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); SETUP(); START(); @@ -15738,7 +16151,7 @@ TEST(clrex) { // Check that the simulator occasionally makes store-exclusive fail. TEST(ldxr_stxr_fail) { uint64_t data[] = {0, 0, 0}; - uint64_t * data_aligned = AlignUp(data, kXRegSizeInBytes * 2); + uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); // Impose a hard limit on the number of attempts, so the test cannot hang. static const uint64_t kWatchdog = 10000; @@ -15822,7 +16235,7 @@ TEST(ldxr_stxr_fail) { // Check that the simulator occasionally makes store-exclusive fail. TEST(ldaxr_stlxr_fail) { uint64_t data[] = {0, 0, 0}; - uint64_t * data_aligned = AlignUp(data, kXRegSizeInBytes * 2); + uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2); // Impose a hard limit on the number of attempts, so the test cannot hang. 
static const uint64_t kWatchdog = 10000; @@ -15903,7 +16316,7 @@ TEST(ldaxr_stlxr_fail) { TEST(load_store_tagged_immediate_offset) { - uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff }; + uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; int tag_count = sizeof(tags) / sizeof(tags[0]); const int kMaxDataLength = 160; @@ -16049,7 +16462,7 @@ TEST(load_store_tagged_immediate_offset) { TEST(load_store_tagged_immediate_preindex) { - uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff }; + uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; int tag_count = sizeof(tags) / sizeof(tags[0]); const int kMaxDataLength = 128; @@ -16179,7 +16592,7 @@ TEST(load_store_tagged_immediate_preindex) { TEST(load_store_tagged_immediate_postindex) { - uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff }; + uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; int tag_count = sizeof(tags) / sizeof(tags[0]); const int kMaxDataLength = 128; @@ -16304,7 +16717,7 @@ TEST(load_store_tagged_immediate_postindex) { TEST(load_store_tagged_register_offset) { - uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff }; + uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; int tag_count = sizeof(tags) / sizeof(tags[0]); const int kMaxDataLength = 128; @@ -16404,8 +16817,8 @@ TEST(load_store_tagged_register_offset) { TEST(load_store_tagged_register_postindex) { - uint64_t src[] = { 0x0706050403020100, 0x0f0e0d0c0b0a0908 }; - uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff }; + uint64_t src[] = {0x0706050403020100, 0x0f0e0d0c0b0a0908}; + uint64_t tags[] = {0x00, 0x1, 0x55, 0xff}; int tag_count = sizeof(tags) / sizeof(tags[0]); for (int j = 0; j < tag_count; j++) { @@ -16546,8 +16959,8 @@ TEST(neon_3same_addp) { START(); - __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); __ Addp(v16.V16B(), v0.V16B(), v1.V16B()); END(); @@ -16763,8 +17176,8 @@ TEST(neon_3same_mul) { START(); - __ Movi(v0.V2D(), 
0xff00aa5500ff55aa, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); @@ -16782,14 +17195,13 @@ TEST(neon_3same_mul) { } - TEST(neon_3same_absdiff) { SETUP(); START(); - __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); @@ -16814,8 +17226,8 @@ TEST(neon_byelement_mul) { START(); - __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0); @@ -16869,13 +17281,13 @@ TEST(neon_byelement_mull) { START(); - __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff); - __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7); + __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7); __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0); - __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7); + __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7); __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0); __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001); @@ -16883,9 +17295,9 @@ TEST(neon_byelement_mull) { __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001); __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001); - __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7); + __ 
Smlal(v20.V4S(), v0.V4H(), v1.H(), 7); __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0); - __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7); + __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7); __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0); __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa); @@ -16893,9 +17305,9 @@ TEST(neon_byelement_mull) { __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa); __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01); - __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7); + __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7); __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0); - __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7); + __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7); __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0); END(); @@ -16974,8 +17386,8 @@ TEST(neon_3diff_absdiff) { START(); - __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); @@ -17002,14 +17414,14 @@ TEST(neon_3diff_sqdmull) { START(); - __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); - __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); - __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); - __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); + __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); + __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); + __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); + __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); - __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H()); + __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H()); __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H()); - __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S()); + __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S()); __ Sqdmull2(v19.V2D(), 
v2.V4S(), v3.V4S()); __ Sqdmull(s20, h0, h1); __ Sqdmull(d21, s2, s3); @@ -17032,10 +17444,10 @@ TEST(neon_3diff_sqdmlal) { START(); - __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); - __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); - __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); - __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); + __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); + __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); + __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); + __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001); __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff); @@ -17044,9 +17456,9 @@ TEST(neon_3diff_sqdmlal) { __ Movi(v20.V2D(), 0, 0x00000001); __ Movi(v21.V2D(), 0, 0x00000001); - __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H()); + __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H()); __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H()); - __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S()); + __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S()); __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S()); __ Sqdmlal(s20, h0, h1); __ Sqdmlal(d21, s2, s3); @@ -17069,10 +17481,10 @@ TEST(neon_3diff_sqdmlsl) { START(); - __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); - __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); - __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); - __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); + __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000); + __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000); + __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000); + __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000); __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001); __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001); @@ -17081,9 +17493,9 @@ TEST(neon_3diff_sqdmlsl) { __ Movi(v20.V2D(), 0, 0x00000001); __ Movi(v21.V2D(), 0, 0x00000001); - __ Sqdmlsl(v16.V4S(), 
v0.V4H(), v1.V4H()); + __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H()); __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H()); - __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S()); + __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S()); __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S()); __ Sqdmlsl(s20, h0, h1); __ Sqdmlsl(d21, s2, s3); @@ -17107,8 +17519,8 @@ TEST(neon_3diff_mla) { START(); - __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); @@ -17135,8 +17547,8 @@ TEST(neon_3diff_mls) { START(); - __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); - __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); + __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa); + __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff); __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00); @@ -17432,15 +17844,15 @@ TEST(neon_2regmisc_cmeq) { __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); - __ Cmeq(v16.V8B(), v1.V8B(), 0); + __ Cmeq(v16.V8B(), v1.V8B(), 0); __ Cmeq(v17.V16B(), v1.V16B(), 0); - __ Cmeq(v18.V4H(), v1.V4H(), 0); - __ Cmeq(v19.V8H(), v1.V8H(), 0); - __ Cmeq(v20.V2S(), v0.V2S(), 0); - __ Cmeq(v21.V4S(), v0.V4S(), 0); - __ Cmeq(d22, d0, 0); - __ Cmeq(d23, d1, 0); - __ Cmeq(v24.V2D(), v0.V2D(), 0); + __ Cmeq(v18.V4H(), v1.V4H(), 0); + __ Cmeq(v19.V8H(), v1.V8H(), 0); + __ Cmeq(v20.V2S(), v0.V2S(), 0); + __ Cmeq(v21.V4S(), v0.V4S(), 0); + __ Cmeq(d22, d0, 0); + __ Cmeq(d23, d1, 0); + __ Cmeq(v24.V2D(), v0.V2D(), 0); END(); @@ -17466,15 
+17878,15 @@ TEST(neon_2regmisc_cmge) { __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000); __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); - __ Cmge(v16.V8B(), v1.V8B(), 0); + __ Cmge(v16.V8B(), v1.V8B(), 0); __ Cmge(v17.V16B(), v1.V16B(), 0); - __ Cmge(v18.V4H(), v1.V4H(), 0); - __ Cmge(v19.V8H(), v1.V8H(), 0); - __ Cmge(v20.V2S(), v0.V2S(), 0); - __ Cmge(v21.V4S(), v0.V4S(), 0); - __ Cmge(d22, d0, 0); - __ Cmge(d23, d1, 0); - __ Cmge(v24.V2D(), v0.V2D(), 0); + __ Cmge(v18.V4H(), v1.V4H(), 0); + __ Cmge(v19.V8H(), v1.V8H(), 0); + __ Cmge(v20.V2S(), v0.V2S(), 0); + __ Cmge(v21.V4S(), v0.V4S(), 0); + __ Cmge(d22, d0, 0); + __ Cmge(d23, d1, 0); + __ Cmge(v24.V2D(), v0.V2D(), 0); END(); @@ -17500,15 +17912,15 @@ TEST(neon_2regmisc_cmlt) { __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000); __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); - __ Cmlt(v16.V8B(), v1.V8B(), 0); + __ Cmlt(v16.V8B(), v1.V8B(), 0); __ Cmlt(v17.V16B(), v1.V16B(), 0); - __ Cmlt(v18.V4H(), v1.V4H(), 0); - __ Cmlt(v19.V8H(), v1.V8H(), 0); - __ Cmlt(v20.V2S(), v1.V2S(), 0); - __ Cmlt(v21.V4S(), v1.V4S(), 0); - __ Cmlt(d22, d0, 0); - __ Cmlt(d23, d1, 0); - __ Cmlt(v24.V2D(), v0.V2D(), 0); + __ Cmlt(v18.V4H(), v1.V4H(), 0); + __ Cmlt(v19.V8H(), v1.V8H(), 0); + __ Cmlt(v20.V2S(), v1.V2S(), 0); + __ Cmlt(v21.V4S(), v1.V4S(), 0); + __ Cmlt(d22, d0, 0); + __ Cmlt(d23, d1, 0); + __ Cmlt(v24.V2D(), v0.V2D(), 0); END(); @@ -17534,15 +17946,15 @@ TEST(neon_2regmisc_cmle) { __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); - __ Cmle(v16.V8B(), v1.V8B(), 0); + __ Cmle(v16.V8B(), v1.V8B(), 0); __ Cmle(v17.V16B(), v1.V16B(), 0); - __ Cmle(v18.V4H(), v1.V4H(), 0); - __ Cmle(v19.V8H(), v1.V8H(), 0); - __ Cmle(v20.V2S(), v1.V2S(), 0); - __ Cmle(v21.V4S(), v1.V4S(), 0); - __ Cmle(d22, d0, 0); - __ Cmle(d23, d1, 0); - __ Cmle(v24.V2D(), v0.V2D(), 0); + __ Cmle(v18.V4H(), v1.V4H(), 0); + __ Cmle(v19.V8H(), v1.V8H(), 
0); + __ Cmle(v20.V2S(), v1.V2S(), 0); + __ Cmle(v21.V4S(), v1.V4S(), 0); + __ Cmle(d22, d0, 0); + __ Cmle(d23, d1, 0); + __ Cmle(v24.V2D(), v0.V2D(), 0); END(); @@ -17568,15 +17980,15 @@ TEST(neon_2regmisc_cmgt) { __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000); __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff); - __ Cmgt(v16.V8B(), v1.V8B(), 0); + __ Cmgt(v16.V8B(), v1.V8B(), 0); __ Cmgt(v17.V16B(), v1.V16B(), 0); - __ Cmgt(v18.V4H(), v1.V4H(), 0); - __ Cmgt(v19.V8H(), v1.V8H(), 0); - __ Cmgt(v20.V2S(), v0.V2S(), 0); - __ Cmgt(v21.V4S(), v0.V4S(), 0); - __ Cmgt(d22, d0, 0); - __ Cmgt(d23, d1, 0); - __ Cmgt(v24.V2D(), v0.V2D(), 0); + __ Cmgt(v18.V4H(), v1.V4H(), 0); + __ Cmgt(v19.V8H(), v1.V8H(), 0); + __ Cmgt(v20.V2S(), v0.V2S(), 0); + __ Cmgt(v21.V4S(), v0.V4S(), 0); + __ Cmgt(d22, d0, 0); + __ Cmgt(d23, d1, 0); + __ Cmgt(v24.V2D(), v0.V2D(), 0); END(); @@ -17605,15 +18017,15 @@ TEST(neon_2regmisc_neg) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Neg(v16.V8B(), v0.V8B()); + __ Neg(v16.V8B(), v0.V8B()); __ Neg(v17.V16B(), v0.V16B()); - __ Neg(v18.V4H(), v1.V4H()); - __ Neg(v19.V8H(), v1.V8H()); - __ Neg(v20.V2S(), v2.V2S()); - __ Neg(v21.V4S(), v2.V4S()); + __ Neg(v18.V4H(), v1.V4H()); + __ Neg(v19.V8H(), v1.V8H()); + __ Neg(v20.V2S(), v2.V2S()); + __ Neg(v21.V4S(), v2.V4S()); __ Neg(d22, d3); - __ Neg(v23.V2D(), v3.V2D()); - __ Neg(v24.V2D(), v4.V2D()); + __ Neg(v23.V2D(), v3.V2D()); + __ Neg(v24.V2D(), v4.V2D()); END(); @@ -17643,14 +18055,14 @@ TEST(neon_2regmisc_sqneg) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqneg(v16.V8B(), v0.V8B()); + __ Sqneg(v16.V8B(), v0.V8B()); __ Sqneg(v17.V16B(), v0.V16B()); - __ Sqneg(v18.V4H(), v1.V4H()); - __ Sqneg(v19.V8H(), v1.V8H()); - __ Sqneg(v20.V2S(), v2.V2S()); - __ Sqneg(v21.V4S(), v2.V4S()); - __ Sqneg(v22.V2D(), v3.V2D()); - __ 
Sqneg(v23.V2D(), v4.V2D()); + __ Sqneg(v18.V4H(), v1.V4H()); + __ Sqneg(v19.V8H(), v1.V8H()); + __ Sqneg(v20.V2S(), v2.V2S()); + __ Sqneg(v21.V4S(), v2.V4S()); + __ Sqneg(v22.V2D(), v3.V2D()); + __ Sqneg(v23.V2D(), v4.V2D()); __ Sqneg(b24, b0); __ Sqneg(h25, h1); @@ -17689,15 +18101,15 @@ TEST(neon_2regmisc_abs) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Abs(v16.V8B(), v0.V8B()); + __ Abs(v16.V8B(), v0.V8B()); __ Abs(v17.V16B(), v0.V16B()); - __ Abs(v18.V4H(), v1.V4H()); - __ Abs(v19.V8H(), v1.V8H()); - __ Abs(v20.V2S(), v2.V2S()); - __ Abs(v21.V4S(), v2.V4S()); + __ Abs(v18.V4H(), v1.V4H()); + __ Abs(v19.V8H(), v1.V8H()); + __ Abs(v20.V2S(), v2.V2S()); + __ Abs(v21.V4S(), v2.V4S()); __ Abs(d22, d3); - __ Abs(v23.V2D(), v3.V2D()); - __ Abs(v24.V2D(), v4.V2D()); + __ Abs(v23.V2D(), v3.V2D()); + __ Abs(v24.V2D(), v4.V2D()); END(); @@ -17727,14 +18139,14 @@ TEST(neon_2regmisc_sqabs) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqabs(v16.V8B(), v0.V8B()); + __ Sqabs(v16.V8B(), v0.V8B()); __ Sqabs(v17.V16B(), v0.V16B()); - __ Sqabs(v18.V4H(), v1.V4H()); - __ Sqabs(v19.V8H(), v1.V8H()); - __ Sqabs(v20.V2S(), v2.V2S()); - __ Sqabs(v21.V4S(), v2.V4S()); - __ Sqabs(v22.V2D(), v3.V2D()); - __ Sqabs(v23.V2D(), v4.V2D()); + __ Sqabs(v18.V4H(), v1.V4H()); + __ Sqabs(v19.V8H(), v1.V8H()); + __ Sqabs(v20.V2S(), v2.V2S()); + __ Sqabs(v21.V4S(), v2.V4S()); + __ Sqabs(v22.V2D(), v3.V2D()); + __ Sqabs(v23.V2D(), v4.V2D()); __ Sqabs(b24, b0); __ Sqabs(h25, h1); @@ -17955,12 +18367,12 @@ TEST(neon_2regmisc_xtn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Xtn(v16.V8B(), v0.V8H()); + __ Xtn(v16.V8B(), v0.V8H()); __ Xtn2(v16.V16B(), v1.V8H()); - __ Xtn(v17.V4H(), v1.V4S()); - __ Xtn2(v17.V8H(), v2.V4S()); - __ Xtn(v18.V2S(), v3.V2D()); - __ 
Xtn2(v18.V4S(), v4.V2D()); + __ Xtn(v17.V4H(), v1.V4S()); + __ Xtn2(v17.V8H(), v2.V4S()); + __ Xtn(v18.V2S(), v3.V2D()); + __ Xtn2(v18.V4S(), v4.V2D()); END(); @@ -17983,15 +18395,15 @@ TEST(neon_2regmisc_sqxtn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqxtn(v16.V8B(), v0.V8H()); + __ Sqxtn(v16.V8B(), v0.V8H()); __ Sqxtn2(v16.V16B(), v1.V8H()); - __ Sqxtn(v17.V4H(), v1.V4S()); - __ Sqxtn2(v17.V8H(), v2.V4S()); - __ Sqxtn(v18.V2S(), v3.V2D()); - __ Sqxtn2(v18.V4S(), v4.V2D()); - __ Sqxtn(b19, h0); - __ Sqxtn(h20, s0); - __ Sqxtn(s21, d0); + __ Sqxtn(v17.V4H(), v1.V4S()); + __ Sqxtn2(v17.V8H(), v2.V4S()); + __ Sqxtn(v18.V2S(), v3.V2D()); + __ Sqxtn2(v18.V4S(), v4.V2D()); + __ Sqxtn(b19, h0); + __ Sqxtn(h20, s0); + __ Sqxtn(s21, d0); END(); @@ -18017,15 +18429,15 @@ TEST(neon_2regmisc_uqxtn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Uqxtn(v16.V8B(), v0.V8H()); + __ Uqxtn(v16.V8B(), v0.V8H()); __ Uqxtn2(v16.V16B(), v1.V8H()); - __ Uqxtn(v17.V4H(), v1.V4S()); - __ Uqxtn2(v17.V8H(), v2.V4S()); - __ Uqxtn(v18.V2S(), v3.V2D()); - __ Uqxtn2(v18.V4S(), v4.V2D()); - __ Uqxtn(b19, h0); - __ Uqxtn(h20, s0); - __ Uqxtn(s21, d0); + __ Uqxtn(v17.V4H(), v1.V4S()); + __ Uqxtn2(v17.V8H(), v2.V4S()); + __ Uqxtn(v18.V2S(), v3.V2D()); + __ Uqxtn2(v18.V4S(), v4.V2D()); + __ Uqxtn(b19, h0); + __ Uqxtn(h20, s0); + __ Uqxtn(s21, d0); END(); @@ -18051,15 +18463,15 @@ TEST(neon_2regmisc_sqxtun) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqxtun(v16.V8B(), v0.V8H()); + __ Sqxtun(v16.V8B(), v0.V8H()); __ Sqxtun2(v16.V16B(), v1.V8H()); - __ Sqxtun(v17.V4H(), v1.V4S()); - __ Sqxtun2(v17.V8H(), v2.V4S()); - __ Sqxtun(v18.V2S(), v3.V2D()); - __ Sqxtun2(v18.V4S(), v4.V2D()); - __ Sqxtun(b19, h0); - __ Sqxtun(h20, s0); - __ Sqxtun(s21, d0); + __ 
Sqxtun(v17.V4H(), v1.V4S()); + __ Sqxtun2(v17.V8H(), v2.V4S()); + __ Sqxtun(v18.V2S(), v3.V2D()); + __ Sqxtun2(v18.V4S(), v4.V2D()); + __ Sqxtun(b19, h0); + __ Sqxtun(h20, s0); + __ Sqxtun(s21, d0); END(); @@ -18083,8 +18495,8 @@ TEST(neon_3same_and) { __ And(v16.V16B(), v0.V16B(), v0.V16B()); // self test __ And(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations - __ And(v24.V8B(), v0.V8B(), v0.V8B()); // self test - __ And(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations + __ And(v24.V8B(), v0.V8B(), v0.V8B()); // self test + __ And(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations END(); RUN(); @@ -18105,8 +18517,8 @@ TEST(neon_3same_bic) { __ Bic(v16.V16B(), v0.V16B(), v0.V16B()); // self test __ Bic(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations - __ Bic(v24.V8B(), v0.V8B(), v0.V8B()); // self test - __ Bic(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations + __ Bic(v24.V8B(), v0.V8B(), v0.V8B()); // self test + __ Bic(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations END(); RUN(); @@ -18127,8 +18539,8 @@ TEST(neon_3same_orr) { __ Orr(v16.V16B(), v0.V16B(), v0.V16B()); // self test __ Orr(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations - __ Orr(v24.V8B(), v0.V8B(), v0.V8B()); // self test - __ Orr(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations + __ Orr(v24.V8B(), v0.V8B(), v0.V8B()); // self test + __ Orr(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations END(); RUN(); @@ -18180,8 +18592,8 @@ TEST(neon_3same_orn) { __ Orn(v16.V16B(), v0.V16B(), v0.V16B()); // self test __ Orn(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations - __ Orn(v24.V8B(), v0.V8B(), v0.V8B()); // self test - __ Orn(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations + __ Orn(v24.V8B(), v0.V8B(), v0.V8B()); // self test + __ Orn(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations END(); RUN(); @@ -18202,8 +18614,8 @@ TEST(neon_3same_eor) { __ Eor(v16.V16B(), v0.V16B(), v0.V16B()); // self test __ Eor(v17.V16B(), v0.V16B(), v1.V16B()); // all 
combinations - __ Eor(v24.V8B(), v0.V8B(), v0.V8B()); // self test - __ Eor(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations + __ Eor(v24.V8B(), v0.V8B(), v0.V8B()); // self test + __ Eor(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations END(); RUN(); @@ -18727,21 +19139,21 @@ TEST(neon_2regmisc_cls_clz_cnt) { __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); - __ Cls(v16.V8B() , v1.V8B()); + __ Cls(v16.V8B(), v1.V8B()); __ Cls(v17.V16B(), v1.V16B()); - __ Cls(v18.V4H() , v1.V4H()); - __ Cls(v19.V8H() , v1.V8H()); - __ Cls(v20.V2S() , v1.V2S()); - __ Cls(v21.V4S() , v1.V4S()); + __ Cls(v18.V4H(), v1.V4H()); + __ Cls(v19.V8H(), v1.V8H()); + __ Cls(v20.V2S(), v1.V2S()); + __ Cls(v21.V4S(), v1.V4S()); - __ Clz(v22.V8B() , v0.V8B()); + __ Clz(v22.V8B(), v0.V8B()); __ Clz(v23.V16B(), v0.V16B()); - __ Clz(v24.V4H() , v0.V4H()); - __ Clz(v25.V8H() , v0.V8H()); - __ Clz(v26.V2S() , v0.V2S()); - __ Clz(v27.V4S() , v0.V4S()); + __ Clz(v24.V4H(), v0.V4H()); + __ Clz(v25.V8H(), v0.V8H()); + __ Clz(v26.V2S(), v0.V2S()); + __ Clz(v27.V4S(), v0.V4S()); - __ Cnt(v28.V8B() , v0.V8B()); + __ Cnt(v28.V8B(), v0.V8B()); __ Cnt(v29.V16B(), v1.V16B()); END(); @@ -18776,22 +19188,22 @@ TEST(neon_2regmisc_rev) { __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); - __ Rev16(v16.V8B() , v0.V8B()); + __ Rev16(v16.V8B(), v0.V8B()); __ Rev16(v17.V16B(), v0.V16B()); - __ Rev32(v18.V8B() , v0.V8B()); + __ Rev32(v18.V8B(), v0.V8B()); __ Rev32(v19.V16B(), v0.V16B()); - __ Rev32(v20.V4H() , v0.V4H()); - __ Rev32(v21.V8H() , v0.V8H()); + __ Rev32(v20.V4H(), v0.V4H()); + __ Rev32(v21.V8H(), v0.V8H()); - __ Rev64(v22.V8B() , v0.V8B()); + __ Rev64(v22.V8B(), v0.V8B()); __ Rev64(v23.V16B(), v0.V16B()); - __ Rev64(v24.V4H() , v0.V4H()); - __ Rev64(v25.V8H() , v0.V8H()); - __ Rev64(v26.V2S() , v0.V2S()); - __ Rev64(v27.V4S() , v0.V4S()); + __ Rev64(v24.V4H(), 
v0.V4H()); + __ Rev64(v25.V8H(), v0.V8H()); + __ Rev64(v26.V2S(), v0.V2S()); + __ Rev64(v27.V4S(), v0.V4S()); - __ Rbit(v28.V8B() , v1.V8B()); + __ Rbit(v28.V8B(), v1.V8B()); __ Rbit(v29.V16B(), v1.V16B()); END(); @@ -18828,24 +19240,24 @@ TEST(neon_sli) { __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); - __ Mov(v16.V2D(), v0.V2D()); - __ Mov(v17.V2D(), v0.V2D()); - __ Mov(v18.V2D(), v0.V2D()); - __ Mov(v19.V2D(), v0.V2D()); - __ Mov(v20.V2D(), v0.V2D()); - __ Mov(v21.V2D(), v0.V2D()); - __ Mov(v22.V2D(), v0.V2D()); - __ Mov(v23.V2D(), v0.V2D()); + __ Mov(v16.V2D(), v0.V2D()); + __ Mov(v17.V2D(), v0.V2D()); + __ Mov(v18.V2D(), v0.V2D()); + __ Mov(v19.V2D(), v0.V2D()); + __ Mov(v20.V2D(), v0.V2D()); + __ Mov(v21.V2D(), v0.V2D()); + __ Mov(v22.V2D(), v0.V2D()); + __ Mov(v23.V2D(), v0.V2D()); - __ Sli(v16.V8B(), v1.V8B(), 4); + __ Sli(v16.V8B(), v1.V8B(), 4); __ Sli(v17.V16B(), v1.V16B(), 7); - __ Sli(v18.V4H(), v1.V4H(), 8); - __ Sli(v19.V8H(), v1.V8H(), 15); - __ Sli(v20.V2S(), v1.V2S(), 0); - __ Sli(v21.V4S(), v1.V4S(), 31); - __ Sli(v22.V2D(), v1.V2D(), 48); + __ Sli(v18.V4H(), v1.V4H(), 8); + __ Sli(v19.V8H(), v1.V8H(), 15); + __ Sli(v20.V2S(), v1.V2S(), 0); + __ Sli(v21.V4S(), v1.V4S(), 31); + __ Sli(v22.V2D(), v1.V2D(), 48); - __ Sli(d23, d1, 48); + __ Sli(d23, d1, 48); END(); @@ -18874,24 +19286,24 @@ TEST(neon_sri) { __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f); __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); - __ Mov(v16.V2D(), v0.V2D()); - __ Mov(v17.V2D(), v0.V2D()); - __ Mov(v18.V2D(), v0.V2D()); - __ Mov(v19.V2D(), v0.V2D()); - __ Mov(v20.V2D(), v0.V2D()); - __ Mov(v21.V2D(), v0.V2D()); - __ Mov(v22.V2D(), v0.V2D()); - __ Mov(v23.V2D(), v0.V2D()); + __ Mov(v16.V2D(), v0.V2D()); + __ Mov(v17.V2D(), v0.V2D()); + __ Mov(v18.V2D(), v0.V2D()); + __ Mov(v19.V2D(), v0.V2D()); + __ Mov(v20.V2D(), v0.V2D()); + __ Mov(v21.V2D(), v0.V2D()); + __ Mov(v22.V2D(), v0.V2D()); + 
__ Mov(v23.V2D(), v0.V2D()); - __ Sri(v16.V8B(), v1.V8B(), 4); + __ Sri(v16.V8B(), v1.V8B(), 4); __ Sri(v17.V16B(), v1.V16B(), 7); - __ Sri(v18.V4H(), v1.V4H(), 8); - __ Sri(v19.V8H(), v1.V8H(), 15); - __ Sri(v20.V2S(), v1.V2S(), 1); - __ Sri(v21.V4S(), v1.V4S(), 31); - __ Sri(v22.V2D(), v1.V2D(), 48); + __ Sri(v18.V4H(), v1.V4H(), 8); + __ Sri(v19.V8H(), v1.V8H(), 15); + __ Sri(v20.V2S(), v1.V2S(), 1); + __ Sri(v21.V4S(), v1.V4S(), 31); + __ Sri(v22.V2D(), v1.V2D(), 48); - __ Sri(d23, d1, 48); + __ Sri(d23, d1, 48); END(); @@ -18923,12 +19335,12 @@ TEST(neon_shrn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Shrn(v16.V8B(), v0.V8H(), 8); + __ Shrn(v16.V8B(), v0.V8H(), 8); __ Shrn2(v16.V16B(), v1.V8H(), 1); - __ Shrn(v17.V4H(), v1.V4S(), 16); - __ Shrn2(v17.V8H(), v2.V4S(), 1); - __ Shrn(v18.V2S(), v3.V2D(), 32); - __ Shrn2(v18.V4S(), v3.V2D(), 1); + __ Shrn(v17.V4H(), v1.V4S(), 16); + __ Shrn2(v17.V8H(), v2.V4S(), 1); + __ Shrn(v18.V2S(), v3.V2D(), 32); + __ Shrn2(v18.V4S(), v3.V2D(), 1); END(); @@ -18951,12 +19363,12 @@ TEST(neon_rshrn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Rshrn(v16.V8B(), v0.V8H(), 8); + __ Rshrn(v16.V8B(), v0.V8H(), 8); __ Rshrn2(v16.V16B(), v1.V8H(), 1); - __ Rshrn(v17.V4H(), v1.V4S(), 16); - __ Rshrn2(v17.V8H(), v2.V4S(), 1); - __ Rshrn(v18.V2S(), v3.V2D(), 32); - __ Rshrn2(v18.V4S(), v3.V2D(), 1); + __ Rshrn(v17.V4H(), v1.V4S(), 16); + __ Rshrn2(v17.V8H(), v2.V4S(), 1); + __ Rshrn(v18.V2S(), v3.V2D(), 32); + __ Rshrn2(v18.V4S(), v3.V2D(), 1); END(); @@ -18979,12 +19391,12 @@ TEST(neon_uqshrn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Uqshrn(v16.V8B(), v0.V8H(), 8); + __ Uqshrn(v16.V8B(), v0.V8H(), 8); __ Uqshrn2(v16.V16B(), v1.V8H(), 1); - __ Uqshrn(v17.V4H(), v1.V4S(), 16); - __ 
Uqshrn2(v17.V8H(), v2.V4S(), 1); - __ Uqshrn(v18.V2S(), v3.V2D(), 32); - __ Uqshrn2(v18.V4S(), v3.V2D(), 1); + __ Uqshrn(v17.V4H(), v1.V4S(), 16); + __ Uqshrn2(v17.V8H(), v2.V4S(), 1); + __ Uqshrn(v18.V2S(), v3.V2D(), 32); + __ Uqshrn2(v18.V4S(), v3.V2D(), 1); __ Uqshrn(b19, h0, 8); __ Uqshrn(h20, s1, 16); @@ -19014,12 +19426,12 @@ TEST(neon_uqrshrn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Uqrshrn(v16.V8B(), v0.V8H(), 8); + __ Uqrshrn(v16.V8B(), v0.V8H(), 8); __ Uqrshrn2(v16.V16B(), v1.V8H(), 1); - __ Uqrshrn(v17.V4H(), v1.V4S(), 16); - __ Uqrshrn2(v17.V8H(), v2.V4S(), 1); - __ Uqrshrn(v18.V2S(), v3.V2D(), 32); - __ Uqrshrn2(v18.V4S(), v3.V2D(), 1); + __ Uqrshrn(v17.V4H(), v1.V4S(), 16); + __ Uqrshrn2(v17.V8H(), v2.V4S(), 1); + __ Uqrshrn(v18.V2S(), v3.V2D(), 32); + __ Uqrshrn2(v18.V4S(), v3.V2D(), 1); __ Uqrshrn(b19, h0, 8); __ Uqrshrn(h20, s1, 16); @@ -19049,12 +19461,12 @@ TEST(neon_sqshrn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqshrn(v16.V8B(), v0.V8H(), 8); + __ Sqshrn(v16.V8B(), v0.V8H(), 8); __ Sqshrn2(v16.V16B(), v1.V8H(), 1); - __ Sqshrn(v17.V4H(), v1.V4S(), 16); - __ Sqshrn2(v17.V8H(), v2.V4S(), 1); - __ Sqshrn(v18.V2S(), v3.V2D(), 32); - __ Sqshrn2(v18.V4S(), v3.V2D(), 1); + __ Sqshrn(v17.V4H(), v1.V4S(), 16); + __ Sqshrn2(v17.V8H(), v2.V4S(), 1); + __ Sqshrn(v18.V2S(), v3.V2D(), 32); + __ Sqshrn2(v18.V4S(), v3.V2D(), 1); __ Sqshrn(b19, h0, 8); __ Sqshrn(h20, s1, 16); @@ -19084,12 +19496,12 @@ TEST(neon_sqrshrn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqrshrn(v16.V8B(), v0.V8H(), 8); + __ Sqrshrn(v16.V8B(), v0.V8H(), 8); __ Sqrshrn2(v16.V16B(), v1.V8H(), 1); - __ Sqrshrn(v17.V4H(), v1.V4S(), 16); - __ Sqrshrn2(v17.V8H(), v2.V4S(), 1); - __ Sqrshrn(v18.V2S(), v3.V2D(), 32); - __ Sqrshrn2(v18.V4S(), 
v3.V2D(), 1); + __ Sqrshrn(v17.V4H(), v1.V4S(), 16); + __ Sqrshrn2(v17.V8H(), v2.V4S(), 1); + __ Sqrshrn(v18.V2S(), v3.V2D(), 32); + __ Sqrshrn2(v18.V4S(), v3.V2D(), 1); __ Sqrshrn(b19, h0, 8); __ Sqrshrn(h20, s1, 16); @@ -19119,12 +19531,12 @@ TEST(neon_sqshrun) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqshrun(v16.V8B(), v0.V8H(), 8); + __ Sqshrun(v16.V8B(), v0.V8H(), 8); __ Sqshrun2(v16.V16B(), v1.V8H(), 1); - __ Sqshrun(v17.V4H(), v1.V4S(), 16); - __ Sqshrun2(v17.V8H(), v2.V4S(), 1); - __ Sqshrun(v18.V2S(), v3.V2D(), 32); - __ Sqshrun2(v18.V4S(), v3.V2D(), 1); + __ Sqshrun(v17.V4H(), v1.V4S(), 16); + __ Sqshrun2(v17.V8H(), v2.V4S(), 1); + __ Sqshrun(v18.V2S(), v3.V2D(), 32); + __ Sqshrun2(v18.V4S(), v3.V2D(), 1); __ Sqshrun(b19, h0, 8); __ Sqshrun(h20, s1, 16); @@ -19154,12 +19566,12 @@ TEST(neon_sqrshrun) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Sqrshrun(v16.V8B(), v0.V8H(), 8); + __ Sqrshrun(v16.V8B(), v0.V8H(), 8); __ Sqrshrun2(v16.V16B(), v1.V8H(), 1); - __ Sqrshrun(v17.V4H(), v1.V4S(), 16); - __ Sqrshrun2(v17.V8H(), v2.V4S(), 1); - __ Sqrshrun(v18.V2S(), v3.V2D(), 32); - __ Sqrshrun2(v18.V4S(), v3.V2D(), 1); + __ Sqrshrun(v17.V4H(), v1.V4S(), 16); + __ Sqrshrun2(v17.V8H(), v2.V4S(), 1); + __ Sqrshrun(v18.V2S(), v3.V2D(), 32); + __ Sqrshrun2(v18.V4S(), v3.V2D(), 1); __ Sqrshrun(b19, h0, 8); __ Sqrshrun(h20, s1, 16); @@ -19344,10 +19756,10 @@ TEST(neon_modimm_movi) { START(); - __ Movi(v0.V8B(), 0xaa); + __ Movi(v0.V8B(), 0xaa); __ Movi(v1.V16B(), 0x55); - __ Movi(d2, 0x00ffff0000ffffff); + __ Movi(d2, 0x00ffff0000ffffff); __ Movi(v3.V2D(), 0x00ffff0000ffffff); __ Movi(v16.V4H(), 0x00, LSL, 0); @@ -19614,9 +20026,9 @@ TEST(neon_copy_dup_element) { __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef); __ Dup(v16.V16B(), v0.B(), 0); - __ Dup(v17.V8H(), v1.H(), 7); - __ Dup(v18.V4S(), v1.S(), 
3); - __ Dup(v19.V2D(), v0.D(), 0); + __ Dup(v17.V8H(), v1.H(), 7); + __ Dup(v18.V4S(), v1.S(), 3); + __ Dup(v19.V2D(), v0.D(), 0); __ Dup(v20.V8B(), v0.B(), 0); __ Dup(v21.V4H(), v1.H(), 7); @@ -19628,9 +20040,9 @@ TEST(neon_copy_dup_element) { __ Dup(v26.D(), v0.D(), 0); __ Dup(v2.V16B(), v2.B(), 0); - __ Dup(v3.V8H(), v3.H(), 7); - __ Dup(v4.V4S(), v4.S(), 0); - __ Dup(v5.V2D(), v5.D(), 1); + __ Dup(v3.V8H(), v3.H(), 7); + __ Dup(v4.V4S(), v4.S(), 0); + __ Dup(v5.V2D(), v5.D(), 1); END(); @@ -19666,18 +20078,18 @@ TEST(neon_copy_dup_general) { __ Mov(x0, 0x0011223344556677); __ Dup(v16.V16B(), w0); - __ Dup(v17.V8H(), w0); - __ Dup(v18.V4S(), w0); - __ Dup(v19.V2D(), x0); + __ Dup(v17.V8H(), w0); + __ Dup(v18.V4S(), w0); + __ Dup(v19.V2D(), x0); __ Dup(v20.V8B(), w0); __ Dup(v21.V4H(), w0); __ Dup(v22.V2S(), w0); __ Dup(v2.V16B(), wzr); - __ Dup(v3.V8H(), wzr); - __ Dup(v4.V4S(), wzr); - __ Dup(v5.V2D(), xzr); + __ Dup(v3.V8H(), wzr); + __ Dup(v4.V4S(), wzr); + __ Dup(v5.V2D(), xzr); END(); @@ -19705,8 +20117,8 @@ TEST(neon_copy_ins_element) { START(); - __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); - __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100); + __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); + __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100); __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); @@ -19718,14 +20130,14 @@ TEST(neon_copy_ins_element) { __ Movi(v5.V2D(), 0, 0x0123456789abcdef); __ Ins(v16.V16B(), 15, v0.V16B(), 0); - __ Ins(v17.V8H(), 0, v1.V8H(), 7); - __ Ins(v18.V4S(), 3, v1.V4S(), 0); - __ Ins(v19.V2D(), 1, v0.V2D(), 0); + __ Ins(v17.V8H(), 0, v1.V8H(), 7); + __ Ins(v18.V4S(), 3, v1.V4S(), 0); + __ Ins(v19.V2D(), 1, v0.V2D(), 0); __ Ins(v2.V16B(), 2, v2.V16B(), 0); - __ Ins(v3.V8H(), 0, v3.V8H(), 7); - __ Ins(v4.V4S(), 3, v4.V4S(), 0); - __ Ins(v5.V2D(), 0, v5.V2D(), 1); + 
__ Ins(v3.V8H(), 0, v3.V8H(), 7); + __ Ins(v4.V4S(), 3, v4.V4S(), 0); + __ Ins(v5.V2D(), 0, v5.V2D(), 1); END(); @@ -19749,8 +20161,8 @@ TEST(neon_copy_mov_element) { START(); - __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); - __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100); + __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); + __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100); __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef); __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff); @@ -19762,14 +20174,14 @@ TEST(neon_copy_mov_element) { __ Movi(v5.V2D(), 0, 0x0123456789abcdef); __ Mov(v16.V16B(), 15, v0.V16B(), 0); - __ Mov(v17.V8H(), 0, v1.V8H(), 7); - __ Mov(v18.V4S(), 3, v1.V4S(), 0); - __ Mov(v19.V2D(), 1, v0.V2D(), 0); + __ Mov(v17.V8H(), 0, v1.V8H(), 7); + __ Mov(v18.V4S(), 3, v1.V4S(), 0); + __ Mov(v19.V2D(), 1, v0.V2D(), 0); __ Mov(v2.V16B(), 2, v2.V16B(), 0); - __ Mov(v3.V8H(), 0, v3.V8H(), 7); - __ Mov(v4.V4S(), 3, v4.V4S(), 0); - __ Mov(v5.V2D(), 0, v5.V2D(), 1); + __ Mov(v3.V8H(), 0, v3.V8H(), 7); + __ Mov(v4.V4S(), 3, v4.V4S(), 0); + __ Mov(v5.V2D(), 0, v5.V2D(), 1); END(); @@ -19795,20 +20207,20 @@ TEST(neon_copy_smov) { __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); - __ Smov(w0, v0.B(), 7); + __ Smov(w0, v0.B(), 7); __ Smov(w1, v0.B(), 15); - __ Smov(w2, v0.H(), 0); - __ Smov(w3, v0.H(), 3); + __ Smov(w2, v0.H(), 0); + __ Smov(w3, v0.H(), 3); - __ Smov(x4, v0.B(), 7); - __ Smov(x5, v0.B(), 15); + __ Smov(x4, v0.B(), 7); + __ Smov(x5, v0.B(), 15); - __ Smov(x6, v0.H(), 0); - __ Smov(x7, v0.H(), 3); + __ Smov(x6, v0.H(), 0); + __ Smov(x7, v0.H(), 3); - __ Smov(x16, v0.S(), 0); - __ Smov(x17, v0.S(), 1); + __ Smov(x16, v0.S(), 0); + __ Smov(x17, v0.S(), 1); END(); @@ -19837,12 +20249,12 @@ TEST(neon_copy_umov_mov) { __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210); __ Umov(w0, v0.B(), 15); - __ Umov(w1, v0.H(), 0); - __ 
Umov(w2, v0.S(), 3); - __ Umov(x3, v0.D(), 1); + __ Umov(w1, v0.H(), 0); + __ Umov(w2, v0.S(), 3); + __ Umov(x3, v0.D(), 1); - __ Mov(w4, v0.S(), 3); - __ Mov(x5, v0.D(), 1); + __ Mov(w4, v0.S(), 3); + __ Mov(x5, v0.D(), 1); END(); @@ -19876,14 +20288,14 @@ TEST(neon_copy_ins_general) { __ Movi(v5.V2D(), 0, 0x0123456789abcdef); __ Ins(v16.V16B(), 15, w0); - __ Ins(v17.V8H(), 0, w0); - __ Ins(v18.V4S(), 3, w0); - __ Ins(v19.V2D(), 0, x0); + __ Ins(v17.V8H(), 0, w0); + __ Ins(v18.V4S(), 3, w0); + __ Ins(v19.V2D(), 0, x0); __ Ins(v2.V16B(), 2, w0); - __ Ins(v3.V8H(), 0, w0); - __ Ins(v4.V4S(), 3, w0); - __ Ins(v5.V2D(), 1, x0); + __ Ins(v3.V8H(), 0, w0); + __ Ins(v4.V4S(), 3, w0); + __ Ins(v5.V2D(), 1, x0); END(); @@ -19920,8 +20332,8 @@ TEST(neon_extract_ext) { __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0); __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7); - __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4); // Dest is same as one Src - __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4); // All reg are the same + __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4); // Dest is same as one Src + __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4); // All reg are the same END(); @@ -19994,13 +20406,13 @@ TEST(neon_3different_addhn_subhn) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Addhn(v16.V8B(), v0.V8H(), v1.V8H()); + __ Addhn(v16.V8B(), v0.V8H(), v1.V8H()); __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H()); - __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H()); + __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H()); __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H()); - __ Subhn(v18.V8B(), v0.V8H(), v1.V8H()); + __ Subhn(v18.V8B(), v0.V8H(), v1.V8H()); __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H()); - __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H()); + __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H()); __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H()); END(); @@ -20243,14 +20655,14 @@ TEST(neon_sshll) { __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); __ Movi(v2.V2D(), 0x80000000ffffffff, 
0x000000007fffffff); - __ Sshll(v16.V8H(), v0.V8B(), 4); + __ Sshll(v16.V8H(), v0.V8B(), 4); __ Sshll2(v17.V8H(), v0.V16B(), 4); - __ Sshll(v18.V4S(), v1.V4H(), 8); - __ Sshll2(v19.V4S(), v1.V8H(), 8); + __ Sshll(v18.V4S(), v1.V4H(), 8); + __ Sshll2(v19.V4S(), v1.V8H(), 8); - __ Sshll(v20.V2D(), v2.V2S(), 16); - __ Sshll2(v21.V2D(), v2.V4S(), 16); + __ Sshll(v20.V2D(), v2.V2S(), 16); + __ Sshll2(v21.V2D(), v2.V4S(), 16); END(); @@ -20274,14 +20686,14 @@ TEST(neon_shll) { __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); - __ Shll(v16.V8H(), v0.V8B(), 8); + __ Shll(v16.V8H(), v0.V8B(), 8); __ Shll2(v17.V8H(), v0.V16B(), 8); - __ Shll(v18.V4S(), v1.V4H(), 16); - __ Shll2(v19.V4S(), v1.V8H(), 16); + __ Shll(v18.V4S(), v1.V4H(), 16); + __ Shll2(v19.V4S(), v1.V8H(), 16); - __ Shll(v20.V2D(), v2.V2S(), 32); - __ Shll2(v21.V2D(), v2.V4S(), 32); + __ Shll(v20.V2D(), v2.V2S(), 32); + __ Shll2(v21.V2D(), v2.V4S(), 32); END(); @@ -20305,14 +20717,14 @@ TEST(neon_ushll) { __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff); __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff); - __ Ushll(v16.V8H(), v0.V8B(), 4); + __ Ushll(v16.V8H(), v0.V8B(), 4); __ Ushll2(v17.V8H(), v0.V16B(), 4); - __ Ushll(v18.V4S(), v1.V4H(), 8); - __ Ushll2(v19.V4S(), v1.V8H(), 8); + __ Ushll(v18.V4S(), v1.V4H(), 8); + __ Ushll2(v19.V4S(), v1.V8H(), 8); - __ Ushll(v20.V2D(), v2.V2S(), 16); - __ Ushll2(v21.V2D(), v2.V4S(), 16); + __ Ushll(v20.V2D(), v2.V2S(), 16); + __ Ushll2(v21.V2D(), v2.V4S(), 16); END(); @@ -20340,11 +20752,11 @@ TEST(neon_sxtl) { __ Sxtl(v16.V8H(), v0.V8B()); __ Sxtl2(v17.V8H(), v0.V16B()); - __ Sxtl(v18.V4S(), v1.V4H()); - __ Sxtl2(v19.V4S(), v1.V8H()); + __ Sxtl(v18.V4S(), v1.V4H()); + __ Sxtl2(v19.V4S(), v1.V8H()); - __ Sxtl(v20.V2D(), v2.V2S()); - __ Sxtl2(v21.V2D(), v2.V4S()); + __ Sxtl(v20.V2D(), v2.V2S()); + __ Sxtl2(v21.V2D(), v2.V4S()); END(); @@ -20372,11 +20784,11 @@ TEST(neon_uxtl) { __ 
Uxtl(v16.V8H(), v0.V8B()); __ Uxtl2(v17.V8H(), v0.V16B()); - __ Uxtl(v18.V4S(), v1.V4H()); - __ Uxtl2(v19.V4S(), v1.V8H()); + __ Uxtl(v18.V4S(), v1.V4H()); + __ Uxtl2(v19.V4S(), v1.V8H()); - __ Uxtl(v20.V2D(), v2.V2S()); - __ Uxtl2(v21.V2D(), v2.V4S()); + __ Uxtl(v20.V2D(), v2.V2S()); + __ Uxtl2(v21.V2D(), v2.V4S()); END(); @@ -20403,30 +20815,30 @@ TEST(neon_ssra) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Mov(v16.V2D(), v0.V2D()); - __ Mov(v17.V2D(), v0.V2D()); - __ Mov(v18.V2D(), v1.V2D()); - __ Mov(v19.V2D(), v1.V2D()); - __ Mov(v20.V2D(), v2.V2D()); - __ Mov(v21.V2D(), v2.V2D()); - __ Mov(v22.V2D(), v3.V2D()); - __ Mov(v23.V2D(), v4.V2D()); - __ Mov(v24.V2D(), v3.V2D()); - __ Mov(v25.V2D(), v4.V2D()); - - __ Ssra(v16.V8B(), v0.V8B(), 4); + __ Mov(v16.V2D(), v0.V2D()); + __ Mov(v17.V2D(), v0.V2D()); + __ Mov(v18.V2D(), v1.V2D()); + __ Mov(v19.V2D(), v1.V2D()); + __ Mov(v20.V2D(), v2.V2D()); + __ Mov(v21.V2D(), v2.V2D()); + __ Mov(v22.V2D(), v3.V2D()); + __ Mov(v23.V2D(), v4.V2D()); + __ Mov(v24.V2D(), v3.V2D()); + __ Mov(v25.V2D(), v4.V2D()); + + __ Ssra(v16.V8B(), v0.V8B(), 4); __ Ssra(v17.V16B(), v0.V16B(), 4); - __ Ssra(v18.V4H(), v1.V4H(), 8); - __ Ssra(v19.V8H(), v1.V8H(), 8); + __ Ssra(v18.V4H(), v1.V4H(), 8); + __ Ssra(v19.V8H(), v1.V8H(), 8); - __ Ssra(v20.V2S(), v2.V2S(), 16); - __ Ssra(v21.V4S(), v2.V4S(), 16); + __ Ssra(v20.V2S(), v2.V2S(), 16); + __ Ssra(v21.V4S(), v2.V4S(), 16); - __ Ssra(v22.V2D(), v3.V2D(), 32); - __ Ssra(v23.V2D(), v4.V2D(), 32); + __ Ssra(v22.V2D(), v3.V2D(), 32); + __ Ssra(v23.V2D(), v4.V2D(), 32); - __ Ssra(d24, d3, 48); + __ Ssra(d24, d3, 48); END(); @@ -20455,30 +20867,30 @@ TEST(neon_srsra) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Mov(v16.V2D(), v0.V2D()); - __ Mov(v17.V2D(), v0.V2D()); - __ Mov(v18.V2D(), v1.V2D()); - __ Mov(v19.V2D(), v1.V2D()); - __ 
Mov(v20.V2D(), v2.V2D()); - __ Mov(v21.V2D(), v2.V2D()); - __ Mov(v22.V2D(), v3.V2D()); - __ Mov(v23.V2D(), v4.V2D()); - __ Mov(v24.V2D(), v3.V2D()); - __ Mov(v25.V2D(), v4.V2D()); - - __ Srsra(v16.V8B(), v0.V8B(), 4); + __ Mov(v16.V2D(), v0.V2D()); + __ Mov(v17.V2D(), v0.V2D()); + __ Mov(v18.V2D(), v1.V2D()); + __ Mov(v19.V2D(), v1.V2D()); + __ Mov(v20.V2D(), v2.V2D()); + __ Mov(v21.V2D(), v2.V2D()); + __ Mov(v22.V2D(), v3.V2D()); + __ Mov(v23.V2D(), v4.V2D()); + __ Mov(v24.V2D(), v3.V2D()); + __ Mov(v25.V2D(), v4.V2D()); + + __ Srsra(v16.V8B(), v0.V8B(), 4); __ Srsra(v17.V16B(), v0.V16B(), 4); - __ Srsra(v18.V4H(), v1.V4H(), 8); - __ Srsra(v19.V8H(), v1.V8H(), 8); + __ Srsra(v18.V4H(), v1.V4H(), 8); + __ Srsra(v19.V8H(), v1.V8H(), 8); - __ Srsra(v20.V2S(), v2.V2S(), 16); - __ Srsra(v21.V4S(), v2.V4S(), 16); + __ Srsra(v20.V2S(), v2.V2S(), 16); + __ Srsra(v21.V4S(), v2.V4S(), 16); - __ Srsra(v22.V2D(), v3.V2D(), 32); - __ Srsra(v23.V2D(), v4.V2D(), 32); + __ Srsra(v22.V2D(), v3.V2D(), 32); + __ Srsra(v23.V2D(), v4.V2D(), 32); - __ Srsra(d24, d3, 48); + __ Srsra(d24, d3, 48); END(); @@ -20508,30 +20920,30 @@ TEST(neon_usra) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Mov(v16.V2D(), v0.V2D()); - __ Mov(v17.V2D(), v0.V2D()); - __ Mov(v18.V2D(), v1.V2D()); - __ Mov(v19.V2D(), v1.V2D()); - __ Mov(v20.V2D(), v2.V2D()); - __ Mov(v21.V2D(), v2.V2D()); - __ Mov(v22.V2D(), v3.V2D()); - __ Mov(v23.V2D(), v4.V2D()); - __ Mov(v24.V2D(), v3.V2D()); - __ Mov(v25.V2D(), v4.V2D()); - - __ Usra(v16.V8B(), v0.V8B(), 4); + __ Mov(v16.V2D(), v0.V2D()); + __ Mov(v17.V2D(), v0.V2D()); + __ Mov(v18.V2D(), v1.V2D()); + __ Mov(v19.V2D(), v1.V2D()); + __ Mov(v20.V2D(), v2.V2D()); + __ Mov(v21.V2D(), v2.V2D()); + __ Mov(v22.V2D(), v3.V2D()); + __ Mov(v23.V2D(), v4.V2D()); + __ Mov(v24.V2D(), v3.V2D()); + __ Mov(v25.V2D(), v4.V2D()); + + __ Usra(v16.V8B(), v0.V8B(), 4); __ Usra(v17.V16B(), v0.V16B(), 4); - __ 
Usra(v18.V4H(), v1.V4H(), 8); - __ Usra(v19.V8H(), v1.V8H(), 8); + __ Usra(v18.V4H(), v1.V4H(), 8); + __ Usra(v19.V8H(), v1.V8H(), 8); - __ Usra(v20.V2S(), v2.V2S(), 16); - __ Usra(v21.V4S(), v2.V4S(), 16); + __ Usra(v20.V2S(), v2.V2S(), 16); + __ Usra(v21.V4S(), v2.V4S(), 16); - __ Usra(v22.V2D(), v3.V2D(), 32); - __ Usra(v23.V2D(), v4.V2D(), 32); + __ Usra(v22.V2D(), v3.V2D(), 32); + __ Usra(v23.V2D(), v4.V2D(), 32); - __ Usra(d24, d3, 48); + __ Usra(d24, d3, 48); END(); @@ -20561,30 +20973,30 @@ TEST(neon_ursra) { __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff); __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000); - __ Mov(v16.V2D(), v0.V2D()); - __ Mov(v17.V2D(), v0.V2D()); - __ Mov(v18.V2D(), v1.V2D()); - __ Mov(v19.V2D(), v1.V2D()); - __ Mov(v20.V2D(), v2.V2D()); - __ Mov(v21.V2D(), v2.V2D()); - __ Mov(v22.V2D(), v3.V2D()); - __ Mov(v23.V2D(), v4.V2D()); - __ Mov(v24.V2D(), v3.V2D()); - __ Mov(v25.V2D(), v4.V2D()); - - __ Ursra(v16.V8B(), v0.V8B(), 4); + __ Mov(v16.V2D(), v0.V2D()); + __ Mov(v17.V2D(), v0.V2D()); + __ Mov(v18.V2D(), v1.V2D()); + __ Mov(v19.V2D(), v1.V2D()); + __ Mov(v20.V2D(), v2.V2D()); + __ Mov(v21.V2D(), v2.V2D()); + __ Mov(v22.V2D(), v3.V2D()); + __ Mov(v23.V2D(), v4.V2D()); + __ Mov(v24.V2D(), v3.V2D()); + __ Mov(v25.V2D(), v4.V2D()); + + __ Ursra(v16.V8B(), v0.V8B(), 4); __ Ursra(v17.V16B(), v0.V16B(), 4); - __ Ursra(v18.V4H(), v1.V4H(), 8); - __ Ursra(v19.V8H(), v1.V8H(), 8); + __ Ursra(v18.V4H(), v1.V4H(), 8); + __ Ursra(v19.V8H(), v1.V8H(), 8); - __ Ursra(v20.V2S(), v2.V2S(), 16); - __ Ursra(v21.V4S(), v2.V4S(), 16); + __ Ursra(v20.V2S(), v2.V2S(), 16); + __ Ursra(v21.V4S(), v2.V4S(), 16); - __ Ursra(v22.V2D(), v3.V2D(), 32); - __ Ursra(v23.V2D(), v4.V2D(), 32); + __ Ursra(v22.V2D(), v3.V2D(), 32); + __ Ursra(v23.V2D(), v4.V2D(), 32); - __ Ursra(d24, d3, 48); + __ Ursra(d24, d3, 48); END(); @@ -21161,7 +21573,7 @@ TEST(crc32b) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); 
ASSERT_EQUAL_64(0x5f058808, x11); ASSERT_EQUAL_64(0x5f058808, x12); ASSERT_EQUAL_64(0xedb88320, x13); @@ -21203,7 +21615,7 @@ TEST(crc32h) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0x0e848dba, x11); ASSERT_EQUAL_64(0x0e848dba, x12); ASSERT_EQUAL_64(0x3b83984b, x13); @@ -21241,7 +21653,7 @@ TEST(crc32w) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0x1d937b81, x11); ASSERT_EQUAL_64(0xed59b63b, x13); ASSERT_EQUAL_64(0x00be2612, x14); @@ -21278,7 +21690,7 @@ TEST(crc32x) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0x40797b92, x11); ASSERT_EQUAL_64(0x533b85da, x13); ASSERT_EQUAL_64(0xbc962670, x14); @@ -21319,7 +21731,7 @@ TEST(crc32cb) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0x4851927d, x11); ASSERT_EQUAL_64(0x4851927d, x12); ASSERT_EQUAL_64(0x82f63b78, x13); @@ -21361,7 +21773,7 @@ TEST(crc32ch) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0xcef8494c, x11); ASSERT_EQUAL_64(0xcef8494c, x12); ASSERT_EQUAL_64(0xfbc3faf9, x13); @@ -21399,7 +21811,7 @@ TEST(crc32cw) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0xbcb79ece, x11); ASSERT_EQUAL_64(0x52a0c93f, x13); ASSERT_EQUAL_64(0x9f9b5c7a, x14); @@ -21436,7 +21848,7 @@ TEST(crc32cx) { END(); RUN(); - ASSERT_EQUAL_64(0x0, x10); + ASSERT_EQUAL_64(0x0, x10); ASSERT_EQUAL_64(0x7f320fcb, x11); ASSERT_EQUAL_64(0x34019664, x13); ASSERT_EQUAL_64(0x6cc27dd0, x14); @@ -21801,10 +22213,12 @@ TEST(far_branch_backward) { // that are outside the immediate range of branch instructions. // Take into account that backward branches can reach one instruction further // than forward branches. 
- const int overflow_size = kInstructionSize + - std::max(Instruction::GetImmBranchForwardRange(TestBranchType), - std::max(Instruction::GetImmBranchForwardRange(CompareBranchType), - Instruction::GetImmBranchForwardRange(CondBranchType))); + const int overflow_size = + kInstructionSize + + std::max(Instruction::GetImmBranchForwardRange(TestBranchType), + std::max(Instruction::GetImmBranchForwardRange( + CompareBranchType), + Instruction::GetImmBranchForwardRange(CondBranchType))); SETUP(); START(); @@ -21917,9 +22331,10 @@ TEST(simple_veneers) { // Test that the MacroAssembler correctly emits veneers for forward branches // to labels that are outside the immediate range of branch instructions. const int max_range = - std::max(Instruction::GetImmBranchForwardRange(TestBranchType), - std::max(Instruction::GetImmBranchForwardRange(CompareBranchType), - Instruction::GetImmBranchForwardRange(CondBranchType))); + std::max(Instruction::GetImmBranchForwardRange(TestBranchType), + std::max(Instruction::GetImmBranchForwardRange( + CompareBranchType), + Instruction::GetImmBranchForwardRange(CondBranchType))); SETUP(); START(); @@ -22084,7 +22499,7 @@ TEST(veneers_hanging) { // We use different labels to prevent the MacroAssembler from sharing veneers. 
Label labels[kNTotalBranches]; for (int i = 0; i < kNTotalBranches; i++) { - new(&labels[i]) Label(); + new (&labels[i]) Label(); } for (int i = 0; i < n_bcond; i++) { @@ -22401,11 +22816,16 @@ TEST(generic_operand_helpers) { TEST(generic_operand) { SETUP(); - int32_t data_32_array[5] = { - 0xbadbeef, 0x11111111, 0xbadbeef, 0x33333333, 0xbadbeef }; - int64_t data_64_array[5] = { - INT64_C(0xbadbadbadbeef), INT64_C(0x1111111111111111), - INT64_C(0xbadbadbadbeef), INT64_C(0x3333333333333333), INT64_C(0xbadbadbadbeef) }; + int32_t data_32_array[5] = {0xbadbeef, + 0x11111111, + 0xbadbeef, + 0x33333333, + 0xbadbeef}; + int64_t data_64_array[5] = {INT64_C(0xbadbadbadbeef), + INT64_C(0x1111111111111111), + INT64_C(0xbadbadbadbeef), + INT64_C(0x3333333333333333), + INT64_C(0xbadbadbadbeef)}; size_t size_32 = sizeof(data_32_array[0]); size_t size_64 = sizeof(data_64_array[0]); @@ -22478,9 +22898,7 @@ TEST(generic_operand) { } -int32_t runtime_call_add_one(int32_t a) { - return a + 1; -} +int32_t runtime_call_add_one(int32_t a) { return a + 1; } double runtime_call_add_doubles(double a, double b, double c) { return a + b + c; @@ -22511,9 +22929,7 @@ double runtime_call_two_arguments_on_stack(int64_t arg1 __attribute__((unused)), return arg9 - arg10; } -void runtime_call_store_at_address(int64_t* address) { - *address = 0xf00d; -} +void runtime_call_store_at_address(int64_t* address) { *address = 0xf00d; } // Test feature detection of calls to runtime functions. @@ -22522,12 +22938,14 @@ void runtime_call_store_at_address(int64_t* address) { #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && (__cplusplus >= 201103L) && \ (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) && \ !defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) -#error "C++11 should be sufficient to provide support for simulated runtime calls." +#error \ + "C++11 should be sufficient to provide support for simulated runtime calls." #endif // #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && ... 
#if (__cplusplus >= 201103L) && \ !defined(VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT) -#error "C++11 should be sufficient to provide support for `MacroAssembler::CallRuntime()`." +#error \ + "C++11 should be sufficient to provide support for `MacroAssembler::CallRuntime()`." #endif // #if (__cplusplus >= 201103L) && ... #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc index 32ceb4c6..438b8aae 100644 --- a/test/aarch64/test-disasm-aarch64.cc +++ b/test/aarch64/test-disasm-aarch64.cc @@ -34,26 +34,26 @@ #include "aarch64/disasm-aarch64.h" #include "aarch64/macro-assembler-aarch64.h" -#define TEST(name) TEST_(AARCH64_DISASM_##name) +#define TEST(name) TEST_(AARCH64_DISASM_##name) -#define SETUP_CLASS(ASMCLASS) \ - uint32_t encoding = 0; \ - ASMCLASS masm; \ - Decoder decoder; \ - Disassembler disasm; \ +#define SETUP_CLASS(ASMCLASS) \ + uint32_t encoding = 0; \ + ASMCLASS masm; \ + Decoder decoder; \ + Disassembler disasm; \ decoder.AppendVisitor(&disasm) #define SETUP() SETUP_CLASS(Assembler) #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 // Run tests with the simulator. -#define SETUP_MACRO() \ - SETUP_CLASS(MacroAssembler); \ +#define SETUP_MACRO() \ + SETUP_CLASS(MacroAssembler); \ masm.SetGenerateSimulatorCode(true) #else // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64. -#define SETUP_MACRO() \ - SETUP_CLASS(MacroAssembler); \ +#define SETUP_MACRO() \ + SETUP_CLASS(MacroAssembler); \ masm.SetGenerateSimulatorCode(false) #endif // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64. @@ -62,80 +62,84 @@ // tests. 
#define MAX_SIZE_GENERATED 1024 -#define COMPARE(ASM, EXP) \ - masm.Reset(); \ - { \ - CodeBufferCheckScope guard(&masm, MAX_SIZE_GENERATED); \ - masm.ASM; \ - } \ - masm.FinalizeCode(); \ - decoder.Decode(masm.GetBuffer()->GetStartAddress<Instruction*>()); \ - encoding = *masm.GetBuffer()->GetStartAddress<uint32_t*>(); \ - if (strcmp(disasm.GetOutput(), EXP) != 0) { \ - printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound: %s\n", \ - encoding, EXP, disasm.GetOutput()); \ - abort(); \ - } \ - if (Test::disassemble()) { \ - printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ +#define COMPARE(ASM, EXP) \ + masm.Reset(); \ + { \ + CodeBufferCheckScope guard(&masm, MAX_SIZE_GENERATED); \ + masm.ASM; \ + } \ + masm.FinalizeCode(); \ + decoder.Decode(masm.GetBuffer()->GetStartAddress<Instruction*>()); \ + encoding = *masm.GetBuffer()->GetStartAddress<uint32_t*>(); \ + if (strcmp(disasm.GetOutput(), EXP) != 0) { \ + printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound: %s\n", \ + encoding, \ + EXP, \ + disasm.GetOutput()); \ + abort(); \ + } \ + if (Test::disassemble()) { \ + printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ } -#define COMPARE_PREFIX(ASM, EXP) \ - masm.Reset(); \ - { \ - CodeBufferCheckScope guard(&masm, MAX_SIZE_GENERATED); \ - masm.ASM; \ - } \ - masm.FinalizeCode(); \ - decoder.Decode(masm.GetBuffer()->GetStartAddress<Instruction*>()); \ - encoding = *masm.GetBuffer()->GetStartAddress<uint32_t*>(); \ - if (strncmp(disasm.GetOutput(), EXP, strlen(EXP)) != 0) { \ - printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound: %s\n", \ - encoding, EXP, disasm.GetOutput()); \ - abort(); \ - } \ - if (Test::disassemble()) { \ - printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ +#define COMPARE_PREFIX(ASM, EXP) \ + masm.Reset(); \ + { \ + CodeBufferCheckScope guard(&masm, MAX_SIZE_GENERATED); \ + masm.ASM; \ + } \ + masm.FinalizeCode(); \ + decoder.Decode(masm.GetBuffer()->GetStartAddress<Instruction*>()); \ + encoding = 
*masm.GetBuffer()->GetStartAddress<uint32_t*>(); \ + if (strncmp(disasm.GetOutput(), EXP, strlen(EXP)) != 0) { \ + printf("\nEncoding: %08" PRIx32 "\nExpected: %s\nFound: %s\n", \ + encoding, \ + EXP, \ + disasm.GetOutput()); \ + abort(); \ + } \ + if (Test::disassemble()) { \ + printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ } -#define COMPARE_MACRO_BASE(ASM, EXP) \ - masm.Reset(); \ - masm.ASM; \ - masm.FinalizeCode(); \ - std::string res; \ - \ - Instruction* instruction = \ - masm.GetBuffer()->GetStartAddress<Instruction*>(); \ - Instruction* end = masm.GetCursorAddress<Instruction*>(); \ - while (instruction != end) { \ - decoder.Decode(instruction); \ - res.append(disasm.GetOutput()); \ - if (Test::disassemble()) { \ - encoding = *reinterpret_cast<uint32_t*>(instruction); \ - printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ - } \ - instruction += kInstructionSize; \ - if (instruction != end) { \ - res.append("\n"); \ - } \ +#define COMPARE_MACRO_BASE(ASM, EXP) \ + masm.Reset(); \ + masm.ASM; \ + masm.FinalizeCode(); \ + std::string res; \ + \ + Instruction* instruction = \ + masm.GetBuffer()->GetStartAddress<Instruction*>(); \ + Instruction* end = masm.GetCursorAddress<Instruction*>(); \ + while (instruction != end) { \ + decoder.Decode(instruction); \ + res.append(disasm.GetOutput()); \ + if (Test::disassemble()) { \ + encoding = *reinterpret_cast<uint32_t*>(instruction); \ + printf("%08" PRIx32 "\t%s\n", encoding, disasm.GetOutput()); \ + } \ + instruction += kInstructionSize; \ + if (instruction != end) { \ + res.append("\n"); \ + } \ } -#define COMPARE_MACRO(ASM, EXP) \ - { \ - COMPARE_MACRO_BASE(ASM, EXP) \ - if (strcmp(res.c_str(), EXP) != 0) { \ - printf("Expected: %s\nFound: %s\n", EXP, res.c_str()); \ - abort(); \ - } \ +#define COMPARE_MACRO(ASM, EXP) \ + { \ + COMPARE_MACRO_BASE(ASM, EXP) \ + if (strcmp(res.c_str(), EXP) != 0) { \ + printf("Expected: %s\nFound: %s\n", EXP, res.c_str()); \ + abort(); \ + } \ } -#define 
COMPARE_MACRO_PREFIX(ASM, EXP) \ - { \ - COMPARE_MACRO_BASE(ASM, EXP) \ - if (strncmp(res.c_str(), EXP, strlen(EXP)) != 0) { \ - printf("Expected (prefix): %s\nFound: %s\n", EXP, res.c_str()); \ - abort(); \ - } \ +#define COMPARE_MACRO_PREFIX(ASM, EXP) \ + { \ + COMPARE_MACRO_BASE(ASM, EXP) \ + if (strncmp(res.c_str(), EXP, strlen(EXP)) != 0) { \ + printf("Expected (prefix): %s\nFound: %s\n", EXP, res.c_str()); \ + abort(); \ + } \ } #define CLEANUP() @@ -711,7 +715,7 @@ TEST(extract) { TEST(logical_immediate) { SETUP(); - #define RESULT_SIZE (256) +#define RESULT_SIZE (256) char result[RESULT_SIZE]; @@ -775,33 +779,25 @@ TEST(logical_immediate) { "and w0, w0, #0x55555555"); // 2-bit pattern. // Test other instructions. - COMPARE(tst(w1, Operand(0x11111111)), - "tst w1, #0x11111111"); - COMPARE(tst(x2, Operand(0x8888888888888888)), - "tst x2, #0x8888888888888888"); - COMPARE(orr(w7, w8, Operand(0xaaaaaaaa)), - "orr w7, w8, #0xaaaaaaaa"); + COMPARE(tst(w1, Operand(0x11111111)), "tst w1, #0x11111111"); + COMPARE(tst(x2, Operand(0x8888888888888888)), "tst x2, #0x8888888888888888"); + COMPARE(orr(w7, w8, Operand(0xaaaaaaaa)), "orr w7, w8, #0xaaaaaaaa"); COMPARE(orr(x9, x10, Operand(0x5555555555555555)), "orr x9, x10, #0x5555555555555555"); - COMPARE(eor(w15, w16, Operand(0x00000001)), - "eor w15, w16, #0x1"); - COMPARE(eor(x17, x18, Operand(0x0000000000000003)), - "eor x17, x18, #0x3"); + COMPARE(eor(w15, w16, Operand(0x00000001)), "eor w15, w16, #0x1"); + COMPARE(eor(x17, x18, Operand(0x0000000000000003)), "eor x17, x18, #0x3"); COMPARE(ands(w23, w24, Operand(0x0000000f)), "ands w23, w24, #0xf"); COMPARE(ands(x25, x26, Operand(0x800000000000000f)), "ands x25, x26, #0x800000000000000f"); // Test inverse. 
- COMPARE(bic(w3, w4, Operand(0x20202020)), - "and w3, w4, #0xdfdfdfdf"); + COMPARE(bic(w3, w4, Operand(0x20202020)), "and w3, w4, #0xdfdfdfdf"); COMPARE(bic(x5, x6, Operand(0x4040404040404040)), "and x5, x6, #0xbfbfbfbfbfbfbfbf"); - COMPARE(orn(w11, w12, Operand(0x40004000)), - "orr w11, w12, #0xbfffbfff"); + COMPARE(orn(w11, w12, Operand(0x40004000)), "orr w11, w12, #0xbfffbfff"); COMPARE(orn(x13, x14, Operand(0x8181818181818181)), "orr x13, x14, #0x7e7e7e7e7e7e7e7e"); - COMPARE(eon(w19, w20, Operand(0x80000001)), - "eor w19, w20, #0x7ffffffe"); + COMPARE(eon(w19, w20, Operand(0x80000001)), "eor w19, w20, #0x7ffffffe"); COMPARE(eon(x21, x22, Operand(0xc000000000000003)), "eor x21, x22, #0x3ffffffffffffffc"); COMPARE(bics(w27, w28, Operand(0xfffffff7)), "ands w27, w28, #0x8"); @@ -956,7 +952,7 @@ TEST(adrp) { TEST(branch) { SETUP(); - #define INST_OFF(x) (INT64_C(x) >> kInstructionSizeLog2) +#define INST_OFF(x) (INT64_C(x) >> kInstructionSizeLog2) COMPARE_PREFIX(b(INST_OFF(0x4)), "b #+0x4"); COMPARE_PREFIX(b(INST_OFF(-0x4)), "b #-0x4"); COMPARE_PREFIX(b(INST_OFF(0x7fffffc)), "b #+0x7fffffc"); @@ -1521,12 +1517,10 @@ TEST(load_store_unscaled_option) { SETUP(); // Just like load_store_unscaled, but specify the scaling option explicitly. 
- LoadStoreScalingOption options[] = { - PreferUnscaledOffset, - RequireUnscaledOffset - }; + LoadStoreScalingOption options[] = {PreferUnscaledOffset, + RequireUnscaledOffset}; - for (size_t i = 0; i < sizeof(options)/sizeof(options[0]); i++) { + for (size_t i = 0; i < sizeof(options) / sizeof(options[0]); i++) { LoadStoreScalingOption option = options[i]; // If an unscaled-offset instruction is requested, it is used, even if the @@ -1753,8 +1747,7 @@ TEST(load_store_pair) { "stp x18, x19, [sp, #-8]!"); COMPARE(ldp(s30, s31, MemOperand(sp, 12, PostIndex)), "ldp s30, s31, [sp], #12"); - COMPARE(stp(d30, d31, MemOperand(sp, -16)), - "stp d30, d31, [sp, #-16]"); + COMPARE(stp(d30, d31, MemOperand(sp, -16)), "stp d30, d31, [sp, #-16]"); COMPARE(ldp(q30, q31, MemOperand(sp, 32, PostIndex)), "ldp q30, q31, [sp], #32"); @@ -1897,10 +1890,10 @@ TEST(load_literal_macro) { // ldr xzr, pc+12 // Pool marker. // .word64 #0x1234567890abcdef // Test literal. - COMPARE_PREFIX(Ldr(x10, 0x1234567890abcdef), "ldr x10, pc+8"); - COMPARE_PREFIX(Ldr(w20, 0xfedcba09), "ldr w20, pc+8"); - COMPARE_PREFIX(Ldr(d11, 1.234), "ldr d11, pc+8"); - COMPARE_PREFIX(Ldr(s22, 2.5f), "ldr s22, pc+8"); + COMPARE_PREFIX(Ldr(x10, 0x1234567890abcdef), "ldr x10, pc+8"); + COMPARE_PREFIX(Ldr(w20, 0xfedcba09), "ldr w20, pc+8"); + COMPARE_PREFIX(Ldr(d11, 1.234), "ldr d11, pc+8"); + COMPARE_PREFIX(Ldr(s22, 2.5f), "ldr s22, pc+8"); COMPARE_PREFIX(Ldrsw(x21, 0x80000000), "ldrsw x21, pc+8"); CLEANUP(); @@ -1945,38 +1938,17 @@ TEST(prfm_operations) { // Test every encodable prefetch operation. 
const char* expected[] = { - "prfm pldl1keep, ", - "prfm pldl1strm, ", - "prfm pldl2keep, ", - "prfm pldl2strm, ", - "prfm pldl3keep, ", - "prfm pldl3strm, ", - "prfm #0b00110, ", - "prfm #0b00111, ", - "prfm plil1keep, ", - "prfm plil1strm, ", - "prfm plil2keep, ", - "prfm plil2strm, ", - "prfm plil3keep, ", - "prfm plil3strm, ", - "prfm #0b01110, ", - "prfm #0b01111, ", - "prfm pstl1keep, ", - "prfm pstl1strm, ", - "prfm pstl2keep, ", - "prfm pstl2strm, ", - "prfm pstl3keep, ", - "prfm pstl3strm, ", - "prfm #0b10110, ", - "prfm #0b10111, ", - "prfm #0b11000, ", - "prfm #0b11001, ", - "prfm #0b11010, ", - "prfm #0b11011, ", - "prfm #0b11100, ", - "prfm #0b11101, ", - "prfm #0b11110, ", - "prfm #0b11111, ", + "prfm pldl1keep, ", "prfm pldl1strm, ", "prfm pldl2keep, ", + "prfm pldl2strm, ", "prfm pldl3keep, ", "prfm pldl3strm, ", + "prfm #0b00110, ", "prfm #0b00111, ", "prfm plil1keep, ", + "prfm plil1strm, ", "prfm plil2keep, ", "prfm plil2strm, ", + "prfm plil3keep, ", "prfm plil3strm, ", "prfm #0b01110, ", + "prfm #0b01111, ", "prfm pstl1keep, ", "prfm pstl1strm, ", + "prfm pstl2keep, ", "prfm pstl2strm, ", "prfm pstl3keep, ", + "prfm pstl3strm, ", "prfm #0b10110, ", "prfm #0b10111, ", + "prfm #0b11000, ", "prfm #0b11001, ", "prfm #0b11010, ", + "prfm #0b11011, ", "prfm #0b11100, ", "prfm #0b11101, ", + "prfm #0b11110, ", "prfm #0b11111, ", }; const int expected_count = sizeof(expected) / sizeof(expected[0]); VIXL_STATIC_ASSERT((1 << ImmPrefetchOperation_width) == expected_count); @@ -1997,38 +1969,17 @@ TEST(prfum_operations) { // Test every encodable prefetch operation. 
const char* expected[] = { - "prfum pldl1keep, ", - "prfum pldl1strm, ", - "prfum pldl2keep, ", - "prfum pldl2strm, ", - "prfum pldl3keep, ", - "prfum pldl3strm, ", - "prfum #0b00110, ", - "prfum #0b00111, ", - "prfum plil1keep, ", - "prfum plil1strm, ", - "prfum plil2keep, ", - "prfum plil2strm, ", - "prfum plil3keep, ", - "prfum plil3strm, ", - "prfum #0b01110, ", - "prfum #0b01111, ", - "prfum pstl1keep, ", - "prfum pstl1strm, ", - "prfum pstl2keep, ", - "prfum pstl2strm, ", - "prfum pstl3keep, ", - "prfum pstl3strm, ", - "prfum #0b10110, ", - "prfum #0b10111, ", - "prfum #0b11000, ", - "prfum #0b11001, ", - "prfum #0b11010, ", - "prfum #0b11011, ", - "prfum #0b11100, ", - "prfum #0b11101, ", - "prfum #0b11110, ", - "prfum #0b11111, ", + "prfum pldl1keep, ", "prfum pldl1strm, ", "prfum pldl2keep, ", + "prfum pldl2strm, ", "prfum pldl3keep, ", "prfum pldl3strm, ", + "prfum #0b00110, ", "prfum #0b00111, ", "prfum plil1keep, ", + "prfum plil1strm, ", "prfum plil2keep, ", "prfum plil2strm, ", + "prfum plil3keep, ", "prfum plil3strm, ", "prfum #0b01110, ", + "prfum #0b01111, ", "prfum pstl1keep, ", "prfum pstl1strm, ", + "prfum pstl2keep, ", "prfum pstl2strm, ", "prfum pstl3keep, ", + "prfum pstl3strm, ", "prfum #0b10110, ", "prfum #0b10111, ", + "prfum #0b11000, ", "prfum #0b11001, ", "prfum #0b11010, ", + "prfum #0b11011, ", "prfum #0b11100, ", "prfum #0b11101, ", + "prfum #0b11110, ", "prfum #0b11111, ", }; const int expected_count = sizeof(expected) / sizeof(expected[0]); VIXL_STATIC_ASSERT((1 << ImmPrefetchOperation_width) == expected_count); @@ -2238,154 +2189,212 @@ TEST(cond_select_macro) { bool synthesises_right = false; COMPARE(Csel(w0, w1, -1, eq), "csinv w0, w1, wzr, eq"); - MacroAssembler::GetCselSynthesisInformation(w0, w1, -1, + MacroAssembler::GetCselSynthesisInformation(w0, + w1, + -1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE(Csel(w2, w3, 0, ne), "csel w2, w3, wzr, ne"); - 
MacroAssembler::GetCselSynthesisInformation(w2, w3, wzr, + MacroAssembler::GetCselSynthesisInformation(w2, + w3, + wzr, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE(Csel(w4, w5, 1, hs), "csinc w4, w5, wzr, hs"); - MacroAssembler::GetCselSynthesisInformation(w4, w5, 1, + MacroAssembler::GetCselSynthesisInformation(w4, + w5, + 1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE(Csel(x6, x7, -1, lo), "csinv x6, x7, xzr, lo"); - MacroAssembler::GetCselSynthesisInformation(x6, x7, xzr, + MacroAssembler::GetCselSynthesisInformation(x6, + x7, + xzr, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE(Csel(x8, x9, 0, mi), "csel x8, x9, xzr, mi"); - MacroAssembler::GetCselSynthesisInformation(x8, x9, xzr, + MacroAssembler::GetCselSynthesisInformation(x8, + x9, + xzr, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE(Csel(x10, x11, 1, pl), "csinc x10, x11, xzr, pl"); - MacroAssembler::GetCselSynthesisInformation(x10, x11, xzr, + MacroAssembler::GetCselSynthesisInformation(x10, + x11, + xzr, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(x12, 0, 0, eq), "mov x12, #0x0"); - MacroAssembler::GetCselSynthesisInformation(x12, 0, 0, + COMPARE_MACRO(Csel(x12, 0, 0, eq), "mov x12, #0x0"); + MacroAssembler::GetCselSynthesisInformation(x12, + 0, + 0, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(w13, 0, 1, eq), "cset w13, ne"); - MacroAssembler::GetCselSynthesisInformation(w13, 0, 1, + COMPARE_MACRO(Csel(w13, 0, 1, eq), "cset w13, ne"); + MacroAssembler::GetCselSynthesisInformation(w13, + 0, + 1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(x14, 1, 0, eq), "cset x14, eq"); - 
MacroAssembler::GetCselSynthesisInformation(x14, 1, 0, + COMPARE_MACRO(Csel(x14, 1, 0, eq), "cset x14, eq"); + MacroAssembler::GetCselSynthesisInformation(x14, + 1, + 0, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(w15, 0, -1, eq), "csetm w15, ne"); - MacroAssembler::GetCselSynthesisInformation(w15, 0, -1, + COMPARE_MACRO(Csel(w15, 0, -1, eq), "csetm w15, ne"); + MacroAssembler::GetCselSynthesisInformation(w15, + 0, + -1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(x18, -1, 0, eq), "csetm x18, eq"); - MacroAssembler::GetCselSynthesisInformation(x18, -1, 0, + COMPARE_MACRO(Csel(x18, -1, 0, eq), "csetm x18, eq"); + MacroAssembler::GetCselSynthesisInformation(x18, + -1, + 0, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(w19, -1, 1, eq), "mov w19, #0x1\n" - "cneg w19, w19, eq"); - MacroAssembler::GetCselSynthesisInformation(w19, -1, 1, + COMPARE_MACRO(Csel(w19, -1, 1, eq), + "mov w19, #0x1\n" + "cneg w19, w19, eq"); + MacroAssembler::GetCselSynthesisInformation(w19, + -1, + 1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(x20, 1, -1, eq), "mov x20, #0xffffffffffffffff\n" - "cneg x20, x20, eq"); - MacroAssembler::GetCselSynthesisInformation(x20, 1, -1, + COMPARE_MACRO(Csel(x20, 1, -1, eq), + "mov x20, #0xffffffffffffffff\n" + "cneg x20, x20, eq"); + MacroAssembler::GetCselSynthesisInformation(x20, + 1, + -1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(w21, 0xaa, 0xbb, eq), "mov w16, #0xaa\n" - "mov w17, #0xbb\n" - "csel w21, w16, w17, eq"); - MacroAssembler::GetCselSynthesisInformation(w21, 0xaa, 0xbb, + COMPARE_MACRO(Csel(w21, 0xaa, 0xbb, eq), + "mov w16, #0xaa\n" + "mov w17, #0xbb\n" + "csel w21, w16, w17, eq"); + 
MacroAssembler::GetCselSynthesisInformation(w21, + 0xaa, + 0xbb, &synthesises_left, &synthesises_right); VIXL_CHECK(synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(x22, 0xaa, -0xbb, eq), "mov x16, #0xaa\n" - "mov x17, #0xffffffffffffff45\n" - "csel x22, x16, x17, eq"); - MacroAssembler::GetCselSynthesisInformation(x22, 0xaa, -0xbb, + COMPARE_MACRO(Csel(x22, 0xaa, -0xbb, eq), + "mov x16, #0xaa\n" + "mov x17, #0xffffffffffffff45\n" + "csel x22, x16, x17, eq"); + MacroAssembler::GetCselSynthesisInformation(x22, + 0xaa, + -0xbb, &synthesises_left, &synthesises_right); VIXL_CHECK(synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(w23, 0, 0xaa, eq), "mov w16, #0xaa\n" - "csel w23, w16, wzr, ne"); - MacroAssembler::GetCselSynthesisInformation(w23, 0, 0xaa, + COMPARE_MACRO(Csel(w23, 0, 0xaa, eq), + "mov w16, #0xaa\n" + "csel w23, w16, wzr, ne"); + MacroAssembler::GetCselSynthesisInformation(w23, + 0, + 0xaa, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(x24, -0xaa, 0, eq), "mov x16, #0xffffffffffffff56\n" - "csel x24, x16, xzr, eq"); - MacroAssembler::GetCselSynthesisInformation(x24, -0xaa, 0, + COMPARE_MACRO(Csel(x24, -0xaa, 0, eq), + "mov x16, #0xffffffffffffff56\n" + "csel x24, x16, xzr, eq"); + MacroAssembler::GetCselSynthesisInformation(x24, + -0xaa, + 0, &synthesises_left, &synthesises_right); VIXL_CHECK(synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(w25, 0xcc, -0xcc, eq), "mov w25, #0xffffff34\n" - "cneg w25, w25, eq"); - MacroAssembler::GetCselSynthesisInformation(w25, 0xcc, -0xcc, + COMPARE_MACRO(Csel(w25, 0xcc, -0xcc, eq), + "mov w25, #0xffffff34\n" + "cneg w25, w25, eq"); + MacroAssembler::GetCselSynthesisInformation(w25, + 0xcc, + -0xcc, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(x26, -0xcc, 0xcc, eq), "mov x26, #0xcc\n" - "cneg x26, x26, eq"); - 
MacroAssembler::GetCselSynthesisInformation(w25, -0xcc, 0xcc, + COMPARE_MACRO(Csel(x26, -0xcc, 0xcc, eq), + "mov x26, #0xcc\n" + "cneg x26, x26, eq"); + MacroAssembler::GetCselSynthesisInformation(w25, + -0xcc, + 0xcc, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); // Test with `Operand` inputs. - COMPARE_MACRO(Csel(x0, x1, Operand(x2, LSL, 3), eq), "lsl x16, x2, #3\n" - "csel x0, x1, x16, eq"); - MacroAssembler::GetCselSynthesisInformation(x0, x1, Operand(x2, LSL, 3), + COMPARE_MACRO(Csel(x0, x1, Operand(x2, LSL, 3), eq), + "lsl x16, x2, #3\n" + "csel x0, x1, x16, eq"); + MacroAssembler::GetCselSynthesisInformation(x0, + x1, + Operand(x2, LSL, 3), &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(x3, x4, Operand(x5, SXTH), eq), "sxth x16, w5\n" - "csel x3, x4, x16, eq"); - MacroAssembler::GetCselSynthesisInformation(x3, x4, Operand(x5, SXTH), + COMPARE_MACRO(Csel(x3, x4, Operand(x5, SXTH), eq), + "sxth x16, w5\n" + "csel x3, x4, x16, eq"); + MacroAssembler::GetCselSynthesisInformation(x3, + x4, + Operand(x5, SXTH), &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); - COMPARE_MACRO(Csel(x6, Operand(x7, LSL, 7), x8, eq), "lsl x16, x7, #7\n" - "csel x6, x16, x8, eq"); - MacroAssembler::GetCselSynthesisInformation(x6, Operand(x7, LSL, 7), x8, + COMPARE_MACRO(Csel(x6, Operand(x7, LSL, 7), x8, eq), + "lsl x16, x7, #7\n" + "csel x6, x16, x8, eq"); + MacroAssembler::GetCselSynthesisInformation(x6, + Operand(x7, LSL, 7), + x8, &synthesises_left, &synthesises_right); VIXL_CHECK(synthesises_left && !synthesises_right); - COMPARE_MACRO(Csel(x9, Operand(x10, SXTH), x11, eq), "sxth x16, w10\n" - "csel x9, x16, x11, eq"); - MacroAssembler::GetCselSynthesisInformation(x9, Operand(x10, SXTH), x11, + COMPARE_MACRO(Csel(x9, Operand(x10, SXTH), x11, eq), + "sxth x16, w10\n" + "csel x9, x16, x11, eq"); + 
MacroAssembler::GetCselSynthesisInformation(x9, + Operand(x10, SXTH), + x11, &synthesises_left, &synthesises_right); VIXL_CHECK(synthesises_left && !synthesises_right); @@ -2404,38 +2413,50 @@ TEST(cond_select_macro) { COMPARE_MACRO(Csel(x15, 0, Operand(x18, LSR, 18), eq), "lsr x16, x18, #18\n" "csel x15, x16, xzr, ne"); - MacroAssembler::GetCselSynthesisInformation(x15, 0, Operand(x18, LSR, 18), + MacroAssembler::GetCselSynthesisInformation(x15, + 0, + Operand(x18, LSR, 18), &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && synthesises_right); // Test with the zero register. COMPARE_MACRO(Csel(w19, wzr, wzr, eq), "mov w19, #0x0"); - MacroAssembler::GetCselSynthesisInformation(w19, wzr, wzr, + MacroAssembler::GetCselSynthesisInformation(w19, + wzr, + wzr, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE_MACRO(Csel(x20, x21, xzr, eq), "csel x20, x21, xzr, eq"); - MacroAssembler::GetCselSynthesisInformation(x20, x21, xzr, + MacroAssembler::GetCselSynthesisInformation(x20, + x21, + xzr, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE_MACRO(Csel(w22, wzr, w23, eq), "csel w22, w23, wzr, ne"); - MacroAssembler::GetCselSynthesisInformation(w22, wzr, w23, + MacroAssembler::GetCselSynthesisInformation(w22, + wzr, + w23, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE_MACRO(Csel(x24, xzr, 0, eq), "mov x24, #0x0"); - MacroAssembler::GetCselSynthesisInformation(x24, xzr, 0, + MacroAssembler::GetCselSynthesisInformation(x24, + xzr, + 0, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); COMPARE_MACRO(Csel(w25, wzr, 1, eq), "cset w25, ne"); - MacroAssembler::GetCselSynthesisInformation(w25, wzr, 1, + MacroAssembler::GetCselSynthesisInformation(w25, + wzr, + 1, &synthesises_left, &synthesises_right); VIXL_CHECK(!synthesises_left && !synthesises_right); @@ 
-2688,14 +2709,14 @@ TEST(fcvt_scvtf_ucvtf) { COMPARE(fcvtzu(w18, s19), "fcvtzu w18, s19"); COMPARE(fcvtzs(x20, s21), "fcvtzs x20, s21"); COMPARE(fcvtzs(w22, s23), "fcvtzs w22, s23"); - COMPARE(fcvtzs(w2, d1, 1), "fcvtzs w2, d1, #1"); - COMPARE(fcvtzs(w2, s1, 1), "fcvtzs w2, s1, #1"); + COMPARE(fcvtzs(w2, d1, 1), "fcvtzs w2, d1, #1"); + COMPARE(fcvtzs(w2, s1, 1), "fcvtzs w2, s1, #1"); COMPARE(fcvtzs(x4, d3, 15), "fcvtzs x4, d3, #15"); COMPARE(fcvtzs(x4, s3, 15), "fcvtzs x4, s3, #15"); COMPARE(fcvtzs(w6, d5, 32), "fcvtzs w6, d5, #32"); COMPARE(fcvtzs(w6, s5, 32), "fcvtzs w6, s5, #32"); - COMPARE(fcvtzu(w2, d1, 1), "fcvtzu w2, d1, #1"); - COMPARE(fcvtzu(w2, s1, 1), "fcvtzu w2, s1, #1"); + COMPARE(fcvtzu(w2, d1, 1), "fcvtzu w2, d1, #1"); + COMPARE(fcvtzu(w2, s1, 1), "fcvtzu w2, s1, #1"); COMPARE(fcvtzu(x4, d3, 15), "fcvtzu x4, d3, #15"); COMPARE(fcvtzu(x4, s3, 15), "fcvtzu x4, s3, #15"); COMPARE(fcvtzu(w6, d5, 32), "fcvtzu w6, d5, #32"); @@ -3036,132 +3057,143 @@ TEST(barriers) { #define VLIST2(v) \ - v, VRegister((v.GetCode()+1)%32, v.GetSizeInBits(), v.GetLanes()) -#define VLIST3(v) VLIST2(v), \ - VRegister((v.GetCode()+2)%32, v.GetSizeInBits(), v.GetLanes()) -#define VLIST4(v) VLIST3(v), \ - VRegister((v.GetCode()+3)%32, v.GetSizeInBits(), v.GetLanes()) - - -#define NEON_FORMAT_LIST(V) \ - V(V8B(), "8b") \ - V(V16B(), "16b") \ - V(V4H(), "4h") \ - V(V8H(), "8h") \ - V(V2S(), "2s") \ - V(V4S(), "4s") \ + v, VRegister((v.GetCode() + 1) % 32, v.GetSizeInBits(), v.GetLanes()) +#define VLIST3(v) \ + VLIST2(v), VRegister((v.GetCode() + 2) % 32, v.GetSizeInBits(), v.GetLanes()) +#define VLIST4(v) \ + VLIST3(v), VRegister((v.GetCode() + 3) % 32, v.GetSizeInBits(), v.GetLanes()) + + +#define NEON_FORMAT_LIST(V) \ + V(V8B(), "8b") \ + V(V16B(), "16b") \ + V(V4H(), "4h") \ + V(V8H(), "8h") \ + V(V2S(), "2s") \ + V(V4S(), "4s") \ V(V2D(), "2d") -#define NEON_FORMAT_LIST_LP(V) \ - V(V4H(), "4h", V8B(), "8b") \ - V(V2S(), "2s", V4H(), "4h") \ - V(V1D(), "1d", V2S(), "2s") \ - 
V(V8H(), "8h", V16B(), "16b") \ - V(V4S(), "4s", V8H(), "8h") \ +#define NEON_FORMAT_LIST_LP(V) \ + V(V4H(), "4h", V8B(), "8b") \ + V(V2S(), "2s", V4H(), "4h") \ + V(V1D(), "1d", V2S(), "2s") \ + V(V8H(), "8h", V16B(), "16b") \ + V(V4S(), "4s", V8H(), "8h") \ V(V2D(), "2d", V4S(), "4s") -#define NEON_FORMAT_LIST_LW(V) \ - V(V8H(), "8h", V8B(), "8b") \ - V(V4S(), "4s", V4H(), "4h") \ +#define NEON_FORMAT_LIST_LW(V) \ + V(V8H(), "8h", V8B(), "8b") \ + V(V4S(), "4s", V4H(), "4h") \ V(V2D(), "2d", V2S(), "2s") -#define NEON_FORMAT_LIST_LW2(V) \ - V(V8H(), "8h", V16B(), "16b") \ - V(V4S(), "4s", V8H(), "8h") \ +#define NEON_FORMAT_LIST_LW2(V) \ + V(V8H(), "8h", V16B(), "16b") \ + V(V4S(), "4s", V8H(), "8h") \ V(V2D(), "2d", V4S(), "4s") -#define NEON_FORMAT_LIST_BHS(V) \ - V(V8B(), "8b") \ - V(V16B(), "16b") \ - V(V4H(), "4h") \ - V(V8H(), "8h") \ - V(V2S(), "2s") \ +#define NEON_FORMAT_LIST_BHS(V) \ + V(V8B(), "8b") \ + V(V16B(), "16b") \ + V(V4H(), "4h") \ + V(V8H(), "8h") \ + V(V2S(), "2s") \ V(V4S(), "4s") -#define NEON_FORMAT_LIST_HS(V) \ - V(V4H(), "4h") \ - V(V8H(), "8h") \ - V(V2S(), "2s") \ +#define NEON_FORMAT_LIST_HS(V) \ + V(V4H(), "4h") \ + V(V8H(), "8h") \ + V(V2S(), "2s") \ V(V4S(), "4s") TEST(neon_load_store_vector) { SETUP_MACRO(); - #define DISASM_INST(M, S) \ - COMPARE(Ld1(v0.M, MemOperand(x15)), \ - "ld1 {v0." S "}, [x15]"); \ +#define DISASM_INST(M, S) \ + COMPARE(Ld1(v0.M, MemOperand(x15)), "ld1 {v0." S "}, [x15]"); \ COMPARE(Ld1(v1.M, v2.M, MemOperand(x16)), \ - "ld1 {v1." S ", v2." S "}, [x16]"); \ + "ld1 {v1." S ", v2." S "}, [x16]"); \ COMPARE(Ld1(v3.M, v4.M, v5.M, MemOperand(x17)), \ - "ld1 {v3." S ", v4." S ", v5." S "}, [x17]"); \ + "ld1 {v3." S ", v4." S ", v5." S "}, [x17]"); \ COMPARE(Ld1(v6.M, v7.M, v8.M, v9.M, MemOperand(x18)), \ - "ld1 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18]") \ + "ld1 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18]") \ COMPARE(Ld1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp)), \ - "ld1 {v30." S ", v31." S ", v0." 
S ", v1." S "}, [sp]") \ + "ld1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") \ COMPARE(Ld2(v1.M, v2.M, MemOperand(x16)), \ - "ld2 {v1." S ", v2." S "}, [x16]"); \ + "ld2 {v1." S ", v2." S "}, [x16]"); \ COMPARE(Ld3(v3.M, v4.M, v5.M, MemOperand(x17)), \ - "ld3 {v3." S ", v4." S ", v5." S "}, [x17]"); \ + "ld3 {v3." S ", v4." S ", v5." S "}, [x17]"); \ COMPARE(Ld4(v6.M, v7.M, v8.M, v9.M, MemOperand(x18)), \ - "ld4 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18]") \ + "ld4 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18]") \ COMPARE(Ld4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp)), \ - "ld4 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") \ + "ld4 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") \ NEON_FORMAT_LIST(DISASM_INST); - #undef DISASM_INST - - #define DISASM_INST(M, S) \ - COMPARE(Ld1(v0.M, MemOperand(x15, x20, PostIndex)), \ - "ld1 {v0." S "}, [x15], x20"); \ - COMPARE(Ld1(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ - "ld1 {v1." S ", v2." S "}, [x16], x21"); \ - COMPARE(Ld1(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ - "ld1 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ - COMPARE(Ld1(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ - "ld1 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ - COMPARE(Ld1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ - "ld1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp], x24") \ - COMPARE(Ld2(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ - "ld2 {v1." S ", v2." S "}, [x16], x21"); \ - COMPARE(Ld3(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ - "ld3 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ - COMPARE(Ld4(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ - "ld4 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ - COMPARE(Ld4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ - "ld4 {v30." S ", v31." S ", v0." S ", v1." 
S "}, [sp], x24") \ +#undef DISASM_INST + +#define DISASM_INST(M, S) \ + COMPARE(Ld1(v0.M, MemOperand(x15, x20, PostIndex)), \ + "ld1 {v0." S "}, [x15], x20"); \ + COMPARE(Ld1(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ + "ld1 {v1." S ", v2." S "}, [x16], x21"); \ + COMPARE(Ld1(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ + "ld1 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ + COMPARE(Ld1(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ + "ld1 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ + COMPARE(Ld1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ + "ld1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp], x24") \ + COMPARE(Ld2(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ + "ld2 {v1." S ", v2." S "}, [x16], x21"); \ + COMPARE(Ld3(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ + "ld3 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ + COMPARE(Ld4(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ + "ld4 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ + COMPARE(Ld4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ + "ld4 {v30." S ", v31." S ", v0." S ", v1." 
S "}, [sp], x24") \ NEON_FORMAT_LIST(DISASM_INST); - #undef DISASM_INST +#undef DISASM_INST COMPARE(Ld1(v0.V8B(), MemOperand(x15, 8, PostIndex)), - "ld1 {v0.8b}, [x15], #8"); + "ld1 {v0.8b}, [x15], #8"); COMPARE(Ld1(v1.V16B(), MemOperand(x16, 16, PostIndex)), - "ld1 {v1.16b}, [x16], #16"); + "ld1 {v1.16b}, [x16], #16"); COMPARE(Ld1(v2.V4H(), v3.V4H(), MemOperand(x17, 16, PostIndex)), - "ld1 {v2.4h, v3.4h}, [x17], #16"); + "ld1 {v2.4h, v3.4h}, [x17], #16"); COMPARE(Ld1(v4.V8H(), v5.V8H(), MemOperand(x18, 32, PostIndex)), - "ld1 {v4.8h, v5.8h}, [x18], #32"); + "ld1 {v4.8h, v5.8h}, [x18], #32"); COMPARE(Ld1(v16.V2S(), v17.V2S(), v18.V2S(), MemOperand(x19, 24, PostIndex)), - "ld1 {v16.2s, v17.2s, v18.2s}, [x19], #24"); + "ld1 {v16.2s, v17.2s, v18.2s}, [x19], #24"); COMPARE(Ld1(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x19, 48, PostIndex)), - "ld1 {v16.4s, v17.4s, v18.4s}, [x19], #48"); - COMPARE(Ld1(v19.V2S(), v20.V2S(), v21.V2S(), v22.V2S(), + "ld1 {v16.4s, v17.4s, v18.4s}, [x19], #48"); + COMPARE(Ld1(v19.V2S(), + v20.V2S(), + v21.V2S(), + v22.V2S(), MemOperand(x20, 32, PostIndex)), "ld1 {v19.2s, v20.2s, v21.2s, v22.2s}, [x20], #32"); - COMPARE(Ld1(v23.V2D(), v24.V2D(), v25.V2D(), v26.V2D(), + COMPARE(Ld1(v23.V2D(), + v24.V2D(), + v25.V2D(), + v26.V2D(), MemOperand(x21, 64, PostIndex)), "ld1 {v23.2d, v24.2d, v25.2d, v26.2d}, [x21], #64"); COMPARE(Ld2(v2.V4H(), v3.V4H(), MemOperand(x17, 16, PostIndex)), - "ld2 {v2.4h, v3.4h}, [x17], #16"); + "ld2 {v2.4h, v3.4h}, [x17], #16"); COMPARE(Ld2(v4.V8H(), v5.V8H(), MemOperand(x18, 32, PostIndex)), - "ld2 {v4.8h, v5.8h}, [x18], #32"); + "ld2 {v4.8h, v5.8h}, [x18], #32"); COMPARE(Ld3(v16.V2S(), v17.V2S(), v18.V2S(), MemOperand(x19, 24, PostIndex)), - "ld3 {v16.2s, v17.2s, v18.2s}, [x19], #24"); + "ld3 {v16.2s, v17.2s, v18.2s}, [x19], #24"); COMPARE(Ld3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x19, 48, PostIndex)), - "ld3 {v16.4s, v17.4s, v18.4s}, [x19], #48"); - COMPARE(Ld4(v19.V2S(), v20.V2S(), v21.V2S(), v22.V2S(), + "ld3 
{v16.4s, v17.4s, v18.4s}, [x19], #48"); + COMPARE(Ld4(v19.V2S(), + v20.V2S(), + v21.V2S(), + v22.V2S(), MemOperand(x20, 32, PostIndex)), "ld4 {v19.2s, v20.2s, v21.2s, v22.2s}, [x20], #32"); - COMPARE(Ld4(v23.V2D(), v24.V2D(), v25.V2D(), v26.V2D(), + COMPARE(Ld4(v23.V2D(), + v24.V2D(), + v25.V2D(), + v26.V2D(), MemOperand(x21, 64, PostIndex)), "ld4 {v23.2d, v24.2d, v25.2d, v26.2d}, [x21], #64"); @@ -3170,70 +3202,78 @@ TEST(neon_load_store_vector) { "ld1 {v1.1d, v2.1d}, [x17], #16"); COMPARE(Ld1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x18, x19, PostIndex)), "ld1 {v3.1d, v4.1d, v5.1d}, [x18], x19"); - COMPARE(Ld1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), + COMPARE(Ld1(v30.V1D(), + v31.V1D(), + v0.V1D(), + v1.V1D(), MemOperand(x20, 32, PostIndex)), "ld1 {v30.1d, v31.1d, v0.1d, v1.1d}, [x20], #32"); COMPARE(Ld1(d30, d31, d0, d1, MemOperand(x21, x22, PostIndex)), "ld1 {v30.1d, v31.1d, v0.1d, v1.1d}, [x21], x22"); - #define DISASM_INST(M, S) \ - COMPARE(St1(v20.M, MemOperand(x15)), \ - "st1 {v20." S "}, [x15]"); \ - COMPARE(St1(v21.M, v22.M, MemOperand(x16)), \ - "st1 {v21." S ", v22." S "}, [x16]"); \ - COMPARE(St1(v23.M, v24.M, v25.M, MemOperand(x17)), \ - "st1 {v23." S ", v24." S ", v25." S "}, [x17]"); \ - COMPARE(St1(v26.M, v27.M, v28.M, v29.M, MemOperand(x18)), \ - "st1 {v26." S ", v27." S ", v28." S ", v29." S "}, [x18]") \ - COMPARE(St1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp)), \ - "st1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") \ - COMPARE(St2(VLIST2(v21.M), MemOperand(x16)), \ - "st2 {v21." S ", v22." S "}, [x16]"); \ - COMPARE(St3(v23.M, v24.M, v25.M, MemOperand(x17)), \ - "st3 {v23." S ", v24." S ", v25." S "}, [x17]"); \ - COMPARE(St4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp)), \ - "st4 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") +#define DISASM_INST(M, S) \ + COMPARE(St1(v20.M, MemOperand(x15)), "st1 {v20." S "}, [x15]"); \ + COMPARE(St1(v21.M, v22.M, MemOperand(x16)), \ + "st1 {v21." S ", v22." 
S "}, [x16]"); \ + COMPARE(St1(v23.M, v24.M, v25.M, MemOperand(x17)), \ + "st1 {v23." S ", v24." S ", v25." S "}, [x17]"); \ + COMPARE(St1(v26.M, v27.M, v28.M, v29.M, MemOperand(x18)), \ + "st1 {v26." S ", v27." S ", v28." S ", v29." S "}, [x18]") \ + COMPARE(St1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp)), \ + "st1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") \ + COMPARE(St2(VLIST2(v21.M), MemOperand(x16)), \ + "st2 {v21." S ", v22." S "}, [x16]"); \ + COMPARE(St3(v23.M, v24.M, v25.M, MemOperand(x17)), \ + "st3 {v23." S ", v24." S ", v25." S "}, [x17]"); \ + COMPARE(St4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp)), \ + "st4 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp]") NEON_FORMAT_LIST(DISASM_INST); - #undef DISASM_INST - - #define DISASM_INST(M, S) \ - COMPARE(St1(v0.M, MemOperand(x15, x20, PostIndex)), \ - "st1 {v0." S "}, [x15], x20"); \ - COMPARE(St1(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ - "st1 {v1." S ", v2." S "}, [x16], x21"); \ - COMPARE(St1(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ - "st1 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ - COMPARE(St1(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ - "st1 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ - COMPARE(St1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ - "st1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp], x24") \ - COMPARE(St2(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ - "st2 {v1." S ", v2." S "}, [x16], x21"); \ - COMPARE(St3(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ - "st3 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ - COMPARE(St4(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ - "st4 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ - COMPARE(St4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ - "st4 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp], x24") +#undef DISASM_INST + +#define DISASM_INST(M, S) \ + COMPARE(St1(v0.M, MemOperand(x15, x20, PostIndex)), \ + "st1 {v0." 
S "}, [x15], x20"); \ + COMPARE(St1(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ + "st1 {v1." S ", v2." S "}, [x16], x21"); \ + COMPARE(St1(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ + "st1 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ + COMPARE(St1(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ + "st1 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ + COMPARE(St1(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ + "st1 {v30." S ", v31." S ", v0." S ", v1." S "}, [sp], x24") \ + COMPARE(St2(v1.M, v2.M, MemOperand(x16, x21, PostIndex)), \ + "st2 {v1." S ", v2." S "}, [x16], x21"); \ + COMPARE(St3(v3.M, v4.M, v5.M, MemOperand(x17, x22, PostIndex)), \ + "st3 {v3." S ", v4." S ", v5." S "}, [x17], x22"); \ + COMPARE(St4(v6.M, v7.M, v8.M, v9.M, MemOperand(x18, x23, PostIndex)), \ + "st4 {v6." S ", v7." S ", v8." S ", v9." S "}, [x18], x23") \ + COMPARE(St4(v30.M, v31.M, v0.M, v1.M, MemOperand(sp, x24, PostIndex)), \ + "st4 {v30." S ", v31." S ", v0." S ", v1." 
S "}, [sp], x24") NEON_FORMAT_LIST(DISASM_INST); - #undef DISASM_INST +#undef DISASM_INST COMPARE(St1(v0.V8B(), MemOperand(x15, 8, PostIndex)), - "st1 {v0.8b}, [x15], #8"); + "st1 {v0.8b}, [x15], #8"); COMPARE(St1(v1.V16B(), MemOperand(x16, 16, PostIndex)), - "st1 {v1.16b}, [x16], #16"); + "st1 {v1.16b}, [x16], #16"); COMPARE(St1(v2.V4H(), v3.V4H(), MemOperand(x17, 16, PostIndex)), - "st1 {v2.4h, v3.4h}, [x17], #16"); + "st1 {v2.4h, v3.4h}, [x17], #16"); COMPARE(St1(v4.V8H(), v5.V8H(), MemOperand(x18, 32, PostIndex)), - "st1 {v4.8h, v5.8h}, [x18], #32"); + "st1 {v4.8h, v5.8h}, [x18], #32"); COMPARE(St1(v16.V2S(), v17.V2S(), v18.V2S(), MemOperand(x19, 24, PostIndex)), - "st1 {v16.2s, v17.2s, v18.2s}, [x19], #24"); + "st1 {v16.2s, v17.2s, v18.2s}, [x19], #24"); COMPARE(St1(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x19, 48, PostIndex)), - "st1 {v16.4s, v17.4s, v18.4s}, [x19], #48"); - COMPARE(St1(v19.V2S(), v20.V2S(), v21.V2S(), v22.V2S(), + "st1 {v16.4s, v17.4s, v18.4s}, [x19], #48"); + COMPARE(St1(v19.V2S(), + v20.V2S(), + v21.V2S(), + v22.V2S(), MemOperand(x20, 32, PostIndex)), "st1 {v19.2s, v20.2s, v21.2s, v22.2s}, [x20], #32"); - COMPARE(St1(v23.V2D(), v24.V2D(), v25.V2D(), v26.V2D(), + COMPARE(St1(v23.V2D(), + v24.V2D(), + v25.V2D(), + v26.V2D(), MemOperand(x21, 64, PostIndex)), "st1 {v23.2d, v24.2d, v25.2d, v26.2d}, [x21], #64"); COMPARE(St2(v1.V16B(), v2.V16B(), MemOperand(x16, 32, PostIndex)), @@ -3242,16 +3282,20 @@ TEST(neon_load_store_vector) { "st2 {v2.4h, v3.4h}, [x17], #16"); COMPARE(St2(v4.V8H(), v5.V8H(), MemOperand(x18, 32, PostIndex)), "st2 {v4.8h, v5.8h}, [x18], #32"); - COMPARE(St3(v16.V2S(), v17.V2S(), v18.V2S(), - MemOperand(x19, 24, PostIndex)), + COMPARE(St3(v16.V2S(), v17.V2S(), v18.V2S(), MemOperand(x19, 24, PostIndex)), "st3 {v16.2s, v17.2s, v18.2s}, [x19], #24"); - COMPARE(St3(v16.V4S(), v17.V4S(), v18.V4S(), - MemOperand(x19, 48, PostIndex)), + COMPARE(St3(v16.V4S(), v17.V4S(), v18.V4S(), MemOperand(x19, 48, PostIndex)), "st3 {v16.4s, 
v17.4s, v18.4s}, [x19], #48"); - COMPARE(St4(v19.V2S(), v20.V2S(), v21.V2S(), v22.V2S(), + COMPARE(St4(v19.V2S(), + v20.V2S(), + v21.V2S(), + v22.V2S(), MemOperand(x20, 32, PostIndex)), "st4 {v19.2s, v20.2s, v21.2s, v22.2s}, [x20], #32"); - COMPARE(St4(v23.V2D(), v24.V2D(), v25.V2D(), v26.V2D(), + COMPARE(St4(v23.V2D(), + v24.V2D(), + v25.V2D(), + v26.V2D(), MemOperand(x21, 64, PostIndex)), "st4 {v23.2d, v24.2d, v25.2d, v26.2d}, [x21], #64"); @@ -3260,7 +3304,10 @@ TEST(neon_load_store_vector) { "st1 {v1.1d, v2.1d}, [x17], #16"); COMPARE(St1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x18, x19, PostIndex)), "st1 {v3.1d, v4.1d, v5.1d}, [x18], x19"); - COMPARE(St1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), + COMPARE(St1(v30.V1D(), + v31.V1D(), + v0.V1D(), + v1.V1D(), MemOperand(x20, 32, PostIndex)), "st1 {v30.1d, v31.1d, v0.1d, v1.1d}, [x20], #32"); COMPARE(St1(d30, d31, d0, d1, MemOperand(x21, x22, PostIndex)), @@ -3386,237 +3433,275 @@ TEST(neon_load_store_lane) { COMPARE(Ld1(v13.D(), 1, MemOperand(sp, 8, PostIndex)), "ld1 {v13.d}[1], [sp], #8"); - COMPARE(Ld2(v0.V8B(), v1.V8B(), 0, MemOperand(x15)), - "ld2 {v0.b, v1.b}[0], [x15]"); + COMPARE(Ld2(v0.V8B(), v1.V8B(), 0, MemOperand(x15)), + "ld2 {v0.b, v1.b}[0], [x15]"); COMPARE(Ld2(v1.V16B(), v2.V16B(), 1, MemOperand(x16)), - "ld2 {v1.b, v2.b}[1], [x16]"); - COMPARE(Ld2(v2.V4H(), v3.V4H(), 2, MemOperand(x17)), - "ld2 {v2.h, v3.h}[2], [x17]"); - COMPARE(Ld2(v3.V8H(), v4.V8H(), 3, MemOperand(x18)), - "ld2 {v3.h, v4.h}[3], [x18]"); - COMPARE(Ld2(v4.V2S(), v5.V2S(), 0, MemOperand(x19)), - "ld2 {v4.s, v5.s}[0], [x19]"); - COMPARE(Ld2(v5.V4S(), v6.V4S(), 1, MemOperand(x20)), - "ld2 {v5.s, v6.s}[1], [x20]"); - COMPARE(Ld2(v6.V2D(), v7.V2D(), 0, MemOperand(x21)), - "ld2 {v6.d, v7.d}[0], [x21]"); - COMPARE(Ld2(v7.B(), v8.B(), 7, MemOperand(x22)), - "ld2 {v7.b, v8.b}[7], [x22]"); - COMPARE(Ld2(v8.B(), v9.B(), 15, MemOperand(x23)), - "ld2 {v8.b, v9.b}[15], [x23]"); - COMPARE(Ld2(v9.H(), v10.H(), 3, MemOperand(x24)), - "ld2 {v9.h, 
v10.h}[3], [x24]"); - COMPARE(Ld2(v10.H(), v11.H(), 7, MemOperand(x25)), - "ld2 {v10.h, v11.h}[7], [x25]"); - COMPARE(Ld2(v11.S(), v12.S(), 1, MemOperand(x26)), - "ld2 {v11.s, v12.s}[1], [x26]"); - COMPARE(Ld2(v12.S(), v13.S(), 3, MemOperand(x27)), - "ld2 {v12.s, v13.s}[3], [x27]"); - COMPARE(Ld2(v13.D(), v14.D(), 1, MemOperand(sp)), - "ld2 {v13.d, v14.d}[1], [sp]"); - - COMPARE(Ld2(v0.V8B(), v1.V8B(), 0, MemOperand(x15, x0, PostIndex)), - "ld2 {v0.b, v1.b}[0], [x15], x0"); + "ld2 {v1.b, v2.b}[1], [x16]"); + COMPARE(Ld2(v2.V4H(), v3.V4H(), 2, MemOperand(x17)), + "ld2 {v2.h, v3.h}[2], [x17]"); + COMPARE(Ld2(v3.V8H(), v4.V8H(), 3, MemOperand(x18)), + "ld2 {v3.h, v4.h}[3], [x18]"); + COMPARE(Ld2(v4.V2S(), v5.V2S(), 0, MemOperand(x19)), + "ld2 {v4.s, v5.s}[0], [x19]"); + COMPARE(Ld2(v5.V4S(), v6.V4S(), 1, MemOperand(x20)), + "ld2 {v5.s, v6.s}[1], [x20]"); + COMPARE(Ld2(v6.V2D(), v7.V2D(), 0, MemOperand(x21)), + "ld2 {v6.d, v7.d}[0], [x21]"); + COMPARE(Ld2(v7.B(), v8.B(), 7, MemOperand(x22)), + "ld2 {v7.b, v8.b}[7], [x22]"); + COMPARE(Ld2(v8.B(), v9.B(), 15, MemOperand(x23)), + "ld2 {v8.b, v9.b}[15], [x23]"); + COMPARE(Ld2(v9.H(), v10.H(), 3, MemOperand(x24)), + "ld2 {v9.h, v10.h}[3], [x24]"); + COMPARE(Ld2(v10.H(), v11.H(), 7, MemOperand(x25)), + "ld2 {v10.h, v11.h}[7], [x25]"); + COMPARE(Ld2(v11.S(), v12.S(), 1, MemOperand(x26)), + "ld2 {v11.s, v12.s}[1], [x26]"); + COMPARE(Ld2(v12.S(), v13.S(), 3, MemOperand(x27)), + "ld2 {v12.s, v13.s}[3], [x27]"); + COMPARE(Ld2(v13.D(), v14.D(), 1, MemOperand(sp)), + "ld2 {v13.d, v14.d}[1], [sp]"); + + COMPARE(Ld2(v0.V8B(), v1.V8B(), 0, MemOperand(x15, x0, PostIndex)), + "ld2 {v0.b, v1.b}[0], [x15], x0"); COMPARE(Ld2(v1.V16B(), v2.V16B(), 1, MemOperand(x16, 2, PostIndex)), - "ld2 {v1.b, v2.b}[1], [x16], #2"); - COMPARE(Ld2(v2.V4H(), v3.V4H(), 2, MemOperand(x17, 4, PostIndex)), - "ld2 {v2.h, v3.h}[2], [x17], #4"); - COMPARE(Ld2(v3.V8H(), v4.V8H(), 3, MemOperand(x18, x1, PostIndex)), - "ld2 {v3.h, v4.h}[3], [x18], x1"); - 
COMPARE(Ld2(v4.V2S(), v5.V2S(), 0, MemOperand(x19, x2, PostIndex)), - "ld2 {v4.s, v5.s}[0], [x19], x2"); - COMPARE(Ld2(v5.V4S(), v6.V4S(), 1, MemOperand(x20, 8, PostIndex)), - "ld2 {v5.s, v6.s}[1], [x20], #8"); - COMPARE(Ld2(v6.V2D(), v7.V2D(), 0, MemOperand(x21, 16, PostIndex)), - "ld2 {v6.d, v7.d}[0], [x21], #16"); - COMPARE(Ld2(v7.B(), v8.B(), 7, MemOperand(x22, 2, PostIndex)), - "ld2 {v7.b, v8.b}[7], [x22], #2"); - COMPARE(Ld2(v8.B(), v9.B(), 15, MemOperand(x23, x3, PostIndex)), - "ld2 {v8.b, v9.b}[15], [x23], x3"); - COMPARE(Ld2(v9.H(), v10.H(), 3, MemOperand(x24, x4, PostIndex)), - "ld2 {v9.h, v10.h}[3], [x24], x4"); - COMPARE(Ld2(v10.H(), v11.H(), 7, MemOperand(x25, 4, PostIndex)), - "ld2 {v10.h, v11.h}[7], [x25], #4"); - COMPARE(Ld2(v11.S(), v12.S(), 1, MemOperand(x26, 8, PostIndex)), - "ld2 {v11.s, v12.s}[1], [x26], #8"); - COMPARE(Ld2(v12.S(), v13.S(), 3, MemOperand(x27, x5, PostIndex)), - "ld2 {v12.s, v13.s}[3], [x27], x5"); - COMPARE(Ld2(v11.S(), v12.S(), 3, MemOperand(x26, 8, PostIndex)), - "ld2 {v11.s, v12.s}[3], [x26], #8"); - COMPARE(Ld2(v13.D(), v14.D(), 1, MemOperand(sp, x6, PostIndex)), - "ld2 {v13.d, v14.d}[1], [sp], x6"); - COMPARE(Ld2(v13.D(), v14.D(), 1, MemOperand(sp, 16, PostIndex)), - "ld2 {v13.d, v14.d}[1], [sp], #16"); - - COMPARE(Ld3(v0.V8B(), v1.V8B(), v2.V8B(), 0, MemOperand(x15)), - "ld3 {v0.b, v1.b, v2.b}[0], [x15]"); - COMPARE(Ld3(v1.V16B(), v2.V16B(), v3.V16B(), 1, MemOperand(x16)), - "ld3 {v1.b, v2.b, v3.b}[1], [x16]"); - COMPARE(Ld3(v2.V4H(), v3.V4H(), v4.V4H(), 2, MemOperand(x17)), - "ld3 {v2.h, v3.h, v4.h}[2], [x17]"); - COMPARE(Ld3(v3.V8H(), v4.V8H(), v5.V8H(), 3, MemOperand(x18)), - "ld3 {v3.h, v4.h, v5.h}[3], [x18]"); - COMPARE(Ld3(v4.V2S(), v5.V2S(), v6.V2S(), 0, MemOperand(x19)), - "ld3 {v4.s, v5.s, v6.s}[0], [x19]"); - COMPARE(Ld3(v5.V4S(), v6.V4S(), v7.V4S(), 1, MemOperand(x20)), - "ld3 {v5.s, v6.s, v7.s}[1], [x20]"); - COMPARE(Ld3(v6.V2D(), v7.V2D(), v8.V2D(), 0, MemOperand(x21)), - "ld3 {v6.d, v7.d, v8.d}[0], [x21]"); 
- COMPARE(Ld3(v7.B(), v8.B(), v9.B(), 7, MemOperand(x22)), - "ld3 {v7.b, v8.b, v9.b}[7], [x22]"); - COMPARE(Ld3(v8.B(), v9.B(), v10.B(), 15, MemOperand(x23)), - "ld3 {v8.b, v9.b, v10.b}[15], [x23]"); - COMPARE(Ld3(v9.H(), v10.H(), v11.H(), 3, MemOperand(x24)), - "ld3 {v9.h, v10.h, v11.h}[3], [x24]"); - COMPARE(Ld3(v10.H(), v11.H(), v12.H(), 7, MemOperand(x25)), - "ld3 {v10.h, v11.h, v12.h}[7], [x25]"); - COMPARE(Ld3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x26)), - "ld3 {v11.s, v12.s, v13.s}[1], [x26]"); - COMPARE(Ld3(v12.S(), v13.S(), v14.S(), 3, MemOperand(x27)), - "ld3 {v12.s, v13.s, v14.s}[3], [x27]"); - COMPARE(Ld3(v13.D(), v14.D(), v15.D(), 1, MemOperand(sp)), - "ld3 {v13.d, v14.d, v15.d}[1], [sp]"); - - COMPARE(Ld3(v0.V8B(), v1.V8B(), v2.V8B(), 0, - MemOperand(x15, x0, PostIndex)), - "ld3 {v0.b, v1.b, v2.b}[0], [x15], x0"); - COMPARE(Ld3(v1.V16B(), v2.V16B(), v3.V16B(), 1, + "ld2 {v1.b, v2.b}[1], [x16], #2"); + COMPARE(Ld2(v2.V4H(), v3.V4H(), 2, MemOperand(x17, 4, PostIndex)), + "ld2 {v2.h, v3.h}[2], [x17], #4"); + COMPARE(Ld2(v3.V8H(), v4.V8H(), 3, MemOperand(x18, x1, PostIndex)), + "ld2 {v3.h, v4.h}[3], [x18], x1"); + COMPARE(Ld2(v4.V2S(), v5.V2S(), 0, MemOperand(x19, x2, PostIndex)), + "ld2 {v4.s, v5.s}[0], [x19], x2"); + COMPARE(Ld2(v5.V4S(), v6.V4S(), 1, MemOperand(x20, 8, PostIndex)), + "ld2 {v5.s, v6.s}[1], [x20], #8"); + COMPARE(Ld2(v6.V2D(), v7.V2D(), 0, MemOperand(x21, 16, PostIndex)), + "ld2 {v6.d, v7.d}[0], [x21], #16"); + COMPARE(Ld2(v7.B(), v8.B(), 7, MemOperand(x22, 2, PostIndex)), + "ld2 {v7.b, v8.b}[7], [x22], #2"); + COMPARE(Ld2(v8.B(), v9.B(), 15, MemOperand(x23, x3, PostIndex)), + "ld2 {v8.b, v9.b}[15], [x23], x3"); + COMPARE(Ld2(v9.H(), v10.H(), 3, MemOperand(x24, x4, PostIndex)), + "ld2 {v9.h, v10.h}[3], [x24], x4"); + COMPARE(Ld2(v10.H(), v11.H(), 7, MemOperand(x25, 4, PostIndex)), + "ld2 {v10.h, v11.h}[7], [x25], #4"); + COMPARE(Ld2(v11.S(), v12.S(), 1, MemOperand(x26, 8, PostIndex)), + "ld2 {v11.s, v12.s}[1], [x26], #8"); + 
COMPARE(Ld2(v12.S(), v13.S(), 3, MemOperand(x27, x5, PostIndex)), + "ld2 {v12.s, v13.s}[3], [x27], x5"); + COMPARE(Ld2(v11.S(), v12.S(), 3, MemOperand(x26, 8, PostIndex)), + "ld2 {v11.s, v12.s}[3], [x26], #8"); + COMPARE(Ld2(v13.D(), v14.D(), 1, MemOperand(sp, x6, PostIndex)), + "ld2 {v13.d, v14.d}[1], [sp], x6"); + COMPARE(Ld2(v13.D(), v14.D(), 1, MemOperand(sp, 16, PostIndex)), + "ld2 {v13.d, v14.d}[1], [sp], #16"); + + COMPARE(Ld3(v0.V8B(), v1.V8B(), v2.V8B(), 0, MemOperand(x15)), + "ld3 {v0.b, v1.b, v2.b}[0], [x15]"); + COMPARE(Ld3(v1.V16B(), v2.V16B(), v3.V16B(), 1, MemOperand(x16)), + "ld3 {v1.b, v2.b, v3.b}[1], [x16]"); + COMPARE(Ld3(v2.V4H(), v3.V4H(), v4.V4H(), 2, MemOperand(x17)), + "ld3 {v2.h, v3.h, v4.h}[2], [x17]"); + COMPARE(Ld3(v3.V8H(), v4.V8H(), v5.V8H(), 3, MemOperand(x18)), + "ld3 {v3.h, v4.h, v5.h}[3], [x18]"); + COMPARE(Ld3(v4.V2S(), v5.V2S(), v6.V2S(), 0, MemOperand(x19)), + "ld3 {v4.s, v5.s, v6.s}[0], [x19]"); + COMPARE(Ld3(v5.V4S(), v6.V4S(), v7.V4S(), 1, MemOperand(x20)), + "ld3 {v5.s, v6.s, v7.s}[1], [x20]"); + COMPARE(Ld3(v6.V2D(), v7.V2D(), v8.V2D(), 0, MemOperand(x21)), + "ld3 {v6.d, v7.d, v8.d}[0], [x21]"); + COMPARE(Ld3(v7.B(), v8.B(), v9.B(), 7, MemOperand(x22)), + "ld3 {v7.b, v8.b, v9.b}[7], [x22]"); + COMPARE(Ld3(v8.B(), v9.B(), v10.B(), 15, MemOperand(x23)), + "ld3 {v8.b, v9.b, v10.b}[15], [x23]"); + COMPARE(Ld3(v9.H(), v10.H(), v11.H(), 3, MemOperand(x24)), + "ld3 {v9.h, v10.h, v11.h}[3], [x24]"); + COMPARE(Ld3(v10.H(), v11.H(), v12.H(), 7, MemOperand(x25)), + "ld3 {v10.h, v11.h, v12.h}[7], [x25]"); + COMPARE(Ld3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x26)), + "ld3 {v11.s, v12.s, v13.s}[1], [x26]"); + COMPARE(Ld3(v12.S(), v13.S(), v14.S(), 3, MemOperand(x27)), + "ld3 {v12.s, v13.s, v14.s}[3], [x27]"); + COMPARE(Ld3(v13.D(), v14.D(), v15.D(), 1, MemOperand(sp)), + "ld3 {v13.d, v14.d, v15.d}[1], [sp]"); + + COMPARE(Ld3(v0.V8B(), v1.V8B(), v2.V8B(), 0, MemOperand(x15, x0, PostIndex)), + "ld3 {v0.b, v1.b, v2.b}[0], [x15], x0"); + 
COMPARE(Ld3(v1.V16B(), + v2.V16B(), + v3.V16B(), + 1, MemOperand(x16, 3, PostIndex)), - "ld3 {v1.b, v2.b, v3.b}[1], [x16], #3"); - COMPARE(Ld3(v2.V4H(), v3.V4H(), v4.V4H(), 2, - MemOperand(x17, 6, PostIndex)), - "ld3 {v2.h, v3.h, v4.h}[2], [x17], #6"); - COMPARE(Ld3(v3.V8H(), v4.V8H(), v5.V8H(), 3, - MemOperand(x18, x1, PostIndex)), - "ld3 {v3.h, v4.h, v5.h}[3], [x18], x1"); - COMPARE(Ld3(v4.V2S(), v5.V2S(), v6.V2S(), 0, - MemOperand(x19, x2, PostIndex)), - "ld3 {v4.s, v5.s, v6.s}[0], [x19], x2"); - COMPARE(Ld3(v5.V4S(), v6.V4S(), v7.V4S(), 1, - MemOperand(x20, 12, PostIndex)), - "ld3 {v5.s, v6.s, v7.s}[1], [x20], #12"); - COMPARE(Ld3(v6.V2D(), v7.V2D(), v8.V2D(), 0, - MemOperand(x21, 24, PostIndex)), - "ld3 {v6.d, v7.d, v8.d}[0], [x21], #24"); - COMPARE(Ld3(v7.B(), v8.B(), v9.B(), 7, - MemOperand(x22, 3, PostIndex)), - "ld3 {v7.b, v8.b, v9.b}[7], [x22], #3"); - COMPARE(Ld3(v8.B(), v9.B(), v10.B(), 15, - MemOperand(x23, x3, PostIndex)), - "ld3 {v8.b, v9.b, v10.b}[15], [x23], x3"); - COMPARE(Ld3(v9.H(), v10.H(), v11.H(), 3, - MemOperand(x24, x4, PostIndex)), - "ld3 {v9.h, v10.h, v11.h}[3], [x24], x4"); - COMPARE(Ld3(v10.H(), v11.H(), v12.H(), 7, - MemOperand(x25, 6, PostIndex)), - "ld3 {v10.h, v11.h, v12.h}[7], [x25], #6"); - COMPARE(Ld3(v11.S(), v12.S(), v13.S(), 1, - MemOperand(x26, 12, PostIndex)), - "ld3 {v11.s, v12.s, v13.s}[1], [x26], #12"); - COMPARE(Ld3(v12.S(), v13.S(), v14.S(), 3, - MemOperand(x27, x5, PostIndex)), - "ld3 {v12.s, v13.s, v14.s}[3], [x27], x5"); - COMPARE(Ld3(v12.S(), v13.S(), v14.S(), 3, - MemOperand(x27, 12, PostIndex)), - "ld3 {v12.s, v13.s, v14.s}[3], [x27], #12"); - COMPARE(Ld3(v13.D(), v14.D(), v15.D(), 1, - MemOperand(sp, x6, PostIndex)), - "ld3 {v13.d, v14.d, v15.d}[1], [sp], x6"); - COMPARE(Ld3(v13.D(), v14.D(), v15.D(), 1, - MemOperand(sp, 24, PostIndex)), - "ld3 {v13.d, v14.d, v15.d}[1], [sp], #24"); - - COMPARE(Ld4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), 0, - MemOperand(x15)), - "ld4 {v0.b, v1.b, v2.b, v3.b}[0], [x15]"); - 
COMPARE(Ld4(v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), 1, - MemOperand(x16)), - "ld4 {v1.b, v2.b, v3.b, v4.b}[1], [x16]"); - COMPARE(Ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), 2, - MemOperand(x17)), - "ld4 {v2.h, v3.h, v4.h, v5.h}[2], [x17]"); - COMPARE(Ld4(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), 3, - MemOperand(x18)), - "ld4 {v3.h, v4.h, v5.h, v6.h}[3], [x18]"); - COMPARE(Ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), 0, - MemOperand(x19)), - "ld4 {v4.s, v5.s, v6.s, v7.s}[0], [x19]"); - COMPARE(Ld4(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), 1, - MemOperand(x20)), - "ld4 {v5.s, v6.s, v7.s, v8.s}[1], [x20]"); - COMPARE(Ld4(v6.V2D(), v7.V2D(), v8.V2D(), v9.V2D(), 0, - MemOperand(x21)), - "ld4 {v6.d, v7.d, v8.d, v9.d}[0], [x21]"); - COMPARE(Ld4(v7.B(), v8.B(), v9.B(), v10.B(), 7, - MemOperand(x22)), - "ld4 {v7.b, v8.b, v9.b, v10.b}[7], [x22]"); - COMPARE(Ld4(v8.B(), v9.B(), v10.B(), v11.B(), 15, - MemOperand(x23)), - "ld4 {v8.b, v9.b, v10.b, v11.b}[15], [x23]"); - COMPARE(Ld4(v9.H(), v10.H(), v11.H(), v12.H(), 3, - MemOperand(x24)), - "ld4 {v9.h, v10.h, v11.h, v12.h}[3], [x24]"); - COMPARE(Ld4(v10.H(), v11.H(), v12.H(), v13.H(), 7, - MemOperand(x25)), - "ld4 {v10.h, v11.h, v12.h, v13.h}[7], [x25]"); - COMPARE(Ld4(v11.S(), v12.S(), v13.S(), v14.S(), 1, - MemOperand(x26)), - "ld4 {v11.s, v12.s, v13.s, v14.s}[1], [x26]"); - COMPARE(Ld4(v12.S(), v13.S(), v14.S(), v15.S(), 3, - MemOperand(x27)), - "ld4 {v12.s, v13.s, v14.s, v15.s}[3], [x27]"); - COMPARE(Ld4(v13.D(), v14.D(), v15.D(), v16.D(), 1, - MemOperand(sp)), - "ld4 {v13.d, v14.d, v15.d, v16.d}[1], [sp]"); - - COMPARE(Ld4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), 0, + "ld3 {v1.b, v2.b, v3.b}[1], [x16], #3"); + COMPARE(Ld3(v2.V4H(), v3.V4H(), v4.V4H(), 2, MemOperand(x17, 6, PostIndex)), + "ld3 {v2.h, v3.h, v4.h}[2], [x17], #6"); + COMPARE(Ld3(v3.V8H(), v4.V8H(), v5.V8H(), 3, MemOperand(x18, x1, PostIndex)), + "ld3 {v3.h, v4.h, v5.h}[3], [x18], x1"); + COMPARE(Ld3(v4.V2S(), v5.V2S(), v6.V2S(), 0, MemOperand(x19, x2, 
PostIndex)), + "ld3 {v4.s, v5.s, v6.s}[0], [x19], x2"); + COMPARE(Ld3(v5.V4S(), v6.V4S(), v7.V4S(), 1, MemOperand(x20, 12, PostIndex)), + "ld3 {v5.s, v6.s, v7.s}[1], [x20], #12"); + COMPARE(Ld3(v6.V2D(), v7.V2D(), v8.V2D(), 0, MemOperand(x21, 24, PostIndex)), + "ld3 {v6.d, v7.d, v8.d}[0], [x21], #24"); + COMPARE(Ld3(v7.B(), v8.B(), v9.B(), 7, MemOperand(x22, 3, PostIndex)), + "ld3 {v7.b, v8.b, v9.b}[7], [x22], #3"); + COMPARE(Ld3(v8.B(), v9.B(), v10.B(), 15, MemOperand(x23, x3, PostIndex)), + "ld3 {v8.b, v9.b, v10.b}[15], [x23], x3"); + COMPARE(Ld3(v9.H(), v10.H(), v11.H(), 3, MemOperand(x24, x4, PostIndex)), + "ld3 {v9.h, v10.h, v11.h}[3], [x24], x4"); + COMPARE(Ld3(v10.H(), v11.H(), v12.H(), 7, MemOperand(x25, 6, PostIndex)), + "ld3 {v10.h, v11.h, v12.h}[7], [x25], #6"); + COMPARE(Ld3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x26, 12, PostIndex)), + "ld3 {v11.s, v12.s, v13.s}[1], [x26], #12"); + COMPARE(Ld3(v12.S(), v13.S(), v14.S(), 3, MemOperand(x27, x5, PostIndex)), + "ld3 {v12.s, v13.s, v14.s}[3], [x27], x5"); + COMPARE(Ld3(v12.S(), v13.S(), v14.S(), 3, MemOperand(x27, 12, PostIndex)), + "ld3 {v12.s, v13.s, v14.s}[3], [x27], #12"); + COMPARE(Ld3(v13.D(), v14.D(), v15.D(), 1, MemOperand(sp, x6, PostIndex)), + "ld3 {v13.d, v14.d, v15.d}[1], [sp], x6"); + COMPARE(Ld3(v13.D(), v14.D(), v15.D(), 1, MemOperand(sp, 24, PostIndex)), + "ld3 {v13.d, v14.d, v15.d}[1], [sp], #24"); + + COMPARE(Ld4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), 0, MemOperand(x15)), + "ld4 {v0.b, v1.b, v2.b, v3.b}[0], [x15]"); + COMPARE(Ld4(v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), 1, MemOperand(x16)), + "ld4 {v1.b, v2.b, v3.b, v4.b}[1], [x16]"); + COMPARE(Ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), 2, MemOperand(x17)), + "ld4 {v2.h, v3.h, v4.h, v5.h}[2], [x17]"); + COMPARE(Ld4(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), 3, MemOperand(x18)), + "ld4 {v3.h, v4.h, v5.h, v6.h}[3], [x18]"); + COMPARE(Ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), 0, MemOperand(x19)), + "ld4 {v4.s, v5.s, v6.s, v7.s}[0], 
[x19]"); + COMPARE(Ld4(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), 1, MemOperand(x20)), + "ld4 {v5.s, v6.s, v7.s, v8.s}[1], [x20]"); + COMPARE(Ld4(v6.V2D(), v7.V2D(), v8.V2D(), v9.V2D(), 0, MemOperand(x21)), + "ld4 {v6.d, v7.d, v8.d, v9.d}[0], [x21]"); + COMPARE(Ld4(v7.B(), v8.B(), v9.B(), v10.B(), 7, MemOperand(x22)), + "ld4 {v7.b, v8.b, v9.b, v10.b}[7], [x22]"); + COMPARE(Ld4(v8.B(), v9.B(), v10.B(), v11.B(), 15, MemOperand(x23)), + "ld4 {v8.b, v9.b, v10.b, v11.b}[15], [x23]"); + COMPARE(Ld4(v9.H(), v10.H(), v11.H(), v12.H(), 3, MemOperand(x24)), + "ld4 {v9.h, v10.h, v11.h, v12.h}[3], [x24]"); + COMPARE(Ld4(v10.H(), v11.H(), v12.H(), v13.H(), 7, MemOperand(x25)), + "ld4 {v10.h, v11.h, v12.h, v13.h}[7], [x25]"); + COMPARE(Ld4(v11.S(), v12.S(), v13.S(), v14.S(), 1, MemOperand(x26)), + "ld4 {v11.s, v12.s, v13.s, v14.s}[1], [x26]"); + COMPARE(Ld4(v12.S(), v13.S(), v14.S(), v15.S(), 3, MemOperand(x27)), + "ld4 {v12.s, v13.s, v14.s, v15.s}[3], [x27]"); + COMPARE(Ld4(v13.D(), v14.D(), v15.D(), v16.D(), 1, MemOperand(sp)), + "ld4 {v13.d, v14.d, v15.d, v16.d}[1], [sp]"); + + COMPARE(Ld4(v0.V8B(), + v1.V8B(), + v2.V8B(), + v3.V8B(), + 0, MemOperand(x15, x0, PostIndex)), - "ld4 {v0.b, v1.b, v2.b, v3.b}[0], [x15], x0"); - COMPARE(Ld4(v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), 1, + "ld4 {v0.b, v1.b, v2.b, v3.b}[0], [x15], x0"); + COMPARE(Ld4(v1.V16B(), + v2.V16B(), + v3.V16B(), + v4.V16B(), + 1, MemOperand(x16, 4, PostIndex)), - "ld4 {v1.b, v2.b, v3.b, v4.b}[1], [x16], #4"); - COMPARE(Ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), 2, + "ld4 {v1.b, v2.b, v3.b, v4.b}[1], [x16], #4"); + COMPARE(Ld4(v2.V4H(), + v3.V4H(), + v4.V4H(), + v5.V4H(), + 2, MemOperand(x17, 8, PostIndex)), - "ld4 {v2.h, v3.h, v4.h, v5.h}[2], [x17], #8"); - COMPARE(Ld4(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), 3, + "ld4 {v2.h, v3.h, v4.h, v5.h}[2], [x17], #8"); + COMPARE(Ld4(v3.V8H(), + v4.V8H(), + v5.V8H(), + v6.V8H(), + 3, MemOperand(x18, x1, PostIndex)), - "ld4 {v3.h, v4.h, v5.h, v6.h}[3], [x18], x1"); - 
COMPARE(Ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), 0, + "ld4 {v3.h, v4.h, v5.h, v6.h}[3], [x18], x1"); + COMPARE(Ld4(v4.V2S(), + v5.V2S(), + v6.V2S(), + v7.V2S(), + 0, MemOperand(x19, x2, PostIndex)), - "ld4 {v4.s, v5.s, v6.s, v7.s}[0], [x19], x2"); - COMPARE(Ld4(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), 1, + "ld4 {v4.s, v5.s, v6.s, v7.s}[0], [x19], x2"); + COMPARE(Ld4(v5.V4S(), + v6.V4S(), + v7.V4S(), + v8.V4S(), + 1, MemOperand(x20, 16, PostIndex)), - "ld4 {v5.s, v6.s, v7.s, v8.s}[1], [x20], #16"); - COMPARE(Ld4(v6.V2D(), v7.V2D(), v8.V2D(), v9.V2D(), 0, + "ld4 {v5.s, v6.s, v7.s, v8.s}[1], [x20], #16"); + COMPARE(Ld4(v6.V2D(), + v7.V2D(), + v8.V2D(), + v9.V2D(), + 0, MemOperand(x21, 32, PostIndex)), - "ld4 {v6.d, v7.d, v8.d, v9.d}[0], [x21], #32"); - COMPARE(Ld4(v7.B(), v8.B(), v9.B(), v10.B(), 7, + "ld4 {v6.d, v7.d, v8.d, v9.d}[0], [x21], #32"); + COMPARE(Ld4(v7.B(), + v8.B(), + v9.B(), + v10.B(), + 7, MemOperand(x22, 4, PostIndex)), - "ld4 {v7.b, v8.b, v9.b, v10.b}[7], [x22], #4"); - COMPARE(Ld4(v8.B(), v9.B(), v10.B(), v11.B(), 15, + "ld4 {v7.b, v8.b, v9.b, v10.b}[7], [x22], #4"); + COMPARE(Ld4(v8.B(), + v9.B(), + v10.B(), + v11.B(), + 15, MemOperand(x23, x3, PostIndex)), - "ld4 {v8.b, v9.b, v10.b, v11.b}[15], [x23], x3"); - COMPARE(Ld4(v9.H(), v10.H(), v11.H(), v12.H(), 3, + "ld4 {v8.b, v9.b, v10.b, v11.b}[15], [x23], x3"); + COMPARE(Ld4(v9.H(), + v10.H(), + v11.H(), + v12.H(), + 3, MemOperand(x24, x4, PostIndex)), - "ld4 {v9.h, v10.h, v11.h, v12.h}[3], [x24], x4"); - COMPARE(Ld4(v10.H(), v11.H(), v12.H(), v13.H(), 7, + "ld4 {v9.h, v10.h, v11.h, v12.h}[3], [x24], x4"); + COMPARE(Ld4(v10.H(), + v11.H(), + v12.H(), + v13.H(), + 7, MemOperand(x25, 8, PostIndex)), - "ld4 {v10.h, v11.h, v12.h, v13.h}[7], [x25], #8"); - COMPARE(Ld4(v11.S(), v12.S(), v13.S(), v14.S(), 1, + "ld4 {v10.h, v11.h, v12.h, v13.h}[7], [x25], #8"); + COMPARE(Ld4(v11.S(), + v12.S(), + v13.S(), + v14.S(), + 1, MemOperand(x26, 16, PostIndex)), - "ld4 {v11.s, v12.s, v13.s, v14.s}[1], [x26], 
#16"); - COMPARE(Ld4(v12.S(), v13.S(), v14.S(), v15.S(), 3, + "ld4 {v11.s, v12.s, v13.s, v14.s}[1], [x26], #16"); + COMPARE(Ld4(v12.S(), + v13.S(), + v14.S(), + v15.S(), + 3, MemOperand(x27, x5, PostIndex)), - "ld4 {v12.s, v13.s, v14.s, v15.s}[3], [x27], x5"); - COMPARE(Ld4(v11.S(), v12.S(), v13.S(), v14.S(), 3, + "ld4 {v12.s, v13.s, v14.s, v15.s}[3], [x27], x5"); + COMPARE(Ld4(v11.S(), + v12.S(), + v13.S(), + v14.S(), + 3, MemOperand(x26, 16, PostIndex)), - "ld4 {v11.s, v12.s, v13.s, v14.s}[3], [x26], #16"); - COMPARE(Ld4(v13.D(), v14.D(), v15.D(), v16.D(), 1, + "ld4 {v11.s, v12.s, v13.s, v14.s}[3], [x26], #16"); + COMPARE(Ld4(v13.D(), + v14.D(), + v15.D(), + v16.D(), + 1, MemOperand(sp, x6, PostIndex)), - "ld4 {v13.d, v14.d, v15.d, v16.d}[1], [sp], x6"); - COMPARE(Ld4(v13.D(), v14.D(), v15.D(), v16.D(), 1, + "ld4 {v13.d, v14.d, v15.d, v16.d}[1], [sp], x6"); + COMPARE(Ld4(v13.D(), + v14.D(), + v15.D(), + v16.D(), + 1, MemOperand(sp, 32, PostIndex)), - "ld4 {v13.d, v14.d, v15.d, v16.d}[1], [sp], #32"); + "ld4 {v13.d, v14.d, v15.d, v16.d}[1], [sp], #32"); COMPARE(St1(v0.V8B(), 0, MemOperand(x15)), "st1 {v0.b}[0], [x15]"); COMPARE(St1(v1.V16B(), 1, MemOperand(x16)), "st1 {v1.b}[1], [x16]"); @@ -3661,90 +3746,90 @@ TEST(neon_load_store_lane) { "st1 {v12.s}[3], [x27], x5"); COMPARE(St1(v13.D(), 1, MemOperand(sp, x6, PostIndex)), "st1 {v13.d}[1], [sp], x6"); - COMPARE(St2(v0.V8B(), v1.V8B(), 0, MemOperand(x15, x0, PostIndex)), + COMPARE(St2(v0.V8B(), v1.V8B(), 0, MemOperand(x15, x0, PostIndex)), "st2 {v0.b, v1.b}[0], [x15], x0"); COMPARE(St2(v1.V16B(), v2.V16B(), 1, MemOperand(x16, 2, PostIndex)), "st2 {v1.b, v2.b}[1], [x16], #2"); - COMPARE(St2(v2.V4H(), v3.V4H(), 2, MemOperand(x17, 4, PostIndex)), + COMPARE(St2(v2.V4H(), v3.V4H(), 2, MemOperand(x17, 4, PostIndex)), "st2 {v2.h, v3.h}[2], [x17], #4"); - COMPARE(St2(v3.V8H(), v4.V8H(), 3, MemOperand(x18, x1, PostIndex)), + COMPARE(St2(v3.V8H(), v4.V8H(), 3, MemOperand(x18, x1, PostIndex)), "st2 {v3.h, v4.h}[3], [x18], 
x1"); - COMPARE(St2(v4.V2S(), v5.V2S(), 0, MemOperand(x19, x2, PostIndex)), + COMPARE(St2(v4.V2S(), v5.V2S(), 0, MemOperand(x19, x2, PostIndex)), "st2 {v4.s, v5.s}[0], [x19], x2"); - COMPARE(St2(v5.V4S(), v6.V4S(), 1, MemOperand(x20, 8, PostIndex)), + COMPARE(St2(v5.V4S(), v6.V4S(), 1, MemOperand(x20, 8, PostIndex)), "st2 {v5.s, v6.s}[1], [x20], #8"); - COMPARE(St2(v6.V2D(), v7.V2D(), 0, MemOperand(x21, 16, PostIndex)), + COMPARE(St2(v6.V2D(), v7.V2D(), 0, MemOperand(x21, 16, PostIndex)), "st2 {v6.d, v7.d}[0], [x21], #16"); - COMPARE(St2(v7.B(), v8.B(), 7, MemOperand(x22, 2, PostIndex)), + COMPARE(St2(v7.B(), v8.B(), 7, MemOperand(x22, 2, PostIndex)), "st2 {v7.b, v8.b}[7], [x22], #2"); - COMPARE(St2(v8.B(), v9.B(), 15, MemOperand(x23, x3, PostIndex)), + COMPARE(St2(v8.B(), v9.B(), 15, MemOperand(x23, x3, PostIndex)), "st2 {v8.b, v9.b}[15], [x23], x3"); - COMPARE(St2(v9.H(), v10.H(), 3, MemOperand(x24, x4, PostIndex)), + COMPARE(St2(v9.H(), v10.H(), 3, MemOperand(x24, x4, PostIndex)), "st2 {v9.h, v10.h}[3], [x24], x4"); - COMPARE(St2(v10.H(), v11.H(), 7, MemOperand(x25, 4, PostIndex)), + COMPARE(St2(v10.H(), v11.H(), 7, MemOperand(x25, 4, PostIndex)), "st2 {v10.h, v11.h}[7], [x25], #4"); - COMPARE(St2(v11.S(), v12.S(), 1, MemOperand(x26, 8, PostIndex)), + COMPARE(St2(v11.S(), v12.S(), 1, MemOperand(x26, 8, PostIndex)), "st2 {v11.s, v12.s}[1], [x26], #8"); - COMPARE(St2(v12.S(), v13.S(), 3, MemOperand(x27, x5, PostIndex)), + COMPARE(St2(v12.S(), v13.S(), 3, MemOperand(x27, x5, PostIndex)), "st2 {v12.s, v13.s}[3], [x27], x5"); - COMPARE(St2(v13.D(), v14.D(), 1, MemOperand(sp, x6, PostIndex)), + COMPARE(St2(v13.D(), v14.D(), 1, MemOperand(sp, x6, PostIndex)), "st2 {v13.d, v14.d}[1], [sp], x6"); - COMPARE(St3(VLIST3(v0.V8B()), 0, MemOperand(x15, x0, PostIndex)), + COMPARE(St3(VLIST3(v0.V8B()), 0, MemOperand(x15, x0, PostIndex)), "st3 {v0.b, v1.b, v2.b}[0], [x15], x0"); COMPARE(St3(VLIST3(v1.V16B()), 1, MemOperand(x16, 3, PostIndex)), "st3 {v1.b, v2.b, v3.b}[1], [x16], 
#3"); - COMPARE(St3(VLIST3(v2.V4H()), 2, MemOperand(x17, 6, PostIndex)), + COMPARE(St3(VLIST3(v2.V4H()), 2, MemOperand(x17, 6, PostIndex)), "st3 {v2.h, v3.h, v4.h}[2], [x17], #6"); - COMPARE(St3(VLIST3(v3.V8H()), 3, MemOperand(x18, x1, PostIndex)), + COMPARE(St3(VLIST3(v3.V8H()), 3, MemOperand(x18, x1, PostIndex)), "st3 {v3.h, v4.h, v5.h}[3], [x18], x1"); - COMPARE(St3(VLIST3(v4.V2S()), 0, MemOperand(x19, x2, PostIndex)), + COMPARE(St3(VLIST3(v4.V2S()), 0, MemOperand(x19, x2, PostIndex)), "st3 {v4.s, v5.s, v6.s}[0], [x19], x2"); - COMPARE(St3(VLIST3(v5.V4S()), 1, MemOperand(x20, 12, PostIndex)), + COMPARE(St3(VLIST3(v5.V4S()), 1, MemOperand(x20, 12, PostIndex)), "st3 {v5.s, v6.s, v7.s}[1], [x20], #12"); - COMPARE(St3(VLIST3(v6.V2D()), 0, MemOperand(x21, 24, PostIndex)), + COMPARE(St3(VLIST3(v6.V2D()), 0, MemOperand(x21, 24, PostIndex)), "st3 {v6.d, v7.d, v8.d}[0], [x21], #24"); - COMPARE(St3(VLIST3(v7.B()), 7, MemOperand(x22, 3, PostIndex)), + COMPARE(St3(VLIST3(v7.B()), 7, MemOperand(x22, 3, PostIndex)), "st3 {v7.b, v8.b, v9.b}[7], [x22], #3"); - COMPARE(St3(VLIST3(v8.B()), 15, MemOperand(x23, x3, PostIndex)), + COMPARE(St3(VLIST3(v8.B()), 15, MemOperand(x23, x3, PostIndex)), "st3 {v8.b, v9.b, v10.b}[15], [x23], x3"); - COMPARE(St3(VLIST3(v9.H()), 3, MemOperand(x24, x4, PostIndex)), + COMPARE(St3(VLIST3(v9.H()), 3, MemOperand(x24, x4, PostIndex)), "st3 {v9.h, v10.h, v11.h}[3], [x24], x4"); - COMPARE(St3(VLIST3(v10.H()), 7, MemOperand(x25, 6, PostIndex)), + COMPARE(St3(VLIST3(v10.H()), 7, MemOperand(x25, 6, PostIndex)), "st3 {v10.h, v11.h, v12.h}[7], [x25], #6"); - COMPARE(St3(VLIST3(v11.S()), 1, MemOperand(x26, 12, PostIndex)), + COMPARE(St3(VLIST3(v11.S()), 1, MemOperand(x26, 12, PostIndex)), "st3 {v11.s, v12.s, v13.s}[1], [x26], #12"); - COMPARE(St3(VLIST3(v12.S()), 3, MemOperand(x27, x5, PostIndex)), + COMPARE(St3(VLIST3(v12.S()), 3, MemOperand(x27, x5, PostIndex)), "st3 {v12.s, v13.s, v14.s}[3], [x27], x5"); - COMPARE(St3(VLIST3(v13.D()), 1, MemOperand(sp, x6, 
PostIndex)), - "st3 {v13.d, v14.d, v15.d}[1], [sp], x6"); + COMPARE(St3(VLIST3(v13.D()), 1, MemOperand(sp, x6, PostIndex)), + "st3 {v13.d, v14.d, v15.d}[1], [sp], x6"); - COMPARE(St4(VLIST4(v0.V8B()), 0, MemOperand(x15, x0, PostIndex)), + COMPARE(St4(VLIST4(v0.V8B()), 0, MemOperand(x15, x0, PostIndex)), "st4 {v0.b, v1.b, v2.b, v3.b}[0], [x15], x0"); COMPARE(St4(VLIST4(v1.V16B()), 1, MemOperand(x16, 4, PostIndex)), "st4 {v1.b, v2.b, v3.b, v4.b}[1], [x16], #4"); - COMPARE(St4(VLIST4(v2.V4H()), 2, MemOperand(x17, 8, PostIndex)), + COMPARE(St4(VLIST4(v2.V4H()), 2, MemOperand(x17, 8, PostIndex)), "st4 {v2.h, v3.h, v4.h, v5.h}[2], [x17], #8"); - COMPARE(St4(VLIST4(v3.V8H()), 3, MemOperand(x18, x1, PostIndex)), + COMPARE(St4(VLIST4(v3.V8H()), 3, MemOperand(x18, x1, PostIndex)), "st4 {v3.h, v4.h, v5.h, v6.h}[3], [x18], x1"); - COMPARE(St4(VLIST4(v4.V2S()), 0, MemOperand(x19, x2, PostIndex)), + COMPARE(St4(VLIST4(v4.V2S()), 0, MemOperand(x19, x2, PostIndex)), "st4 {v4.s, v5.s, v6.s, v7.s}[0], [x19], x2"); - COMPARE(St4(VLIST4(v5.V4S()), 1, MemOperand(x20, 16, PostIndex)), + COMPARE(St4(VLIST4(v5.V4S()), 1, MemOperand(x20, 16, PostIndex)), "st4 {v5.s, v6.s, v7.s, v8.s}[1], [x20], #16"); - COMPARE(St4(VLIST4(v6.V2D()), 0, MemOperand(x21, 32, PostIndex)), + COMPARE(St4(VLIST4(v6.V2D()), 0, MemOperand(x21, 32, PostIndex)), "st4 {v6.d, v7.d, v8.d, v9.d}[0], [x21], #32"); - COMPARE(St4(VLIST4(v7.B()), 7, MemOperand(x22, 4, PostIndex)), + COMPARE(St4(VLIST4(v7.B()), 7, MemOperand(x22, 4, PostIndex)), "st4 {v7.b, v8.b, v9.b, v10.b}[7], [x22], #4"); - COMPARE(St4(VLIST4(v8.B()), 15, MemOperand(x23, x3, PostIndex)), + COMPARE(St4(VLIST4(v8.B()), 15, MemOperand(x23, x3, PostIndex)), "st4 {v8.b, v9.b, v10.b, v11.b}[15], [x23], x3"); - COMPARE(St4(VLIST4(v9.H()), 3, MemOperand(x24, x4, PostIndex)), + COMPARE(St4(VLIST4(v9.H()), 3, MemOperand(x24, x4, PostIndex)), "st4 {v9.h, v10.h, v11.h, v12.h}[3], [x24], x4"); - COMPARE(St4(VLIST4(v10.H()), 7, MemOperand(x25, 8, PostIndex)), + 
COMPARE(St4(VLIST4(v10.H()), 7, MemOperand(x25, 8, PostIndex)), "st4 {v10.h, v11.h, v12.h, v13.h}[7], [x25], #8"); - COMPARE(St4(VLIST4(v11.S()), 1, MemOperand(x26, 16, PostIndex)), + COMPARE(St4(VLIST4(v11.S()), 1, MemOperand(x26, 16, PostIndex)), "st4 {v11.s, v12.s, v13.s, v14.s}[1], [x26], #16"); - COMPARE(St4(VLIST4(v12.S()), 3, MemOperand(x27, x5, PostIndex)), - "st4 {v12.s, v13.s, v14.s, v15.s}[3], [x27], x5"); - COMPARE(St4(VLIST4(v13.D()), 1, MemOperand(sp, x6, PostIndex)), + COMPARE(St4(VLIST4(v12.S()), 3, MemOperand(x27, x5, PostIndex)), + "st4 {v12.s, v13.s, v14.s, v15.s}[3], [x27], x5"); + COMPARE(St4(VLIST4(v13.D()), 1, MemOperand(sp, x6, PostIndex)), "st4 {v13.d, v14.d, v15.d, v16.d}[1], [sp], x6"); CLEANUP(); @@ -3855,118 +3940,124 @@ TEST(neon_load_all_lanes) { COMPARE(Ld1r(v29.V1D(), MemOperand(x13, 8, PostIndex)), "ld1r {v29.1d}, [x13], #8"); - COMPARE(Ld2r(v14.V8B(), v15.V8B(), MemOperand(x0)), + COMPARE(Ld2r(v14.V8B(), v15.V8B(), MemOperand(x0)), "ld2r {v14.8b, v15.8b}, [x0]"); COMPARE(Ld2r(v15.V16B(), v16.V16B(), MemOperand(x1)), "ld2r {v15.16b, v16.16b}, [x1]"); - COMPARE(Ld2r(v16.V4H(), v17.V4H(), MemOperand(x2)), + COMPARE(Ld2r(v16.V4H(), v17.V4H(), MemOperand(x2)), "ld2r {v16.4h, v17.4h}, [x2]"); - COMPARE(Ld2r(v17.V8H(), v18.V8H(), MemOperand(x3)), + COMPARE(Ld2r(v17.V8H(), v18.V8H(), MemOperand(x3)), "ld2r {v17.8h, v18.8h}, [x3]"); - COMPARE(Ld2r(v18.V2S(), v19.V2S(), MemOperand(x4)), + COMPARE(Ld2r(v18.V2S(), v19.V2S(), MemOperand(x4)), "ld2r {v18.2s, v19.2s}, [x4]"); - COMPARE(Ld2r(v19.V4S(), v20.V4S(), MemOperand(x5)), + COMPARE(Ld2r(v19.V4S(), v20.V4S(), MemOperand(x5)), "ld2r {v19.4s, v20.4s}, [x5]"); - COMPARE(Ld2r(v20.V2D(), v21.V2D(), MemOperand(sp)), + COMPARE(Ld2r(v20.V2D(), v21.V2D(), MemOperand(sp)), "ld2r {v20.2d, v21.2d}, [sp]"); - COMPARE(Ld2r(v21.V8B(), v22.V8B(), MemOperand(x6, 2, PostIndex)), + COMPARE(Ld2r(v21.V8B(), v22.V8B(), MemOperand(x6, 2, PostIndex)), "ld2r {v21.8b, v22.8b}, [x6], #2"); COMPARE(Ld2r(v22.V16B(), 
v23.V16B(), MemOperand(x7, x16, PostIndex)), "ld2r {v22.16b, v23.16b}, [x7], x16"); - COMPARE(Ld2r(v23.V4H(), v24.V4H(), MemOperand(x8, x17, PostIndex)), + COMPARE(Ld2r(v23.V4H(), v24.V4H(), MemOperand(x8, x17, PostIndex)), "ld2r {v23.4h, v24.4h}, [x8], x17"); - COMPARE(Ld2r(v24.V8H(), v25.V8H(), MemOperand(x9, 4, PostIndex)), + COMPARE(Ld2r(v24.V8H(), v25.V8H(), MemOperand(x9, 4, PostIndex)), "ld2r {v24.8h, v25.8h}, [x9], #4"); - COMPARE(Ld2r(v25.V2S(), v26.V2S(), MemOperand(x10, 8, PostIndex)), + COMPARE(Ld2r(v25.V2S(), v26.V2S(), MemOperand(x10, 8, PostIndex)), "ld2r {v25.2s, v26.2s}, [x10], #8"); - COMPARE(Ld2r(v26.V4S(), v27.V4S(), MemOperand(x11, x18, PostIndex)), + COMPARE(Ld2r(v26.V4S(), v27.V4S(), MemOperand(x11, x18, PostIndex)), "ld2r {v26.4s, v27.4s}, [x11], x18"); - COMPARE(Ld2r(v27.V2D(), v28.V2D(), MemOperand(x12, 16, PostIndex)), + COMPARE(Ld2r(v27.V2D(), v28.V2D(), MemOperand(x12, 16, PostIndex)), "ld2r {v27.2d, v28.2d}, [x12], #16"); - COMPARE(Ld3r(v14.V8B(), v15.V8B(), v16.V8B(), - MemOperand(x0)), + COMPARE(Ld3r(v14.V8B(), v15.V8B(), v16.V8B(), MemOperand(x0)), "ld3r {v14.8b, v15.8b, v16.8b}, [x0]"); - COMPARE(Ld3r(v15.V16B(), v16.V16B(), v17.V16B(), - MemOperand(x1)), + COMPARE(Ld3r(v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x1)), "ld3r {v15.16b, v16.16b, v17.16b}, [x1]"); - COMPARE(Ld3r(v16.V4H(), v17.V4H(), v18.V4H(), - MemOperand(x2)), + COMPARE(Ld3r(v16.V4H(), v17.V4H(), v18.V4H(), MemOperand(x2)), "ld3r {v16.4h, v17.4h, v18.4h}, [x2]"); - COMPARE(Ld3r(v17.V8H(), v18.V8H(), v19.V8H(), - MemOperand(x3)), + COMPARE(Ld3r(v17.V8H(), v18.V8H(), v19.V8H(), MemOperand(x3)), "ld3r {v17.8h, v18.8h, v19.8h}, [x3]"); - COMPARE(Ld3r(v18.V2S(), v19.V2S(), v20.V2S(), - MemOperand(x4)), + COMPARE(Ld3r(v18.V2S(), v19.V2S(), v20.V2S(), MemOperand(x4)), "ld3r {v18.2s, v19.2s, v20.2s}, [x4]"); - COMPARE(Ld3r(v19.V4S(), v20.V4S(), v21.V4S(), - MemOperand(x5)), + COMPARE(Ld3r(v19.V4S(), v20.V4S(), v21.V4S(), MemOperand(x5)), "ld3r {v19.4s, v20.4s, v21.4s}, 
[x5]"); - COMPARE(Ld3r(v20.V2D(), v21.V2D(), v22.V2D(), - MemOperand(sp)), + COMPARE(Ld3r(v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(sp)), "ld3r {v20.2d, v21.2d, v22.2d}, [sp]"); - COMPARE(Ld3r(v21.V8B(), v22.V8B(), v23.V8B(), - MemOperand(x6, 3, PostIndex)), + COMPARE(Ld3r(v21.V8B(), v22.V8B(), v23.V8B(), MemOperand(x6, 3, PostIndex)), "ld3r {v21.8b, v22.8b, v23.8b}, [x6], #3"); - COMPARE(Ld3r(v22.V16B(), v23.V16B(), v24.V16B(), + COMPARE(Ld3r(v22.V16B(), + v23.V16B(), + v24.V16B(), MemOperand(x7, x16, PostIndex)), "ld3r {v22.16b, v23.16b, v24.16b}, [x7], x16"); - COMPARE(Ld3r(v23.V4H(), v24.V4H(), v25.V4H(), - MemOperand(x8, x17, PostIndex)), + COMPARE(Ld3r(v23.V4H(), v24.V4H(), v25.V4H(), MemOperand(x8, x17, PostIndex)), "ld3r {v23.4h, v24.4h, v25.4h}, [x8], x17"); - COMPARE(Ld3r(v24.V8H(), v25.V8H(), v26.V8H(), - MemOperand(x9, 6, PostIndex)), + COMPARE(Ld3r(v24.V8H(), v25.V8H(), v26.V8H(), MemOperand(x9, 6, PostIndex)), "ld3r {v24.8h, v25.8h, v26.8h}, [x9], #6"); - COMPARE(Ld3r(v25.V2S(), v26.V2S(), v27.V2S(), - MemOperand(x10, 12, PostIndex)), + COMPARE(Ld3r(v25.V2S(), v26.V2S(), v27.V2S(), MemOperand(x10, 12, PostIndex)), "ld3r {v25.2s, v26.2s, v27.2s}, [x10], #12"); - COMPARE(Ld3r(v26.V4S(), v27.V4S(), v28.V4S(), + COMPARE(Ld3r(v26.V4S(), + v27.V4S(), + v28.V4S(), MemOperand(x11, x18, PostIndex)), "ld3r {v26.4s, v27.4s, v28.4s}, [x11], x18"); - COMPARE(Ld3r(v27.V2D(), v28.V2D(), v29.V2D(), - MemOperand(x12, 24, PostIndex)), + COMPARE(Ld3r(v27.V2D(), v28.V2D(), v29.V2D(), MemOperand(x12, 24, PostIndex)), "ld3r {v27.2d, v28.2d, v29.2d}, [x12], #24"); - COMPARE(Ld4r(v14.V8B(), v15.V8B(), v16.V8B(), v17.V8B(), - MemOperand(x0)), + COMPARE(Ld4r(v14.V8B(), v15.V8B(), v16.V8B(), v17.V8B(), MemOperand(x0)), "ld4r {v14.8b, v15.8b, v16.8b, v17.8b}, [x0]"); - COMPARE(Ld4r(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), - MemOperand(x1)), + COMPARE(Ld4r(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x1)), "ld4r {v15.16b, v16.16b, v17.16b, v18.16b}, [x1]"); 
- COMPARE(Ld4r(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), - MemOperand(x2)), + COMPARE(Ld4r(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x2)), "ld4r {v16.4h, v17.4h, v18.4h, v19.4h}, [x2]"); - COMPARE(Ld4r(v17.V8H(), v18.V8H(), v19.V8H(), v20.V8H(), - MemOperand(x3)), + COMPARE(Ld4r(v17.V8H(), v18.V8H(), v19.V8H(), v20.V8H(), MemOperand(x3)), "ld4r {v17.8h, v18.8h, v19.8h, v20.8h}, [x3]"); - COMPARE(Ld4r(v18.V2S(), v19.V2S(), v20.V2S(), v21.V2S(), - MemOperand(x4)), + COMPARE(Ld4r(v18.V2S(), v19.V2S(), v20.V2S(), v21.V2S(), MemOperand(x4)), "ld4r {v18.2s, v19.2s, v20.2s, v21.2s}, [x4]"); - COMPARE(Ld4r(v19.V4S(), v20.V4S(), v21.V4S(), v22.V4S(), - MemOperand(x5)), + COMPARE(Ld4r(v19.V4S(), v20.V4S(), v21.V4S(), v22.V4S(), MemOperand(x5)), "ld4r {v19.4s, v20.4s, v21.4s, v22.4s}, [x5]"); - COMPARE(Ld4r(v20.V2D(), v21.V2D(), v22.V2D(), v23.V2D(), - MemOperand(sp)), + COMPARE(Ld4r(v20.V2D(), v21.V2D(), v22.V2D(), v23.V2D(), MemOperand(sp)), "ld4r {v20.2d, v21.2d, v22.2d, v23.2d}, [sp]"); - COMPARE(Ld4r(v21.V8B(), v22.V8B(), v23.V8B(), v24.V8B(), + COMPARE(Ld4r(v21.V8B(), + v22.V8B(), + v23.V8B(), + v24.V8B(), MemOperand(x6, 4, PostIndex)), "ld4r {v21.8b, v22.8b, v23.8b, v24.8b}, [x6], #4"); - COMPARE(Ld4r(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), + COMPARE(Ld4r(v22.V16B(), + v23.V16B(), + v24.V16B(), + v25.V16B(), MemOperand(x7, x16, PostIndex)), "ld4r {v22.16b, v23.16b, v24.16b, v25.16b}, [x7], x16"); - COMPARE(Ld4r(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), + COMPARE(Ld4r(v23.V4H(), + v24.V4H(), + v25.V4H(), + v26.V4H(), MemOperand(x8, x17, PostIndex)), "ld4r {v23.4h, v24.4h, v25.4h, v26.4h}, [x8], x17"); - COMPARE(Ld4r(v24.V8H(), v25.V8H(), v26.V8H(), v27.V8H(), + COMPARE(Ld4r(v24.V8H(), + v25.V8H(), + v26.V8H(), + v27.V8H(), MemOperand(x9, 8, PostIndex)), "ld4r {v24.8h, v25.8h, v26.8h, v27.8h}, [x9], #8"); - COMPARE(Ld4r(v25.V2S(), v26.V2S(), v27.V2S(), v28.V2S(), + COMPARE(Ld4r(v25.V2S(), + v26.V2S(), + v27.V2S(), + v28.V2S(), MemOperand(x10, 16, 
PostIndex)), "ld4r {v25.2s, v26.2s, v27.2s, v28.2s}, [x10], #16"); - COMPARE(Ld4r(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), + COMPARE(Ld4r(v26.V4S(), + v27.V4S(), + v28.V4S(), + v29.V4S(), MemOperand(x11, x18, PostIndex)), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, [x11], x18"); - COMPARE(Ld4r(v27.V2D(), v28.V2D(), v29.V2D(), v30.V2D(), + COMPARE(Ld4r(v27.V2D(), + v28.V2D(), + v29.V2D(), + v30.V2D(), MemOperand(x12, 32, PostIndex)), "ld4r {v27.2d, v28.2d, v29.2d, v30.2d}, [x12], #32"); @@ -4012,399 +4103,408 @@ TEST(neon_load_all_lanes_unallocated) { TEST(neon_3same) { SETUP_MACRO(); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmeq(v0.M, v1.M, v2.M), "cmeq v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmge(v0.M, v1.M, v2.M), "cmge v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmgt(v0.M, v1.M, v2.M), "cmgt v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmhi(v0.M, v1.M, v2.M), "cmhi v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmhs(v0.M, v1.M, v2.M), "cmhs v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmtst(v0.M, v1.M, v2.M), "cmtst v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Add(v0.M, v1.M, v2.M), "add v0." S ", v1." S ", v2." 
S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sub(v3.M, v4.M, v5.M), "sub v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sabd(v3.M, v4.M, v5.M), "sabd v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uabd(v3.M, v4.M, v5.M), "uabd v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Saba(v3.M, v4.M, v5.M), "saba v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uaba(v3.M, v4.M, v5.M), "uaba v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Smax(v3.M, v4.M, v5.M), "smax v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Smin(v3.M, v4.M, v5.M), "smin v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Umax(v3.M, v4.M, v5.M), "umax v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Umin(v3.M, v4.M, v5.M), "umin v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Smaxp(v3.M, v4.M, v5.M), "smaxp v3." S ", v4." 
S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sminp(v3.M, v4.M, v5.M), "sminp v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Umaxp(v3.M, v4.M, v5.M), "umaxp v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uminp(v3.M, v4.M, v5.M), "uminp v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uqadd(v6.M, v7.M, v8.M), "uqadd v6." S ", v7." S ", v8." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sqadd(v9.M, v10.M, v11.M), "sqadd v9." S ", v10." S ", v11." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uqsub(v6.M, v7.M, v8.M), "uqsub v6." S ", v7." S ", v8." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sqsub(v9.M, v10.M, v11.M), "sqsub v9." S ", v10." S ", v11." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sshl(v12.M, v13.M, v14.M), "sshl v12." S ", v13." S ", v14." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Ushl(v15.M, v16.M, v17.M), "ushl v15." S ", v16." S ", v17." 
S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sqshl(v18.M, v19.M, v20.M), "sqshl v18." S ", v19." S ", v20." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uqshl(v21.M, v22.M, v23.M), "uqshl v21." S ", v22." S ", v23." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Srshl(v24.M, v25.M, v26.M), "srshl v24." S ", v25." S ", v26." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Urshl(v27.M, v28.M, v29.M), "urshl v27." S ", v28." S ", v29." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sqrshl(v30.M, v31.M, v0.M), "sqrshl v30." S ", v31." S ", v0." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uqrshl(v1.M, v2.M, v3.M), "uqrshl v1." S ", v2." S ", v3." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Shadd(v4.M, v5.M, v6.M), "shadd v4." S ", v5." S ", v6." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uhadd(v7.M, v8.M, v9.M), "uhadd v7." S ", v8." S ", v9." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Srhadd(v10.M, v11.M, v12.M), "srhadd v10." S ", v11." S ", v12." 
S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Urhadd(v13.M, v14.M, v15.M), "urhadd v13." S ", v14." S ", v15." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Shsub(v16.M, v17.M, v18.M), "shsub v16." S ", v17." S ", v18." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uhsub(v19.M, v20.M, v21.M), "uhsub v19." S ", v20." S ", v21." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Addp(v19.M, v20.M, v21.M), "addp v19." S ", v20." S ", v21." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Mla(v19.M, v20.M, v21.M), "mla v19." S ", v20." S ", v21." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Mls(v19.M, v20.M, v21.M), "mls v19." S ", v20." S ", v21." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Mul(v19.M, v20.M, v21.M), "mul v19." S ", v20." S ", v21." S); NEON_FORMAT_LIST_BHS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sqdmulh(v1.M, v2.M, v3.M), "sqdmulh v1." S ", v2." S ", v3." S); NEON_FORMAT_LIST_HS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Sqrdmulh(v1.M, v2.M, v3.M), "sqrdmulh v1." S ", v2." S ", v3." 
S); NEON_FORMAT_LIST_HS(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(And(v6.V8B(), v7.V8B(), v8.V8B()), "and v6.8b, v7.8b, v8.8b"); + COMPARE(And(v6.V8B(), v7.V8B(), v8.V8B()), "and v6.8b, v7.8b, v8.8b"); COMPARE(And(v6.V16B(), v7.V16B(), v8.V16B()), "and v6.16b, v7.16b, v8.16b"); - COMPARE(Bic(v6.V8B(), v7.V8B(), v8.V8B()), "bic v6.8b, v7.8b, v8.8b"); + COMPARE(Bic(v6.V8B(), v7.V8B(), v8.V8B()), "bic v6.8b, v7.8b, v8.8b"); COMPARE(Bic(v6.V16B(), v7.V16B(), v8.V16B()), "bic v6.16b, v7.16b, v8.16b"); - COMPARE(Orr(v6.V8B(), v7.V8B(), v8.V8B()), "orr v6.8b, v7.8b, v8.8b"); + COMPARE(Orr(v6.V8B(), v7.V8B(), v8.V8B()), "orr v6.8b, v7.8b, v8.8b"); COMPARE(Orr(v6.V16B(), v7.V16B(), v8.V16B()), "orr v6.16b, v7.16b, v8.16b"); - COMPARE(Orr(v6.V8B(), v7.V8B(), v7.V8B()), "mov v6.8b, v7.8b"); + COMPARE(Orr(v6.V8B(), v7.V8B(), v7.V8B()), "mov v6.8b, v7.8b"); COMPARE(Orr(v6.V16B(), v7.V16B(), v7.V16B()), "mov v6.16b, v7.16b"); - COMPARE(Mov(v6.V8B(), v8.V8B()), "mov v6.8b, v8.8b"); - COMPARE(Mov(v6.V16B(), v8.V16B()), "mov v6.16b, v8.16b"); + COMPARE(Mov(v6.V8B(), v8.V8B()), "mov v6.8b, v8.8b"); + COMPARE(Mov(v6.V16B(), v8.V16B()), "mov v6.16b, v8.16b"); - COMPARE(Orn(v6.V8B(), v7.V8B(), v8.V8B()), "orn v6.8b, v7.8b, v8.8b"); + COMPARE(Orn(v6.V8B(), v7.V8B(), v8.V8B()), "orn v6.8b, v7.8b, v8.8b"); COMPARE(Orn(v6.V16B(), v7.V16B(), v8.V16B()), "orn v6.16b, v7.16b, v8.16b"); - COMPARE(Eor(v6.V8B(), v7.V8B(), v8.V8B()), "eor v6.8b, v7.8b, v8.8b"); + COMPARE(Eor(v6.V8B(), v7.V8B(), v8.V8B()), "eor v6.8b, v7.8b, v8.8b"); COMPARE(Eor(v6.V16B(), v7.V16B(), v8.V16B()), "eor v6.16b, v7.16b, v8.16b"); - COMPARE(Bif(v6.V8B(), v7.V8B(), v8.V8B()), "bif v6.8b, v7.8b, v8.8b"); + COMPARE(Bif(v6.V8B(), v7.V8B(), v8.V8B()), "bif v6.8b, v7.8b, v8.8b"); COMPARE(Bif(v6.V16B(), v7.V16B(), v8.V16B()), "bif v6.16b, v7.16b, v8.16b"); - COMPARE(Bit(v6.V8B(), v7.V8B(), v8.V8B()), "bit v6.8b, v7.8b, v8.8b"); + COMPARE(Bit(v6.V8B(), v7.V8B(), v8.V8B()), "bit v6.8b, v7.8b, v8.8b"); 
COMPARE(Bit(v6.V16B(), v7.V16B(), v8.V16B()), "bit v6.16b, v7.16b, v8.16b"); - COMPARE(Bsl(v6.V8B(), v7.V8B(), v8.V8B()), "bsl v6.8b, v7.8b, v8.8b"); + COMPARE(Bsl(v6.V8B(), v7.V8B(), v8.V8B()), "bsl v6.8b, v7.8b, v8.8b"); COMPARE(Bsl(v6.V16B(), v7.V16B(), v8.V16B()), "bsl v6.16b, v7.16b, v8.16b"); - COMPARE(Pmul(v6.V8B(), v7.V8B(), v8.V8B()), "pmul v6.8b, v7.8b, v8.8b"); + COMPARE(Pmul(v6.V8B(), v7.V8B(), v8.V8B()), "pmul v6.8b, v7.8b, v8.8b"); COMPARE(Pmul(v6.V16B(), v7.V16B(), v8.V16B()), "pmul v6.16b, v7.16b, v8.16b"); CLEANUP(); } -#define NEON_FORMAT_LIST_FP(V) \ - V(V2S(), "2s") \ - V(V4S(), "4s") \ +#define NEON_FORMAT_LIST_FP(V) \ + V(V2S(), "2s") \ + V(V4S(), "4s") \ V(V2D(), "2d") TEST(neon_fp_3same) { SETUP_MACRO(); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fadd(v0.M, v1.M, v2.M), "fadd v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fsub(v3.M, v4.M, v5.M), "fsub v3." S ", v4." S ", v5." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmul(v6.M, v7.M, v8.M), "fmul v6." S ", v7." S ", v8." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fdiv(v9.M, v10.M, v11.M), "fdiv v9." S ", v10." S ", v11." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmin(v12.M, v13.M, v14.M), "fmin v12." S ", v13." S ", v14." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fminnm(v15.M, v16.M, v17.M), "fminnm v15." S ", v16." S ", v17." 
S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmax(v18.M, v19.M, v20.M), "fmax v18." S ", v19." S ", v20." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmaxnm(v21.M, v22.M, v23.M), "fmaxnm v21." S ", v22." S ", v23." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Frecps(v24.M, v25.M, v26.M), "frecps v24." S ", v25." S ", v26." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ - COMPARE(Frsqrts(v27.M, v28.M, v29.M), "frsqrts v27." S ", v28." S ", v29." S); +#define DISASM_INST(M, S) \ + COMPARE(Frsqrts(v27.M, v28.M, v29.M), \ + "frsqrts v27." S ", v28." S \ + ", " \ + "v29." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmulx(v30.M, v31.M, v0.M), "fmulx v30." S ", v31." S ", v0." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmla(v1.M, v2.M, v3.M), "fmla v1." S ", v2." S ", v3." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmls(v4.M, v5.M, v6.M), "fmls v4." S ", v5." S ", v6." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fabd(v7.M, v8.M, v9.M), "fabd v7." S ", v8." S ", v9." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Faddp(v10.M, v11.M, v12.M), "faddp v10." S ", v11." S ", v12." 
S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fmaxp(v13.M, v14.M, v15.M), "fmaxp v13." S ", v14." S ", v15." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fminp(v16.M, v17.M, v18.M), "fminp v16." S ", v17." S ", v18." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ - COMPARE(Fmaxnmp(v19.M, v20.M, v21.M), "fmaxnmp v19." S ", v20." S ", v21." S); +#define DISASM_INST(M, S) \ + COMPARE(Fmaxnmp(v19.M, v20.M, v21.M), \ + "fmaxnmp v19." S ", v20." S \ + ", " \ + "v21." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ - COMPARE(Fminnmp(v22.M, v23.M, v24.M), "fminnmp v22." S ", v23." S ", v24." S); +#define DISASM_INST(M, S) \ + COMPARE(Fminnmp(v22.M, v23.M, v24.M), \ + "fminnmp v22." S ", v23." S \ + ", " \ + "v24." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmeq(v25.M, v26.M, v27.M), "fcmeq v25." S ", v26." S ", v27." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmge(v25.M, v26.M, v27.M), "fcmge v25." S ", v26." S ", v27." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmgt(v25.M, v26.M, v27.M), "fcmgt v25." S ", v26." S ", v27." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Facge(v25.M, v26.M, v27.M), "facge v25." S ", v26." S ", v27." 
S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Facgt(v25.M, v26.M, v27.M), "facgt v25." S ", v26." S ", v27." S); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST CLEANUP(); } -#define NEON_SCALAR_FORMAT_LIST(V) \ - V(B(), "b") \ - V(H(), "h") \ - V(S(), "s") \ +#define NEON_SCALAR_FORMAT_LIST(V) \ + V(B(), "b") \ + V(H(), "h") \ + V(S(), "s") \ V(D(), "d") TEST(neon_scalar_3same) { @@ -4448,45 +4548,45 @@ TEST(neon_scalar_3same) { COMPARE(Sqrdmulh(v12.S(), v13.S(), v14.S()), "sqrdmulh s12, s13, s14"); COMPARE(Sqrdmulh(v15.H(), v16.H(), v17.H()), "sqrdmulh h15, h16, h17"); - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Uqadd(v6.M, v7.M, v8.M), "uqadd " R "6, " R "7, " R "8"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Uqsub(v9.M, v10.M, v11.M), "uqsub " R "9, " R "10, " R "11"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Sqadd(v12.M, v13.M, v14.M), "sqadd " R "12, " R "13, " R "14"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Sqsub(v15.M, v16.M, v17.M), "sqsub " R "15, " R "16, " R "17"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Uqshl(v18.M, v19.M, v20.M), "uqshl " R "18, " R "19, " R "20"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Sqshl(v21.M, v22.M, v23.M), "sqshl " R "21, " R "22, " R "23"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define 
DISASM_INST(M, R) \ COMPARE(Uqrshl(v30.M, v31.M, v0.M), "uqrshl " R "30, " R "31, " R "0"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, R) \ +#define DISASM_INST(M, R) \ COMPARE(Sqrshl(v1.M, v2.M, v3.M), "sqrshl " R "1, " R "2, " R "3"); NEON_SCALAR_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST CLEANUP(); } @@ -4560,10 +4660,10 @@ TEST(neon_byelement) { COMPARE(Umlal2(v2.V2D(), v3.V4S(), v4.S(), 3), "umlal2 v2.2d, v3.4s, v4.s[3]"); - COMPARE(Smlsl(v0.V4S(), v1.V4H(), v2.H(), 0), "smlsl v0.4s, v1.4h, v2.h[0]"); + COMPARE(Smlsl(v0.V4S(), v1.V4H(), v2.H(), 0), "smlsl v0.4s, v1.4h, v2.h[0]"); COMPARE(Smlsl2(v2.V4S(), v3.V8H(), v4.H(), 7), "smlsl2 v2.4s, v3.8h, v4.h[7]"); - COMPARE(Smlsl(v0.V2D(), v1.V2S(), v2.S(), 0), "smlsl v0.2d, v1.2s, v2.s[0]"); + COMPARE(Smlsl(v0.V2D(), v1.V2S(), v2.S(), 0), "smlsl v0.2d, v1.2s, v2.s[0]"); COMPARE(Smlsl2(v2.V2D(), v3.V4S(), v4.S(), 3), "smlsl2 v2.2d, v3.4s, v4.s[3]"); @@ -4575,7 +4675,7 @@ TEST(neon_byelement) { "umlsl2 v2.2d, v3.4s, v4.s[3]"); COMPARE(Sqdmull(v0.V4S(), v1.V4H(), v2.H(), 0), - "sqdmull v0.4s, v1.4h, v2.h[0]"); + "sqdmull v0.4s, v1.4h, v2.h[0]"); COMPARE(Sqdmull2(v2.V4S(), v3.V8H(), v4.H(), 7), "sqdmull2 v2.4s, v3.8h, v4.h[7]"); COMPARE(Sqdmull(v0.V2D(), v1.V2S(), v2.S(), 0), @@ -4645,203 +4745,257 @@ TEST(neon_fp_byelement) { TEST(neon_3different) { SETUP_MACRO(); - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uaddl(v0.TA, v1.TB, v2.TB), "uaddl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uaddl(v0.TA, v1.TB, v2.TB), \ + "uaddl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uaddl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uaddl2(v0.TA, v1.TB, v2.TB), \ "uaddl2 v0." TAS ", v1." TBS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uaddw(v0.TA, v1.TA, v2.TB), "uaddw v0." TAS ", v1." TAS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uaddw(v0.TA, v1.TA, v2.TB), \ + "uaddw v0." TAS ", v1." TAS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uaddw2(v0.TA, v1.TA, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uaddw2(v0.TA, v1.TA, v2.TB), \ "uaddw2 v0." TAS ", v1." TAS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Saddl(v0.TA, v1.TB, v2.TB), "saddl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Saddl(v0.TA, v1.TB, v2.TB), \ + "saddl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Saddl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Saddl2(v0.TA, v1.TB, v2.TB), \ "saddl2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Saddw(v0.TA, v1.TA, v2.TB), "saddw v0." TAS ", v1." TAS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Saddw(v0.TA, v1.TA, v2.TB), \ + "saddw v0." TAS ", v1." TAS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Saddw2(v0.TA, v1.TA, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Saddw2(v0.TA, v1.TA, v2.TB), \ "saddw2 v0." TAS ", v1." TAS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Usubl(v0.TA, v1.TB, v2.TB), "usubl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Usubl(v0.TA, v1.TB, v2.TB), \ + "usubl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Usubl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Usubl2(v0.TA, v1.TB, v2.TB), \ "usubl2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Usubw(v0.TA, v1.TA, v2.TB), "usubw v0." TAS ", v1." TAS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Usubw(v0.TA, v1.TA, v2.TB), \ + "usubw v0." TAS ", v1." TAS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Usubw2(v0.TA, v1.TA, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Usubw2(v0.TA, v1.TA, v2.TB), \ "usubw2 v0." TAS ", v1." TAS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Ssubl(v0.TA, v1.TB, v2.TB), "ssubl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Ssubl(v0.TA, v1.TB, v2.TB), \ + "ssubl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Ssubl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Ssubl2(v0.TA, v1.TB, v2.TB), \ "ssubl2 v0." TAS ", v1." TBS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Ssubw(v0.TA, v1.TA, v2.TB), "ssubw v0." TAS ", v1." TAS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Ssubw(v0.TA, v1.TA, v2.TB), \ + "ssubw v0." TAS ", v1." TAS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Ssubw2(v0.TA, v1.TA, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Ssubw2(v0.TA, v1.TA, v2.TB), \ "ssubw2 v0." TAS ", v1." TAS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Sabal(v0.TA, v1.TB, v2.TB), "sabal v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Sabal(v0.TA, v1.TB, v2.TB), \ + "sabal v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Sabal2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Sabal2(v0.TA, v1.TB, v2.TB), \ "sabal2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uabal(v0.TA, v1.TB, v2.TB), "uabal v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uabal(v0.TA, v1.TB, v2.TB), \ + "uabal v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uabal2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uabal2(v0.TA, v1.TB, v2.TB), \ "uabal2 v0." TAS ", v1." TBS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Sabdl(v0.TA, v1.TB, v2.TB), "sabdl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Sabdl(v0.TA, v1.TB, v2.TB), \ + "sabdl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Sabdl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Sabdl2(v0.TA, v1.TB, v2.TB), \ "sabdl2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uabdl(v0.TA, v1.TB, v2.TB), "uabdl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uabdl(v0.TA, v1.TB, v2.TB), \ + "uabdl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Uabdl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Uabdl2(v0.TA, v1.TB, v2.TB), \ "uabdl2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Smlal(v0.TA, v1.TB, v2.TB), "smlal v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Smlal(v0.TA, v1.TB, v2.TB), \ + "smlal v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Smlal2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Smlal2(v0.TA, v1.TB, v2.TB), \ "smlal2 v0." TAS ", v1." TBS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Umlsl(v0.TA, v1.TB, v2.TB), "umlsl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Umlsl(v0.TA, v1.TB, v2.TB), \ + "umlsl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Umlsl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Umlsl2(v0.TA, v1.TB, v2.TB), \ "umlsl2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Smlsl(v0.TA, v1.TB, v2.TB), "smlsl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Smlsl(v0.TA, v1.TB, v2.TB), \ + "smlsl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Smlsl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Smlsl2(v0.TA, v1.TB, v2.TB), \ "smlsl2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Umlsl(v0.TA, v1.TB, v2.TB), "umlsl v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Umlsl(v0.TA, v1.TB, v2.TB), \ + "umlsl v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Umlsl2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Umlsl2(v0.TA, v1.TB, v2.TB), \ "umlsl2 v0." TAS ", v1." TBS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Smull(v0.TA, v1.TB, v2.TB), "smull v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Smull(v0.TA, v1.TB, v2.TB), \ + "smull v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Smull2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Smull2(v0.TA, v1.TB, v2.TB), \ "smull2 v0." TAS ", v1." TBS ", v2." TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Umull(v0.TA, v1.TB, v2.TB), "umull v0." TAS ", v1." TBS ", v2." TBS); +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Umull(v0.TA, v1.TB, v2.TB), \ + "umull v0." TAS ", v1." TBS \ + ", " \ + "v2." TBS); NEON_FORMAT_LIST_LW(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ - COMPARE(Umull2(v0.TA, v1.TB, v2.TB), \ +#define DISASM_INST(TA, TAS, TB, TBS) \ + COMPARE(Umull2(v0.TA, v1.TB, v2.TB), \ "umull2 v0." TAS ", v1." TBS ", v2." 
TBS); NEON_FORMAT_LIST_LW2(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Sqdmull(v0.V4S(), v1.V4H(), v2.V4H()), "sqdmull v0.4s, v1.4h, v2.4h"); COMPARE(Sqdmull(v1.V2D(), v2.V2S(), v3.V2S()), "sqdmull v1.2d, v2.2s, v3.2s"); @@ -4895,7 +5049,7 @@ TEST(neon_3different) { COMPARE(Rsubhn(v1.V4H(), v2.V4S(), v3.V4S()), "rsubhn v1.4h, v2.4s, v3.4s"); COMPARE(Rsubhn(v2.V2S(), v3.V2D(), v4.V2D()), "rsubhn v2.2s, v3.2d, v4.2d"); COMPARE(Rsubhn2(v0.V16B(), v1.V8H(), v5.V8H()), - "rsubhn2 v0.16b, v1.8h, v5.8h"); + "rsubhn2 v0.16b, v1.8h, v5.8h"); COMPARE(Rsubhn2(v1.V8H(), v2.V4S(), v6.V4S()), "rsubhn2 v1.8h, v2.4s, v6.4s"); COMPARE(Rsubhn2(v2.V4S(), v3.V2D(), v7.V2D()), "rsubhn2 v2.4s, v3.2d, v7.2d"); @@ -4910,35 +5064,35 @@ TEST(neon_3different) { TEST(neon_perm) { SETUP_MACRO(); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Trn1(v0.M, v1.M, v2.M), "trn1 v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Trn2(v0.M, v1.M, v2.M), "trn2 v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uzp1(v0.M, v1.M, v2.M), "uzp1 v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Uzp2(v0.M, v1.M, v2.M), "uzp2 v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Zip1(v0.M, v1.M, v2.M), "zip1 v0." S ", v1." S ", v2." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Zip2(v0.M, v1.M, v2.M), "zip2 v0." S ", v1." S ", v2." 
S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST CLEANUP(); } @@ -4948,121 +5102,121 @@ TEST(neon_copy) { SETUP_MACRO(); COMPARE(Ins(v1.V16B(), 4, v5.V16B(), 0), "mov v1.b[4], v5.b[0]"); - COMPARE(Ins(v2.V8B(), 5, v6.V8B(), 1), "mov v2.b[5], v6.b[1]"); - COMPARE(Ins(v3.B(), 6, v7.B(), 2), "mov v3.b[6], v7.b[2]"); - COMPARE(Ins(v4.V8H(), 7, v8.V8H(), 3), "mov v4.h[7], v8.h[3]"); - COMPARE(Ins(v5.V4H(), 3, v9.V4H(), 0), "mov v5.h[3], v9.h[0]"); - COMPARE(Ins(v6.H(), 6, v1.H(), 1), "mov v6.h[6], v1.h[1]"); - COMPARE(Ins(v7.V4S(), 2, v2.V4S(), 2), "mov v7.s[2], v2.s[2]"); - COMPARE(Ins(v8.V2S(), 1, v3.V2S(), 0), "mov v8.s[1], v3.s[0]"); - COMPARE(Ins(v9.S(), 0, v4.S(), 1), "mov v9.s[0], v4.s[1]"); - COMPARE(Ins(v1.V2D(), 1, v5.V2D(), 0), "mov v1.d[1], v5.d[0]"); - COMPARE(Ins(v2.D(), 0, v6.D(), 1), "mov v2.d[0], v6.d[1]"); + COMPARE(Ins(v2.V8B(), 5, v6.V8B(), 1), "mov v2.b[5], v6.b[1]"); + COMPARE(Ins(v3.B(), 6, v7.B(), 2), "mov v3.b[6], v7.b[2]"); + COMPARE(Ins(v4.V8H(), 7, v8.V8H(), 3), "mov v4.h[7], v8.h[3]"); + COMPARE(Ins(v5.V4H(), 3, v9.V4H(), 0), "mov v5.h[3], v9.h[0]"); + COMPARE(Ins(v6.H(), 6, v1.H(), 1), "mov v6.h[6], v1.h[1]"); + COMPARE(Ins(v7.V4S(), 2, v2.V4S(), 2), "mov v7.s[2], v2.s[2]"); + COMPARE(Ins(v8.V2S(), 1, v3.V2S(), 0), "mov v8.s[1], v3.s[0]"); + COMPARE(Ins(v9.S(), 0, v4.S(), 1), "mov v9.s[0], v4.s[1]"); + COMPARE(Ins(v1.V2D(), 1, v5.V2D(), 0), "mov v1.d[1], v5.d[0]"); + COMPARE(Ins(v2.D(), 0, v6.D(), 1), "mov v2.d[0], v6.d[1]"); COMPARE(Mov(v3.V16B(), 4, v7.V16B(), 0), "mov v3.b[4], v7.b[0]"); - COMPARE(Mov(v4.V8B(), 5, v8.V8B(), 1), "mov v4.b[5], v8.b[1]"); - COMPARE(Mov(v5.B(), 6, v9.B(), 2), "mov v5.b[6], v9.b[2]"); - COMPARE(Mov(v6.V8H(), 7, v1.V8H(), 3), "mov v6.h[7], v1.h[3]"); - COMPARE(Mov(v7.V4H(), 0, v2.V4H(), 0), "mov v7.h[0], v2.h[0]"); - COMPARE(Mov(v8.H(), 1, v3.H(), 1), "mov v8.h[1], v3.h[1]"); - COMPARE(Mov(v9.V4S(), 2, v4.V4S(), 2), "mov v9.s[2], v4.s[2]"); - COMPARE(Mov(v1.V2S(), 3, v5.V2S(), 0), 
"mov v1.s[3], v5.s[0]"); - COMPARE(Mov(v2.S(), 0, v6.S(), 1), "mov v2.s[0], v6.s[1]"); - COMPARE(Mov(v3.V2D(), 1, v7.V2D(), 0), "mov v3.d[1], v7.d[0]"); - COMPARE(Mov(v4.D(), 0, v8.D(), 1), "mov v4.d[0], v8.d[1]"); + COMPARE(Mov(v4.V8B(), 5, v8.V8B(), 1), "mov v4.b[5], v8.b[1]"); + COMPARE(Mov(v5.B(), 6, v9.B(), 2), "mov v5.b[6], v9.b[2]"); + COMPARE(Mov(v6.V8H(), 7, v1.V8H(), 3), "mov v6.h[7], v1.h[3]"); + COMPARE(Mov(v7.V4H(), 0, v2.V4H(), 0), "mov v7.h[0], v2.h[0]"); + COMPARE(Mov(v8.H(), 1, v3.H(), 1), "mov v8.h[1], v3.h[1]"); + COMPARE(Mov(v9.V4S(), 2, v4.V4S(), 2), "mov v9.s[2], v4.s[2]"); + COMPARE(Mov(v1.V2S(), 3, v5.V2S(), 0), "mov v1.s[3], v5.s[0]"); + COMPARE(Mov(v2.S(), 0, v6.S(), 1), "mov v2.s[0], v6.s[1]"); + COMPARE(Mov(v3.V2D(), 1, v7.V2D(), 0), "mov v3.d[1], v7.d[0]"); + COMPARE(Mov(v4.D(), 0, v8.D(), 1), "mov v4.d[0], v8.d[1]"); COMPARE(Ins(v1.V16B(), 4, w0), "mov v1.b[4], w0"); - COMPARE(Ins(v2.V8B(), 5, w1), "mov v2.b[5], w1"); - COMPARE(Ins(v3.B(), 6, w2), "mov v3.b[6], w2"); - COMPARE(Ins(v4.V8H(), 7, w3), "mov v4.h[7], w3"); - COMPARE(Ins(v5.V4H(), 3, w0), "mov v5.h[3], w0"); - COMPARE(Ins(v6.H(), 6, w1), "mov v6.h[6], w1"); - COMPARE(Ins(v7.V4S(), 2, w2), "mov v7.s[2], w2"); - COMPARE(Ins(v8.V2S(), 1, w0), "mov v8.s[1], w0"); - COMPARE(Ins(v9.S(), 0, w1), "mov v9.s[0], w1"); - COMPARE(Ins(v1.V2D(), 1, x0), "mov v1.d[1], x0"); - COMPARE(Ins(v2.D(), 0, x1), "mov v2.d[0], x1"); + COMPARE(Ins(v2.V8B(), 5, w1), "mov v2.b[5], w1"); + COMPARE(Ins(v3.B(), 6, w2), "mov v3.b[6], w2"); + COMPARE(Ins(v4.V8H(), 7, w3), "mov v4.h[7], w3"); + COMPARE(Ins(v5.V4H(), 3, w0), "mov v5.h[3], w0"); + COMPARE(Ins(v6.H(), 6, w1), "mov v6.h[6], w1"); + COMPARE(Ins(v7.V4S(), 2, w2), "mov v7.s[2], w2"); + COMPARE(Ins(v8.V2S(), 1, w0), "mov v8.s[1], w0"); + COMPARE(Ins(v9.S(), 0, w1), "mov v9.s[0], w1"); + COMPARE(Ins(v1.V2D(), 1, x0), "mov v1.d[1], x0"); + COMPARE(Ins(v2.D(), 0, x1), "mov v2.d[0], x1"); COMPARE(Mov(v1.V16B(), 4, w0), "mov v1.b[4], w0"); - 
COMPARE(Mov(v2.V8B(), 5, w1), "mov v2.b[5], w1"); - COMPARE(Mov(v3.B(), 6, w2), "mov v3.b[6], w2"); - COMPARE(Mov(v4.V8H(), 7, w3), "mov v4.h[7], w3"); - COMPARE(Mov(v5.V4H(), 3, w0), "mov v5.h[3], w0"); - COMPARE(Mov(v6.H(), 6, w1), "mov v6.h[6], w1"); - COMPARE(Mov(v7.V4S(), 2, w2), "mov v7.s[2], w2"); - COMPARE(Mov(v8.V2S(), 1, w0), "mov v8.s[1], w0"); - COMPARE(Mov(v9.S(), 0, w1), "mov v9.s[0], w1"); - COMPARE(Mov(v1.V2D(), 1, x0), "mov v1.d[1], x0"); - COMPARE(Mov(v2.D(), 0, x1), "mov v2.d[0], x1"); - - COMPARE(Dup(v5.V8B(), v9.V8B(), 6), "dup v5.8b, v9.b[6]"); + COMPARE(Mov(v2.V8B(), 5, w1), "mov v2.b[5], w1"); + COMPARE(Mov(v3.B(), 6, w2), "mov v3.b[6], w2"); + COMPARE(Mov(v4.V8H(), 7, w3), "mov v4.h[7], w3"); + COMPARE(Mov(v5.V4H(), 3, w0), "mov v5.h[3], w0"); + COMPARE(Mov(v6.H(), 6, w1), "mov v6.h[6], w1"); + COMPARE(Mov(v7.V4S(), 2, w2), "mov v7.s[2], w2"); + COMPARE(Mov(v8.V2S(), 1, w0), "mov v8.s[1], w0"); + COMPARE(Mov(v9.S(), 0, w1), "mov v9.s[0], w1"); + COMPARE(Mov(v1.V2D(), 1, x0), "mov v1.d[1], x0"); + COMPARE(Mov(v2.D(), 0, x1), "mov v2.d[0], x1"); + + COMPARE(Dup(v5.V8B(), v9.V8B(), 6), "dup v5.8b, v9.b[6]"); COMPARE(Dup(v6.V16B(), v1.V16B(), 5), "dup v6.16b, v1.b[5]"); - COMPARE(Dup(v7.V4H(), v2.V4H(), 4), "dup v7.4h, v2.h[4]"); - COMPARE(Dup(v8.V8H(), v3.V8H(), 3), "dup v8.8h, v3.h[3]"); - COMPARE(Dup(v9.V2S(), v4.V2S(), 2), "dup v9.2s, v4.s[2]"); - COMPARE(Dup(v1.V4S(), v5.V4S(), 1), "dup v1.4s, v5.s[1]"); - COMPARE(Dup(v2.V2D(), v6.V2D(), 0), "dup v2.2d, v6.d[0]"); - - COMPARE(Dup(v5.B(), v9.B(), 6), "mov b5, v9.b[6]"); - COMPARE(Dup(v7.H(), v2.H(), 4), "mov h7, v2.h[4]"); - COMPARE(Dup(v9.S(), v4.S(), 2), "mov s9, v4.s[2]"); - COMPARE(Dup(v2.D(), v6.D(), 0), "mov d2, v6.d[0]"); - - COMPARE(Mov(v5.B(), v9.B(), 6), "mov b5, v9.b[6]"); - COMPARE(Mov(v7.H(), v2.H(), 4), "mov h7, v2.h[4]"); - COMPARE(Mov(v9.S(), v4.S(), 2), "mov s9, v4.s[2]"); - COMPARE(Mov(v2.D(), v6.D(), 0), "mov d2, v6.d[0]"); - - COMPARE(Mov(v0.B(), v1.V8B(), 7), "mov b0, 
v1.b[7]"); - COMPARE(Mov(b2, v3.V16B(), 15), "mov b2, v3.b[15]"); - COMPARE(Mov(v4.H(), v5.V4H(), 3), "mov h4, v5.h[3]"); - COMPARE(Mov(h6, v7.V8H(), 7), "mov h6, v7.h[7]"); - COMPARE(Mov(v8.S(), v9.V2S(), 1), "mov s8, v9.s[1]"); - COMPARE(Mov(s10, v11.V4S(), 3), "mov s10, v11.s[3]"); + COMPARE(Dup(v7.V4H(), v2.V4H(), 4), "dup v7.4h, v2.h[4]"); + COMPARE(Dup(v8.V8H(), v3.V8H(), 3), "dup v8.8h, v3.h[3]"); + COMPARE(Dup(v9.V2S(), v4.V2S(), 2), "dup v9.2s, v4.s[2]"); + COMPARE(Dup(v1.V4S(), v5.V4S(), 1), "dup v1.4s, v5.s[1]"); + COMPARE(Dup(v2.V2D(), v6.V2D(), 0), "dup v2.2d, v6.d[0]"); + + COMPARE(Dup(v5.B(), v9.B(), 6), "mov b5, v9.b[6]"); + COMPARE(Dup(v7.H(), v2.H(), 4), "mov h7, v2.h[4]"); + COMPARE(Dup(v9.S(), v4.S(), 2), "mov s9, v4.s[2]"); + COMPARE(Dup(v2.D(), v6.D(), 0), "mov d2, v6.d[0]"); + + COMPARE(Mov(v5.B(), v9.B(), 6), "mov b5, v9.b[6]"); + COMPARE(Mov(v7.H(), v2.H(), 4), "mov h7, v2.h[4]"); + COMPARE(Mov(v9.S(), v4.S(), 2), "mov s9, v4.s[2]"); + COMPARE(Mov(v2.D(), v6.D(), 0), "mov d2, v6.d[0]"); + + COMPARE(Mov(v0.B(), v1.V8B(), 7), "mov b0, v1.b[7]"); + COMPARE(Mov(b2, v3.V16B(), 15), "mov b2, v3.b[15]"); + COMPARE(Mov(v4.H(), v5.V4H(), 3), "mov h4, v5.h[3]"); + COMPARE(Mov(h6, v7.V8H(), 7), "mov h6, v7.h[7]"); + COMPARE(Mov(v8.S(), v9.V2S(), 1), "mov s8, v9.s[1]"); + COMPARE(Mov(s10, v11.V4S(), 3), "mov s10, v11.s[3]"); COMPARE(Mov(v12.D(), v13.V2D(), 1), "mov d12, v13.d[1]"); - COMPARE(Dup(v5.V8B(), w0), "dup v5.8b, w0"); + COMPARE(Dup(v5.V8B(), w0), "dup v5.8b, w0"); COMPARE(Dup(v6.V16B(), w1), "dup v6.16b, w1"); - COMPARE(Dup(v7.V4H(), w2), "dup v7.4h, w2"); - COMPARE(Dup(v8.V8H(), w3), "dup v8.8h, w3"); - COMPARE(Dup(v9.V2S(), w4), "dup v9.2s, w4"); - COMPARE(Dup(v1.V4S(), w5), "dup v1.4s, w5"); - COMPARE(Dup(v2.V2D(), x6), "dup v2.2d, x6"); + COMPARE(Dup(v7.V4H(), w2), "dup v7.4h, w2"); + COMPARE(Dup(v8.V8H(), w3), "dup v8.8h, w3"); + COMPARE(Dup(v9.V2S(), w4), "dup v9.2s, w4"); + COMPARE(Dup(v1.V4S(), w5), "dup v1.4s, w5"); + 
COMPARE(Dup(v2.V2D(), x6), "dup v2.2d, x6"); COMPARE(Smov(w0, v1.V16B(), 4), "smov w0, v1.b[4]"); - COMPARE(Smov(w1, v2.V8B(), 5), "smov w1, v2.b[5]"); - COMPARE(Smov(w2, v3.B(), 6), "smov w2, v3.b[6]"); - COMPARE(Smov(w3, v4.V8H(), 7), "smov w3, v4.h[7]"); - COMPARE(Smov(w0, v5.V4H(), 3), "smov w0, v5.h[3]"); - COMPARE(Smov(w1, v6.H(), 6), "smov w1, v6.h[6]"); + COMPARE(Smov(w1, v2.V8B(), 5), "smov w1, v2.b[5]"); + COMPARE(Smov(w2, v3.B(), 6), "smov w2, v3.b[6]"); + COMPARE(Smov(w3, v4.V8H(), 7), "smov w3, v4.h[7]"); + COMPARE(Smov(w0, v5.V4H(), 3), "smov w0, v5.h[3]"); + COMPARE(Smov(w1, v6.H(), 6), "smov w1, v6.h[6]"); COMPARE(Smov(x0, v1.V16B(), 4), "smov x0, v1.b[4]"); - COMPARE(Smov(x1, v2.V8B(), 5), "smov x1, v2.b[5]"); - COMPARE(Smov(x2, v3.B(), 6), "smov x2, v3.b[6]"); - COMPARE(Smov(x3, v4.V8H(), 7), "smov x3, v4.h[7]"); - COMPARE(Smov(x0, v5.V4H(), 3), "smov x0, v5.h[3]"); - COMPARE(Smov(x1, v6.H(), 6), "smov x1, v6.h[6]"); - COMPARE(Smov(x2, v7.V4S(), 2), "smov x2, v7.s[2]"); - COMPARE(Smov(x0, v8.V2S(), 1), "smov x0, v8.s[1]"); - COMPARE(Smov(x1, v9.S(), 0), "smov x1, v9.s[0]"); + COMPARE(Smov(x1, v2.V8B(), 5), "smov x1, v2.b[5]"); + COMPARE(Smov(x2, v3.B(), 6), "smov x2, v3.b[6]"); + COMPARE(Smov(x3, v4.V8H(), 7), "smov x3, v4.h[7]"); + COMPARE(Smov(x0, v5.V4H(), 3), "smov x0, v5.h[3]"); + COMPARE(Smov(x1, v6.H(), 6), "smov x1, v6.h[6]"); + COMPARE(Smov(x2, v7.V4S(), 2), "smov x2, v7.s[2]"); + COMPARE(Smov(x0, v8.V2S(), 1), "smov x0, v8.s[1]"); + COMPARE(Smov(x1, v9.S(), 0), "smov x1, v9.s[0]"); COMPARE(Umov(w0, v1.V16B(), 4), "umov w0, v1.b[4]"); - COMPARE(Umov(w1, v2.V8B(), 5), "umov w1, v2.b[5]"); - COMPARE(Umov(w2, v3.B(), 6), "umov w2, v3.b[6]"); - COMPARE(Umov(w3, v4.V8H(), 7), "umov w3, v4.h[7]"); - COMPARE(Umov(w0, v5.V4H(), 3), "umov w0, v5.h[3]"); - COMPARE(Umov(w1, v6.H(), 6), "umov w1, v6.h[6]"); - COMPARE(Umov(w2, v7.V4S(), 2), "mov w2, v7.s[2]"); - COMPARE(Umov(w0, v8.V2S(), 1), "mov w0, v8.s[1]"); - COMPARE(Umov(w1, v9.S(), 0), "mov w1, 
v9.s[0]"); - COMPARE(Umov(x0, v1.V2D(), 1), "mov x0, v1.d[1]"); - COMPARE(Umov(x1, v2.D(), 0), "mov x1, v2.d[0]"); - - COMPARE(Mov(w2, v7.V4S(), 2), "mov w2, v7.s[2]"); - COMPARE(Mov(w0, v8.V2S(), 1), "mov w0, v8.s[1]"); - COMPARE(Mov(w1, v9.S(), 0), "mov w1, v9.s[0]"); - COMPARE(Mov(x0, v1.V2D(), 1), "mov x0, v1.d[1]"); - COMPARE(Mov(x1, v2.D(), 0), "mov x1, v2.d[0]"); + COMPARE(Umov(w1, v2.V8B(), 5), "umov w1, v2.b[5]"); + COMPARE(Umov(w2, v3.B(), 6), "umov w2, v3.b[6]"); + COMPARE(Umov(w3, v4.V8H(), 7), "umov w3, v4.h[7]"); + COMPARE(Umov(w0, v5.V4H(), 3), "umov w0, v5.h[3]"); + COMPARE(Umov(w1, v6.H(), 6), "umov w1, v6.h[6]"); + COMPARE(Umov(w2, v7.V4S(), 2), "mov w2, v7.s[2]"); + COMPARE(Umov(w0, v8.V2S(), 1), "mov w0, v8.s[1]"); + COMPARE(Umov(w1, v9.S(), 0), "mov w1, v9.s[0]"); + COMPARE(Umov(x0, v1.V2D(), 1), "mov x0, v1.d[1]"); + COMPARE(Umov(x1, v2.D(), 0), "mov x1, v2.d[0]"); + + COMPARE(Mov(w2, v7.V4S(), 2), "mov w2, v7.s[2]"); + COMPARE(Mov(w0, v8.V2S(), 1), "mov w0, v8.s[1]"); + COMPARE(Mov(w1, v9.S(), 0), "mov w1, v9.s[0]"); + COMPARE(Mov(x0, v1.V2D(), 1), "mov x0, v1.d[1]"); + COMPARE(Mov(x1, v2.D(), 0), "mov x1, v2.d[0]"); CLEANUP(); } @@ -5076,14 +5230,25 @@ TEST(neon_table) { "tbl v3.8b, {v4.16b, v5.16b}, v6.8b"); COMPARE(Tbl(v7.V8B(), v8.V16B(), v9.V16B(), v10.V16B(), v11.V8B()), "tbl v7.8b, {v8.16b, v9.16b, v10.16b}, v11.8b"); - COMPARE(Tbl(v12.V8B(), v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), v17.V8B()), + COMPARE(Tbl(v12.V8B(), + v13.V16B(), + v14.V16B(), + v15.V16B(), + v16.V16B(), + v17.V8B()), "tbl v12.8b, {v13.16b, v14.16b, v15.16b, v16.16b}, v17.8b"); - COMPARE(Tbl(v18.V16B(), v19.V16B(), v20.V16B()), "tbl v18.16b, {v19.16b}, v20.16b"); + COMPARE(Tbl(v18.V16B(), v19.V16B(), v20.V16B()), + "tbl v18.16b, {v19.16b}, v20.16b"); COMPARE(Tbl(v21.V16B(), v22.V16B(), v23.V16B(), v24.V16B()), "tbl v21.16b, {v22.16b, v23.16b}, v24.16b"); COMPARE(Tbl(v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B()), "tbl v25.16b, {v26.16b, v27.16b, 
v28.16b}, v29.16b"); - COMPARE(Tbl(v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()), + COMPARE(Tbl(v30.V16B(), + v31.V16B(), + v0.V16B(), + v1.V16B(), + v2.V16B(), + v3.V16B()), "tbl v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v3.16b"); COMPARE(Tbx(v0.V8B(), v1.V16B(), v2.V8B()), "tbx v0.8b, {v1.16b}, v2.8b"); @@ -5091,14 +5256,25 @@ TEST(neon_table) { "tbx v3.8b, {v4.16b, v5.16b}, v6.8b"); COMPARE(Tbx(v7.V8B(), v8.V16B(), v9.V16B(), v10.V16B(), v11.V8B()), "tbx v7.8b, {v8.16b, v9.16b, v10.16b}, v11.8b"); - COMPARE(Tbx(v12.V8B(), v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), v17.V8B()), + COMPARE(Tbx(v12.V8B(), + v13.V16B(), + v14.V16B(), + v15.V16B(), + v16.V16B(), + v17.V8B()), "tbx v12.8b, {v13.16b, v14.16b, v15.16b, v16.16b}, v17.8b"); - COMPARE(Tbx(v18.V16B(), v19.V16B(), v20.V16B()), "tbx v18.16b, {v19.16b}, v20.16b"); + COMPARE(Tbx(v18.V16B(), v19.V16B(), v20.V16B()), + "tbx v18.16b, {v19.16b}, v20.16b"); COMPARE(Tbx(v21.V16B(), v22.V16B(), v23.V16B(), v24.V16B()), "tbx v21.16b, {v22.16b, v23.16b}, v24.16b"); COMPARE(Tbx(v25.V16B(), v26.V16B(), v27.V16B(), v28.V16B(), v29.V16B()), "tbx v25.16b, {v26.16b, v27.16b, v28.16b}, v29.16b"); - COMPARE(Tbx(v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()), + COMPARE(Tbx(v30.V16B(), + v31.V16B(), + v0.V16B(), + v1.V16B(), + v2.V16B(), + v3.V16B()), "tbx v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v3.16b"); CLEANUP(); @@ -5148,7 +5324,7 @@ TEST(neon_modimm) { COMPARE(Mvni(v4.V4S(), 0xaa, MSL, 8), "mvni v4.4s, #0xaa, msl #8"); COMPARE(Mvni(v1.V4S(), 0xcc, MSL, 16), "mvni v1.4s, #0xcc, msl #16"); - COMPARE(Movi(v4.V8B(), 0xaa), "movi v4.8b, #0xaa"); + COMPARE(Movi(v4.V8B(), 0xaa), "movi v4.8b, #0xaa"); COMPARE(Movi(v1.V16B(), 0xcc), "movi v1.16b, #0xcc"); COMPARE(Movi(v4.V4H(), 0xaa, LSL, 0), "movi v4.4h, #0xaa, lsl #0"); @@ -5164,7 +5340,7 @@ TEST(neon_modimm) { COMPARE(Movi(v4.V4S(), 0xaa, MSL, 8), "movi v4.4s, #0xaa, msl #8"); COMPARE(Movi(v1.V4S(), 0xcc, MSL, 16), "movi v1.4s, 
#0xcc, msl #16"); - COMPARE(Movi(d2, 0xffff0000ffffff), "movi d2, #0xffff0000ffffff"); + COMPARE(Movi(d2, 0xffff0000ffffff), "movi d2, #0xffff0000ffffff"); COMPARE(Movi(v1.V2D(), 0xffff0000ffffff), "movi v1.2d, #0xffff0000ffffff"); COMPARE(Fmov(v0.V2S(), 1.0f), "fmov v0.2s, #0x70 (1.0000)"); @@ -5184,42 +5360,42 @@ TEST(neon_modimm) { TEST(neon_2regmisc) { SETUP_MACRO(); - COMPARE(Shll(v1.V8H(), v8.V8B(), 8), "shll v1.8h, v8.8b, #8"); - COMPARE(Shll(v3.V4S(), v1.V4H(), 16), "shll v3.4s, v1.4h, #16"); - COMPARE(Shll(v5.V2D(), v3.V2S(), 32), "shll v5.2d, v3.2s, #32"); + COMPARE(Shll(v1.V8H(), v8.V8B(), 8), "shll v1.8h, v8.8b, #8"); + COMPARE(Shll(v3.V4S(), v1.V4H(), 16), "shll v3.4s, v1.4h, #16"); + COMPARE(Shll(v5.V2D(), v3.V2S(), 32), "shll v5.2d, v3.2s, #32"); COMPARE(Shll2(v2.V8H(), v9.V16B(), 8), "shll2 v2.8h, v9.16b, #8"); - COMPARE(Shll2(v4.V4S(), v2.V8H(), 16), "shll2 v4.4s, v2.8h, #16"); - COMPARE(Shll2(v6.V2D(), v4.V4S(), 32), "shll2 v6.2d, v4.4s, #32"); + COMPARE(Shll2(v4.V4S(), v2.V8H(), 16), "shll2 v4.4s, v2.8h, #16"); + COMPARE(Shll2(v6.V2D(), v4.V4S(), 32), "shll2 v6.2d, v4.4s, #32"); // An unallocated form of shll. COMPARE(dci(0x2ee13bff), "unallocated (NEON2RegMisc)"); // An unallocated form of shll2. COMPARE(dci(0x6ee13bff), "unallocated (NEON2RegMisc)"); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmeq(v0.M, v1.M, 0), "cmeq v0." S ", v1." S ", #0"); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmge(v0.M, v1.M, 0), "cmge v0." S ", v1." S ", #0"); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmgt(v0.M, v1.M, 0), "cmgt v0." S ", v1." S ", #0"); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmle(v0.M, v1.M, 0), "cmle v0." S ", v1." 
S ", #0"); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Cmlt(v0.M, v1.M, 0), "cmlt v0." S ", v1." S ", #0"); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Cmeq(v0.D(), v1.D(), 0), "cmeq d0, d1, #0"); COMPARE(Cmge(v3.D(), v4.D(), 0), "cmge d3, d4, #0"); @@ -5227,99 +5403,93 @@ TEST(neon_2regmisc) { COMPARE(Cmle(v0.D(), v1.D(), 0), "cmle d0, d1, #0"); COMPARE(Cmlt(v3.D(), v4.D(), 0), "cmlt d3, d4, #0"); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmeq(v0.M, v1.M, 0), "fcmeq v0." S ", v1." S ", #0.0"); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Fcmeq(v0.S(), v1.S(), 0), "fcmeq s0, s1, #0.0"); COMPARE(Fcmeq(v0.D(), v1.D(), 0), "fcmeq d0, d1, #0.0"); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmge(v0.M, v1.M, 0), "fcmge v0." S ", v1." S ", #0.0"); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Fcmge(v0.S(), v1.S(), 0), "fcmge s0, s1, #0.0"); COMPARE(Fcmge(v0.D(), v1.D(), 0), "fcmge d0, d1, #0.0"); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmgt(v0.M, v1.M, 0), "fcmgt v0." S ", v1." S ", #0.0"); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Fcmgt(v0.S(), v1.S(), 0), "fcmgt s0, s1, #0.0"); COMPARE(Fcmgt(v0.D(), v1.D(), 0), "fcmgt d0, d1, #0.0"); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmle(v0.M, v1.M, 0), "fcmle v0." S ", v1." S ", #0.0"); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Fcmle(v0.S(), v1.S(), 0), "fcmle s0, s1, #0.0"); COMPARE(Fcmle(v0.D(), v1.D(), 0), "fcmle d0, d1, #0.0"); - #define DISASM_INST(M, S) \ +#define DISASM_INST(M, S) \ COMPARE(Fcmlt(v0.M, v1.M, 0), "fcmlt v0." S ", v1." 
S ", #0.0"); NEON_FORMAT_LIST_FP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST COMPARE(Fcmlt(v0.S(), v1.S(), 0), "fcmlt s0, s1, #0.0"); COMPARE(Fcmlt(v0.D(), v1.D(), 0), "fcmlt d0, d1, #0.0"); - #define DISASM_INST(M, S) \ - COMPARE(Neg(v0.M, v1.M), "neg v0." S ", v1." S); +#define DISASM_INST(M, S) COMPARE(Neg(v0.M, v1.M), "neg v0." S ", v1." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(Neg(v0.D(), v1.D()), "neg d0, d1"); + COMPARE(Neg(v0.D(), v1.D()), "neg d0, d1"); - #define DISASM_INST(M, S) \ - COMPARE(Sqneg(v0.M, v1.M), "sqneg v0." S ", v1." S); +#define DISASM_INST(M, S) COMPARE(Sqneg(v0.M, v1.M), "sqneg v0." S ", v1." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(Sqneg(b0, b1), "sqneg b0, b1"); - COMPARE(Sqneg(h1, h2), "sqneg h1, h2"); - COMPARE(Sqneg(s2, s3), "sqneg s2, s3"); - COMPARE(Sqneg(d3, d4), "sqneg d3, d4"); + COMPARE(Sqneg(b0, b1), "sqneg b0, b1"); + COMPARE(Sqneg(h1, h2), "sqneg h1, h2"); + COMPARE(Sqneg(s2, s3), "sqneg s2, s3"); + COMPARE(Sqneg(d3, d4), "sqneg d3, d4"); - #define DISASM_INST(M, S) \ - COMPARE(Abs(v0.M, v1.M), "abs v0." S ", v1." S); +#define DISASM_INST(M, S) COMPARE(Abs(v0.M, v1.M), "abs v0." S ", v1." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(Abs(v0.D(), v1.D()), "abs d0, d1"); + COMPARE(Abs(v0.D(), v1.D()), "abs d0, d1"); - #define DISASM_INST(M, S) \ - COMPARE(Sqabs(v0.M, v1.M), "sqabs v0." S ", v1." S); +#define DISASM_INST(M, S) COMPARE(Sqabs(v0.M, v1.M), "sqabs v0." S ", v1." 
S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(Sqabs(b0, b1), "sqabs b0, b1"); - COMPARE(Sqabs(h1, h2), "sqabs h1, h2"); - COMPARE(Sqabs(s2, s3), "sqabs s2, s3"); - COMPARE(Sqabs(d3, d4), "sqabs d3, d4"); + COMPARE(Sqabs(b0, b1), "sqabs b0, b1"); + COMPARE(Sqabs(h1, h2), "sqabs h1, h2"); + COMPARE(Sqabs(s2, s3), "sqabs s2, s3"); + COMPARE(Sqabs(d3, d4), "sqabs d3, d4"); - #define DISASM_INST(M, S) \ - COMPARE(Suqadd(v0.M, v1.M), "suqadd v0." S ", v1." S); +#define DISASM_INST(M, S) COMPARE(Suqadd(v0.M, v1.M), "suqadd v0." S ", v1." S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(Suqadd(b0, b1), "suqadd b0, b1"); - COMPARE(Suqadd(h1, h2), "suqadd h1, h2"); - COMPARE(Suqadd(s2, s3), "suqadd s2, s3"); - COMPARE(Suqadd(d3, d4), "suqadd d3, d4"); + COMPARE(Suqadd(b0, b1), "suqadd b0, b1"); + COMPARE(Suqadd(h1, h2), "suqadd h1, h2"); + COMPARE(Suqadd(s2, s3), "suqadd s2, s3"); + COMPARE(Suqadd(d3, d4), "suqadd d3, d4"); - #define DISASM_INST(M, S) \ - COMPARE(Usqadd(v0.M, v1.M), "usqadd v0." S ", v1." S); +#define DISASM_INST(M, S) COMPARE(Usqadd(v0.M, v1.M), "usqadd v0." S ", v1." 
S); NEON_FORMAT_LIST(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - COMPARE(Usqadd(b0, b1), "usqadd b0, b1"); - COMPARE(Usqadd(h1, h2), "usqadd h1, h2"); - COMPARE(Usqadd(s2, s3), "usqadd s2, s3"); - COMPARE(Usqadd(d3, d4), "usqadd d3, d4"); + COMPARE(Usqadd(b0, b1), "usqadd b0, b1"); + COMPARE(Usqadd(h1, h2), "usqadd h1, h2"); + COMPARE(Usqadd(s2, s3), "usqadd s2, s3"); + COMPARE(Usqadd(d3, d4), "usqadd d3, d4"); COMPARE(Xtn(v0.V8B(), v1.V8H()), "xtn v0.8b, v1.8h"); COMPARE(Xtn(v1.V4H(), v2.V4S()), "xtn v1.4h, v2.4s"); @@ -5335,7 +5505,7 @@ TEST(neon_2regmisc) { COMPARE(Sqxtn2(v1.V8H(), v2.V4S()), "sqxtn2 v1.8h, v2.4s"); COMPARE(Sqxtn2(v2.V4S(), v3.V2D()), "sqxtn2 v2.4s, v3.2d"); COMPARE(Sqxtn(b19, h0), "sqxtn b19, h0"); - COMPARE(Sqxtn(h20, s0), "sqxtn h20, s0") ; + COMPARE(Sqxtn(h20, s0), "sqxtn h20, s0"); COMPARE(Sqxtn(s21, d0), "sqxtn s21, d0"); COMPARE(Uqxtn(v0.V8B(), v1.V8H()), "uqxtn v0.8b, v1.8h"); @@ -5345,7 +5515,7 @@ TEST(neon_2regmisc) { COMPARE(Uqxtn2(v1.V8H(), v2.V4S()), "uqxtn2 v1.8h, v2.4s"); COMPARE(Uqxtn2(v2.V4S(), v3.V2D()), "uqxtn2 v2.4s, v3.2d"); COMPARE(Uqxtn(b19, h0), "uqxtn b19, h0"); - COMPARE(Uqxtn(h20, s0), "uqxtn h20, s0") ; + COMPARE(Uqxtn(h20, s0), "uqxtn h20, s0"); COMPARE(Uqxtn(s21, d0), "uqxtn s21, d0"); COMPARE(Sqxtun(v0.V8B(), v1.V8H()), "sqxtun v0.8b, v1.8h"); @@ -5355,24 +5525,24 @@ TEST(neon_2regmisc) { COMPARE(Sqxtun2(v1.V8H(), v2.V4S()), "sqxtun2 v1.8h, v2.4s"); COMPARE(Sqxtun2(v2.V4S(), v3.V2D()), "sqxtun2 v2.4s, v3.2d"); COMPARE(Sqxtun(b19, h0), "sqxtun b19, h0"); - COMPARE(Sqxtun(h20, s0), "sqxtun h20, s0") ; + COMPARE(Sqxtun(h20, s0), "sqxtun h20, s0"); COMPARE(Sqxtun(s21, d0), "sqxtun s21, d0"); - COMPARE(Cls(v1.V8B(), v8.V8B()), "cls v1.8b, v8.8b"); + COMPARE(Cls(v1.V8B(), v8.V8B()), "cls v1.8b, v8.8b"); COMPARE(Cls(v2.V16B(), v9.V16B()), "cls v2.16b, v9.16b"); - COMPARE(Cls(v3.V4H(), v1.V4H()), "cls v3.4h, v1.4h"); - COMPARE(Cls(v4.V8H(), v2.V8H()), "cls v4.8h, v2.8h"); - COMPARE(Cls(v5.V2S(), v3.V2S()), "cls 
v5.2s, v3.2s"); - COMPARE(Cls(v6.V4S(), v4.V4S()), "cls v6.4s, v4.4s"); + COMPARE(Cls(v3.V4H(), v1.V4H()), "cls v3.4h, v1.4h"); + COMPARE(Cls(v4.V8H(), v2.V8H()), "cls v4.8h, v2.8h"); + COMPARE(Cls(v5.V2S(), v3.V2S()), "cls v5.2s, v3.2s"); + COMPARE(Cls(v6.V4S(), v4.V4S()), "cls v6.4s, v4.4s"); - COMPARE(Clz(v1.V8B(), v8.V8B()), "clz v1.8b, v8.8b"); + COMPARE(Clz(v1.V8B(), v8.V8B()), "clz v1.8b, v8.8b"); COMPARE(Clz(v2.V16B(), v9.V16B()), "clz v2.16b, v9.16b"); - COMPARE(Clz(v3.V4H(), v1.V4H()), "clz v3.4h, v1.4h"); - COMPARE(Clz(v4.V8H(), v2.V8H()), "clz v4.8h, v2.8h"); - COMPARE(Clz(v5.V2S(), v3.V2S()), "clz v5.2s, v3.2s"); - COMPARE(Clz(v6.V4S(), v4.V4S()), "clz v6.4s, v4.4s"); + COMPARE(Clz(v3.V4H(), v1.V4H()), "clz v3.4h, v1.4h"); + COMPARE(Clz(v4.V8H(), v2.V8H()), "clz v4.8h, v2.8h"); + COMPARE(Clz(v5.V2S(), v3.V2S()), "clz v5.2s, v3.2s"); + COMPARE(Clz(v6.V4S(), v4.V4S()), "clz v6.4s, v4.4s"); - COMPARE(Cnt(v1.V8B(), v8.V8B()), "cnt v1.8b, v8.8b"); + COMPARE(Cnt(v1.V8B(), v8.V8B()), "cnt v1.8b, v8.8b"); COMPARE(Cnt(v2.V16B(), v9.V16B()), "cnt v2.16b, v9.16b"); COMPARE(Mvn(v4.V8B(), v5.V8B()), "mvn v4.8b, v5.8b"); @@ -5381,116 +5551,116 @@ TEST(neon_2regmisc) { COMPARE(Not(v4.V8B(), v5.V8B()), "mvn v4.8b, v5.8b"); COMPARE(Not(v4.V16B(), v5.V16B()), "mvn v4.16b, v5.16b"); - COMPARE(Rev64(v1.V8B(), v8.V8B()), "rev64 v1.8b, v8.8b"); + COMPARE(Rev64(v1.V8B(), v8.V8B()), "rev64 v1.8b, v8.8b"); COMPARE(Rev64(v2.V16B(), v9.V16B()), "rev64 v2.16b, v9.16b"); - COMPARE(Rev64(v3.V4H(), v1.V4H()), "rev64 v3.4h, v1.4h"); - COMPARE(Rev64(v4.V8H(), v2.V8H()), "rev64 v4.8h, v2.8h"); - COMPARE(Rev64(v5.V2S(), v3.V2S()), "rev64 v5.2s, v3.2s"); - COMPARE(Rev64(v6.V4S(), v4.V4S()), "rev64 v6.4s, v4.4s"); + COMPARE(Rev64(v3.V4H(), v1.V4H()), "rev64 v3.4h, v1.4h"); + COMPARE(Rev64(v4.V8H(), v2.V8H()), "rev64 v4.8h, v2.8h"); + COMPARE(Rev64(v5.V2S(), v3.V2S()), "rev64 v5.2s, v3.2s"); + COMPARE(Rev64(v6.V4S(), v4.V4S()), "rev64 v6.4s, v4.4s"); - COMPARE(Rev32(v1.V8B(), v8.V8B()), 
"rev32 v1.8b, v8.8b"); + COMPARE(Rev32(v1.V8B(), v8.V8B()), "rev32 v1.8b, v8.8b"); COMPARE(Rev32(v2.V16B(), v9.V16B()), "rev32 v2.16b, v9.16b"); - COMPARE(Rev32(v3.V4H(), v1.V4H()), "rev32 v3.4h, v1.4h"); - COMPARE(Rev32(v4.V8H(), v2.V8H()), "rev32 v4.8h, v2.8h"); + COMPARE(Rev32(v3.V4H(), v1.V4H()), "rev32 v3.4h, v1.4h"); + COMPARE(Rev32(v4.V8H(), v2.V8H()), "rev32 v4.8h, v2.8h"); - COMPARE(Rev16(v1.V8B(), v8.V8B()), "rev16 v1.8b, v8.8b"); + COMPARE(Rev16(v1.V8B(), v8.V8B()), "rev16 v1.8b, v8.8b"); COMPARE(Rev16(v2.V16B(), v9.V16B()), "rev16 v2.16b, v9.16b"); - COMPARE(Rbit(v1.V8B(), v8.V8B()), "rbit v1.8b, v8.8b"); + COMPARE(Rbit(v1.V8B(), v8.V8B()), "rbit v1.8b, v8.8b"); COMPARE(Rbit(v2.V16B(), v9.V16B()), "rbit v2.16b, v9.16b"); - COMPARE(Ursqrte(v2.V2S(), v9.V2S()), "ursqrte v2.2s, v9.2s"); + COMPARE(Ursqrte(v2.V2S(), v9.V2S()), "ursqrte v2.2s, v9.2s"); COMPARE(Ursqrte(v16.V4S(), v23.V4S()), "ursqrte v16.4s, v23.4s"); - COMPARE(Urecpe(v2.V2S(), v9.V2S()), "urecpe v2.2s, v9.2s"); + COMPARE(Urecpe(v2.V2S(), v9.V2S()), "urecpe v2.2s, v9.2s"); COMPARE(Urecpe(v16.V4S(), v23.V4S()), "urecpe v16.4s, v23.4s"); - COMPARE(Frsqrte(v2.V2S(), v9.V2S()), "frsqrte v2.2s, v9.2s"); + COMPARE(Frsqrte(v2.V2S(), v9.V2S()), "frsqrte v2.2s, v9.2s"); COMPARE(Frsqrte(v16.V4S(), v23.V4S()), "frsqrte v16.4s, v23.4s"); - COMPARE(Frsqrte(v2.V2D(), v9.V2D()), "frsqrte v2.2d, v9.2d"); + COMPARE(Frsqrte(v2.V2D(), v9.V2D()), "frsqrte v2.2d, v9.2d"); COMPARE(Frsqrte(v0.S(), v1.S()), "frsqrte s0, s1"); COMPARE(Frsqrte(v0.D(), v1.D()), "frsqrte d0, d1"); - COMPARE(Frecpe(v2.V2S(), v9.V2S()), "frecpe v2.2s, v9.2s"); + COMPARE(Frecpe(v2.V2S(), v9.V2S()), "frecpe v2.2s, v9.2s"); COMPARE(Frecpe(v16.V4S(), v23.V4S()), "frecpe v16.4s, v23.4s"); - COMPARE(Frecpe(v2.V2D(), v9.V2D()), "frecpe v2.2d, v9.2d"); + COMPARE(Frecpe(v2.V2D(), v9.V2D()), "frecpe v2.2d, v9.2d"); COMPARE(Frecpe(v0.S(), v1.S()), "frecpe s0, s1"); COMPARE(Frecpe(v0.D(), v1.D()), "frecpe d0, d1"); - COMPARE(Fabs(v2.V2S(), v9.V2S()), 
"fabs v2.2s, v9.2s"); + COMPARE(Fabs(v2.V2S(), v9.V2S()), "fabs v2.2s, v9.2s"); COMPARE(Fabs(v16.V4S(), v23.V4S()), "fabs v16.4s, v23.4s"); COMPARE(Fabs(v31.V2D(), v30.V2D()), "fabs v31.2d, v30.2d"); - COMPARE(Fneg(v2.V2S(), v9.V2S()), "fneg v2.2s, v9.2s"); + COMPARE(Fneg(v2.V2S(), v9.V2S()), "fneg v2.2s, v9.2s"); COMPARE(Fneg(v16.V4S(), v23.V4S()), "fneg v16.4s, v23.4s"); COMPARE(Fneg(v31.V2D(), v30.V2D()), "fneg v31.2d, v30.2d"); - COMPARE(Frintn(v2.V2S(), v9.V2S()), "frintn v2.2s, v9.2s"); + COMPARE(Frintn(v2.V2S(), v9.V2S()), "frintn v2.2s, v9.2s"); COMPARE(Frintn(v16.V4S(), v23.V4S()), "frintn v16.4s, v23.4s"); COMPARE(Frintn(v31.V2D(), v30.V2D()), "frintn v31.2d, v30.2d"); - COMPARE(Frinta(v2.V2S(), v9.V2S()), "frinta v2.2s, v9.2s"); + COMPARE(Frinta(v2.V2S(), v9.V2S()), "frinta v2.2s, v9.2s"); COMPARE(Frinta(v16.V4S(), v23.V4S()), "frinta v16.4s, v23.4s"); COMPARE(Frinta(v31.V2D(), v30.V2D()), "frinta v31.2d, v30.2d"); - COMPARE(Frintp(v2.V2S(), v9.V2S()), "frintp v2.2s, v9.2s"); + COMPARE(Frintp(v2.V2S(), v9.V2S()), "frintp v2.2s, v9.2s"); COMPARE(Frintp(v16.V4S(), v23.V4S()), "frintp v16.4s, v23.4s"); COMPARE(Frintp(v31.V2D(), v30.V2D()), "frintp v31.2d, v30.2d"); - COMPARE(Frintm(v2.V2S(), v9.V2S()), "frintm v2.2s, v9.2s"); + COMPARE(Frintm(v2.V2S(), v9.V2S()), "frintm v2.2s, v9.2s"); COMPARE(Frintm(v16.V4S(), v23.V4S()), "frintm v16.4s, v23.4s"); COMPARE(Frintm(v31.V2D(), v30.V2D()), "frintm v31.2d, v30.2d"); - COMPARE(Frintx(v2.V2S(), v9.V2S()), "frintx v2.2s, v9.2s"); + COMPARE(Frintx(v2.V2S(), v9.V2S()), "frintx v2.2s, v9.2s"); COMPARE(Frintx(v16.V4S(), v23.V4S()), "frintx v16.4s, v23.4s"); COMPARE(Frintx(v31.V2D(), v30.V2D()), "frintx v31.2d, v30.2d"); - COMPARE(Frintz(v2.V2S(), v9.V2S()), "frintz v2.2s, v9.2s"); + COMPARE(Frintz(v2.V2S(), v9.V2S()), "frintz v2.2s, v9.2s"); COMPARE(Frintz(v16.V4S(), v23.V4S()), "frintz v16.4s, v23.4s"); COMPARE(Frintz(v31.V2D(), v30.V2D()), "frintz v31.2d, v30.2d"); - COMPARE(Frinti(v2.V2S(), v9.V2S()), "frinti 
v2.2s, v9.2s"); + COMPARE(Frinti(v2.V2S(), v9.V2S()), "frinti v2.2s, v9.2s"); COMPARE(Frinti(v16.V4S(), v23.V4S()), "frinti v16.4s, v23.4s"); COMPARE(Frinti(v31.V2D(), v30.V2D()), "frinti v31.2d, v30.2d"); - COMPARE(Fsqrt(v3.V2S(), v10.V2S()), "fsqrt v3.2s, v10.2s"); + COMPARE(Fsqrt(v3.V2S(), v10.V2S()), "fsqrt v3.2s, v10.2s"); COMPARE(Fsqrt(v22.V4S(), v11.V4S()), "fsqrt v22.4s, v11.4s"); - COMPARE(Fsqrt(v31.V2D(), v0.V2D()), "fsqrt v31.2d, v0.2d"); + COMPARE(Fsqrt(v31.V2D(), v0.V2D()), "fsqrt v31.2d, v0.2d"); - COMPARE(Fcvtns(v4.V2S(), v11.V2S()), "fcvtns v4.2s, v11.2s"); + COMPARE(Fcvtns(v4.V2S(), v11.V2S()), "fcvtns v4.2s, v11.2s"); COMPARE(Fcvtns(v23.V4S(), v12.V4S()), "fcvtns v23.4s, v12.4s"); - COMPARE(Fcvtns(v30.V2D(), v1.V2D()), "fcvtns v30.2d, v1.2d"); - COMPARE(Fcvtnu(v4.V2S(), v11.V2S()), "fcvtnu v4.2s, v11.2s"); + COMPARE(Fcvtns(v30.V2D(), v1.V2D()), "fcvtns v30.2d, v1.2d"); + COMPARE(Fcvtnu(v4.V2S(), v11.V2S()), "fcvtnu v4.2s, v11.2s"); COMPARE(Fcvtnu(v23.V4S(), v12.V4S()), "fcvtnu v23.4s, v12.4s"); - COMPARE(Fcvtnu(v30.V2D(), v1.V2D()), "fcvtnu v30.2d, v1.2d"); + COMPARE(Fcvtnu(v30.V2D(), v1.V2D()), "fcvtnu v30.2d, v1.2d"); - COMPARE(Fcvtps(v4.V2S(), v11.V2S()), "fcvtps v4.2s, v11.2s"); + COMPARE(Fcvtps(v4.V2S(), v11.V2S()), "fcvtps v4.2s, v11.2s"); COMPARE(Fcvtps(v23.V4S(), v12.V4S()), "fcvtps v23.4s, v12.4s"); - COMPARE(Fcvtps(v30.V2D(), v1.V2D()), "fcvtps v30.2d, v1.2d"); - COMPARE(Fcvtpu(v4.V2S(), v11.V2S()), "fcvtpu v4.2s, v11.2s"); + COMPARE(Fcvtps(v30.V2D(), v1.V2D()), "fcvtps v30.2d, v1.2d"); + COMPARE(Fcvtpu(v4.V2S(), v11.V2S()), "fcvtpu v4.2s, v11.2s"); COMPARE(Fcvtpu(v23.V4S(), v12.V4S()), "fcvtpu v23.4s, v12.4s"); - COMPARE(Fcvtpu(v30.V2D(), v1.V2D()), "fcvtpu v30.2d, v1.2d"); + COMPARE(Fcvtpu(v30.V2D(), v1.V2D()), "fcvtpu v30.2d, v1.2d"); - COMPARE(Fcvtms(v4.V2S(), v11.V2S()), "fcvtms v4.2s, v11.2s"); + COMPARE(Fcvtms(v4.V2S(), v11.V2S()), "fcvtms v4.2s, v11.2s"); COMPARE(Fcvtms(v23.V4S(), v12.V4S()), "fcvtms v23.4s, v12.4s"); - 
COMPARE(Fcvtms(v30.V2D(), v1.V2D()), "fcvtms v30.2d, v1.2d"); - COMPARE(Fcvtmu(v4.V2S(), v11.V2S()), "fcvtmu v4.2s, v11.2s"); + COMPARE(Fcvtms(v30.V2D(), v1.V2D()), "fcvtms v30.2d, v1.2d"); + COMPARE(Fcvtmu(v4.V2S(), v11.V2S()), "fcvtmu v4.2s, v11.2s"); COMPARE(Fcvtmu(v23.V4S(), v12.V4S()), "fcvtmu v23.4s, v12.4s"); - COMPARE(Fcvtmu(v30.V2D(), v1.V2D()), "fcvtmu v30.2d, v1.2d"); + COMPARE(Fcvtmu(v30.V2D(), v1.V2D()), "fcvtmu v30.2d, v1.2d"); - COMPARE(Fcvtzs(v4.V2S(), v11.V2S()), "fcvtzs v4.2s, v11.2s"); + COMPARE(Fcvtzs(v4.V2S(), v11.V2S()), "fcvtzs v4.2s, v11.2s"); COMPARE(Fcvtzs(v23.V4S(), v12.V4S()), "fcvtzs v23.4s, v12.4s"); - COMPARE(Fcvtzs(v30.V2D(), v1.V2D()), "fcvtzs v30.2d, v1.2d"); - COMPARE(Fcvtzu(v4.V2S(), v11.V2S()), "fcvtzu v4.2s, v11.2s"); + COMPARE(Fcvtzs(v30.V2D(), v1.V2D()), "fcvtzs v30.2d, v1.2d"); + COMPARE(Fcvtzu(v4.V2S(), v11.V2S()), "fcvtzu v4.2s, v11.2s"); COMPARE(Fcvtzu(v23.V4S(), v12.V4S()), "fcvtzu v23.4s, v12.4s"); - COMPARE(Fcvtzu(v30.V2D(), v1.V2D()), "fcvtzu v30.2d, v1.2d"); + COMPARE(Fcvtzu(v30.V2D(), v1.V2D()), "fcvtzu v30.2d, v1.2d"); - COMPARE(Fcvtas(v4.V2S(), v11.V2S()), "fcvtas v4.2s, v11.2s"); + COMPARE(Fcvtas(v4.V2S(), v11.V2S()), "fcvtas v4.2s, v11.2s"); COMPARE(Fcvtas(v23.V4S(), v12.V4S()), "fcvtas v23.4s, v12.4s"); - COMPARE(Fcvtas(v30.V2D(), v1.V2D()), "fcvtas v30.2d, v1.2d"); - COMPARE(Fcvtau(v4.V2S(), v11.V2S()), "fcvtau v4.2s, v11.2s"); + COMPARE(Fcvtas(v30.V2D(), v1.V2D()), "fcvtas v30.2d, v1.2d"); + COMPARE(Fcvtau(v4.V2S(), v11.V2S()), "fcvtau v4.2s, v11.2s"); COMPARE(Fcvtau(v23.V4S(), v12.V4S()), "fcvtau v23.4s, v12.4s"); - COMPARE(Fcvtau(v30.V2D(), v1.V2D()), "fcvtau v30.2d, v1.2d"); + COMPARE(Fcvtau(v30.V2D(), v1.V2D()), "fcvtau v30.2d, v1.2d"); COMPARE(Fcvtns(s0, s1), "fcvtns s0, s1"); COMPARE(Fcvtns(d2, d3), "fcvtns d2, d3"); @@ -5535,34 +5705,34 @@ TEST(neon_2regmisc) { COMPARE(Scvtf(v5.V2S(), v3.V2S()), "scvtf v5.2s, v3.2s"); COMPARE(Scvtf(v6.V4S(), v4.V4S()), "scvtf v6.4s, v4.4s"); COMPARE(Scvtf(v7.V2D(), 
v5.V2D()), "scvtf v7.2d, v5.2d"); - COMPARE(Scvtf(s8, s6), "scvtf s8, s6"); - COMPARE(Scvtf(d8, d6), "scvtf d8, d6"); + COMPARE(Scvtf(s8, s6), "scvtf s8, s6"); + COMPARE(Scvtf(d8, d6), "scvtf d8, d6"); COMPARE(Ucvtf(v5.V2S(), v3.V2S()), "ucvtf v5.2s, v3.2s"); COMPARE(Ucvtf(v6.V4S(), v4.V4S()), "ucvtf v6.4s, v4.4s"); COMPARE(Ucvtf(v7.V2D(), v5.V2D()), "ucvtf v7.2d, v5.2d"); - COMPARE(Ucvtf(s8, s6), "ucvtf s8, s6"); - COMPARE(Ucvtf(d8, d6), "ucvtf d8, d6"); + COMPARE(Ucvtf(s8, s6), "ucvtf s8, s6"); + COMPARE(Ucvtf(d8, d6), "ucvtf d8, d6"); - #define DISASM_INST(TA, TAS, TB, TBS) \ +#define DISASM_INST(TA, TAS, TB, TBS) \ COMPARE(Saddlp(v0.TA, v1.TB), "saddlp v0." TAS ", v1." TBS); NEON_FORMAT_LIST_LP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ +#define DISASM_INST(TA, TAS, TB, TBS) \ COMPARE(Uaddlp(v0.TA, v1.TB), "uaddlp v0." TAS ", v1." TBS); NEON_FORMAT_LIST_LP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ +#define DISASM_INST(TA, TAS, TB, TBS) \ COMPARE(Sadalp(v0.TA, v1.TB), "sadalp v0." TAS ", v1." TBS); NEON_FORMAT_LIST_LP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST - #define DISASM_INST(TA, TAS, TB, TBS) \ +#define DISASM_INST(TA, TAS, TB, TBS) \ COMPARE(Uadalp(v0.TA, v1.TB), "uadalp v0." TAS ", v1." 
TBS); NEON_FORMAT_LIST_LP(DISASM_INST) - #undef DISASM_INST +#undef DISASM_INST CLEANUP(); } @@ -5570,47 +5740,47 @@ TEST(neon_2regmisc) { TEST(neon_acrosslanes) { SETUP_MACRO(); - COMPARE(Smaxv(b4, v5.V8B()), "smaxv b4, v5.8b"); + COMPARE(Smaxv(b4, v5.V8B()), "smaxv b4, v5.8b"); COMPARE(Smaxv(b4, v5.V16B()), "smaxv b4, v5.16b"); - COMPARE(Smaxv(h4, v5.V4H()), "smaxv h4, v5.4h"); - COMPARE(Smaxv(h4, v5.V8H()), "smaxv h4, v5.8h"); - COMPARE(Smaxv(s4, v5.V4S()), "smaxv s4, v5.4s"); + COMPARE(Smaxv(h4, v5.V4H()), "smaxv h4, v5.4h"); + COMPARE(Smaxv(h4, v5.V8H()), "smaxv h4, v5.8h"); + COMPARE(Smaxv(s4, v5.V4S()), "smaxv s4, v5.4s"); - COMPARE(Sminv(b4, v5.V8B()), "sminv b4, v5.8b"); + COMPARE(Sminv(b4, v5.V8B()), "sminv b4, v5.8b"); COMPARE(Sminv(b4, v5.V16B()), "sminv b4, v5.16b"); - COMPARE(Sminv(h4, v5.V4H()), "sminv h4, v5.4h"); - COMPARE(Sminv(h4, v5.V8H()), "sminv h4, v5.8h"); - COMPARE(Sminv(s4, v5.V4S()), "sminv s4, v5.4s"); + COMPARE(Sminv(h4, v5.V4H()), "sminv h4, v5.4h"); + COMPARE(Sminv(h4, v5.V8H()), "sminv h4, v5.8h"); + COMPARE(Sminv(s4, v5.V4S()), "sminv s4, v5.4s"); - COMPARE(Umaxv(b4, v5.V8B()), "umaxv b4, v5.8b"); + COMPARE(Umaxv(b4, v5.V8B()), "umaxv b4, v5.8b"); COMPARE(Umaxv(b4, v5.V16B()), "umaxv b4, v5.16b"); - COMPARE(Umaxv(h4, v5.V4H()), "umaxv h4, v5.4h"); - COMPARE(Umaxv(h4, v5.V8H()), "umaxv h4, v5.8h"); - COMPARE(Umaxv(s4, v5.V4S()), "umaxv s4, v5.4s"); + COMPARE(Umaxv(h4, v5.V4H()), "umaxv h4, v5.4h"); + COMPARE(Umaxv(h4, v5.V8H()), "umaxv h4, v5.8h"); + COMPARE(Umaxv(s4, v5.V4S()), "umaxv s4, v5.4s"); - COMPARE(Uminv(b4, v5.V8B()), "uminv b4, v5.8b"); + COMPARE(Uminv(b4, v5.V8B()), "uminv b4, v5.8b"); COMPARE(Uminv(b4, v5.V16B()), "uminv b4, v5.16b"); - COMPARE(Uminv(h4, v5.V4H()), "uminv h4, v5.4h"); - COMPARE(Uminv(h4, v5.V8H()), "uminv h4, v5.8h"); - COMPARE(Uminv(s4, v5.V4S()), "uminv s4, v5.4s"); + COMPARE(Uminv(h4, v5.V4H()), "uminv h4, v5.4h"); + COMPARE(Uminv(h4, v5.V8H()), "uminv h4, v5.8h"); + COMPARE(Uminv(s4, v5.V4S()), 
"uminv s4, v5.4s"); - COMPARE(Addv(b4, v5.V8B()), "addv b4, v5.8b"); + COMPARE(Addv(b4, v5.V8B()), "addv b4, v5.8b"); COMPARE(Addv(b4, v5.V16B()), "addv b4, v5.16b"); - COMPARE(Addv(h4, v5.V4H()), "addv h4, v5.4h"); - COMPARE(Addv(h4, v5.V8H()), "addv h4, v5.8h"); - COMPARE(Addv(s4, v5.V4S()), "addv s4, v5.4s"); + COMPARE(Addv(h4, v5.V4H()), "addv h4, v5.4h"); + COMPARE(Addv(h4, v5.V8H()), "addv h4, v5.8h"); + COMPARE(Addv(s4, v5.V4S()), "addv s4, v5.4s"); - COMPARE(Saddlv(h4, v5.V8B()), "saddlv h4, v5.8b"); + COMPARE(Saddlv(h4, v5.V8B()), "saddlv h4, v5.8b"); COMPARE(Saddlv(h4, v5.V16B()), "saddlv h4, v5.16b"); - COMPARE(Saddlv(s4, v5.V4H()), "saddlv s4, v5.4h"); - COMPARE(Saddlv(s4, v5.V8H()), "saddlv s4, v5.8h"); - COMPARE(Saddlv(d4, v5.V4S()), "saddlv d4, v5.4s"); + COMPARE(Saddlv(s4, v5.V4H()), "saddlv s4, v5.4h"); + COMPARE(Saddlv(s4, v5.V8H()), "saddlv s4, v5.8h"); + COMPARE(Saddlv(d4, v5.V4S()), "saddlv d4, v5.4s"); - COMPARE(Uaddlv(h4, v5.V8B()), "uaddlv h4, v5.8b"); + COMPARE(Uaddlv(h4, v5.V8B()), "uaddlv h4, v5.8b"); COMPARE(Uaddlv(h4, v5.V16B()), "uaddlv h4, v5.16b"); - COMPARE(Uaddlv(s4, v5.V4H()), "uaddlv s4, v5.4h"); - COMPARE(Uaddlv(s4, v5.V8H()), "uaddlv s4, v5.8h"); - COMPARE(Uaddlv(d4, v5.V4S()), "uaddlv d4, v5.4s"); + COMPARE(Uaddlv(s4, v5.V4H()), "uaddlv s4, v5.4h"); + COMPARE(Uaddlv(s4, v5.V8H()), "uaddlv s4, v5.8h"); + COMPARE(Uaddlv(d4, v5.V4S()), "uaddlv d4, v5.4s"); COMPARE(Fmaxv(s4, v5.V4S()), "fmaxv s4, v5.4s"); COMPARE(Fminv(s4, v5.V4S()), "fminv s4, v5.4s"); @@ -5640,238 +5810,238 @@ TEST(neon_scalar_pairwise) { TEST(neon_shift_immediate) { SETUP_MACRO(); - COMPARE(Sshr(v0.V8B(), v1.V8B(), 1), "sshr v0.8b, v1.8b, #1"); - COMPARE(Sshr(v2.V8B(), v3.V8B(), 8), "sshr v2.8b, v3.8b, #8"); - COMPARE(Sshr(v4.V16B(), v5.V16B(), 1), "sshr v4.16b, v5.16b, #1"); - COMPARE(Sshr(v6.V16B(), v7.V16B(), 8), "sshr v6.16b, v7.16b, #8"); - COMPARE(Sshr(v8.V4H(), v9.V4H(), 1), "sshr v8.4h, v9.4h, #1"); - COMPARE(Sshr(v10.V4H(), v11.V4H(), 16), "sshr 
v10.4h, v11.4h, #16"); - COMPARE(Sshr(v12.V8H(), v13.V8H(), 1), "sshr v12.8h, v13.8h, #1"); - COMPARE(Sshr(v14.V8H(), v15.V8H(), 16), "sshr v14.8h, v15.8h, #16"); - COMPARE(Sshr(v16.V2S(), v17.V2S(), 1), "sshr v16.2s, v17.2s, #1"); - COMPARE(Sshr(v18.V2S(), v19.V2S(), 32), "sshr v18.2s, v19.2s, #32"); - COMPARE(Sshr(v20.V4S(), v21.V4S(), 1), "sshr v20.4s, v21.4s, #1"); - COMPARE(Sshr(v22.V4S(), v23.V4S(), 32), "sshr v22.4s, v23.4s, #32"); - COMPARE(Sshr(v28.V2D(), v29.V2D(), 1), "sshr v28.2d, v29.2d, #1"); - COMPARE(Sshr(v30.V2D(), v31.V2D(), 64), "sshr v30.2d, v31.2d, #64"); + COMPARE(Sshr(v0.V8B(), v1.V8B(), 1), "sshr v0.8b, v1.8b, #1"); + COMPARE(Sshr(v2.V8B(), v3.V8B(), 8), "sshr v2.8b, v3.8b, #8"); + COMPARE(Sshr(v4.V16B(), v5.V16B(), 1), "sshr v4.16b, v5.16b, #1"); + COMPARE(Sshr(v6.V16B(), v7.V16B(), 8), "sshr v6.16b, v7.16b, #8"); + COMPARE(Sshr(v8.V4H(), v9.V4H(), 1), "sshr v8.4h, v9.4h, #1"); + COMPARE(Sshr(v10.V4H(), v11.V4H(), 16), "sshr v10.4h, v11.4h, #16"); + COMPARE(Sshr(v12.V8H(), v13.V8H(), 1), "sshr v12.8h, v13.8h, #1"); + COMPARE(Sshr(v14.V8H(), v15.V8H(), 16), "sshr v14.8h, v15.8h, #16"); + COMPARE(Sshr(v16.V2S(), v17.V2S(), 1), "sshr v16.2s, v17.2s, #1"); + COMPARE(Sshr(v18.V2S(), v19.V2S(), 32), "sshr v18.2s, v19.2s, #32"); + COMPARE(Sshr(v20.V4S(), v21.V4S(), 1), "sshr v20.4s, v21.4s, #1"); + COMPARE(Sshr(v22.V4S(), v23.V4S(), 32), "sshr v22.4s, v23.4s, #32"); + COMPARE(Sshr(v28.V2D(), v29.V2D(), 1), "sshr v28.2d, v29.2d, #1"); + COMPARE(Sshr(v30.V2D(), v31.V2D(), 64), "sshr v30.2d, v31.2d, #64"); COMPARE(Sshr(d0, d1, 7), "sshr d0, d1, #7"); - COMPARE(Ushr(v0.V8B(), v1.V8B(), 1), "ushr v0.8b, v1.8b, #1"); - COMPARE(Ushr(v2.V8B(), v3.V8B(), 8), "ushr v2.8b, v3.8b, #8"); - COMPARE(Ushr(v4.V16B(), v5.V16B(), 1), "ushr v4.16b, v5.16b, #1"); - COMPARE(Ushr(v6.V16B(), v7.V16B(), 8), "ushr v6.16b, v7.16b, #8"); - COMPARE(Ushr(v8.V4H(), v9.V4H(), 1), "ushr v8.4h, v9.4h, #1"); - COMPARE(Ushr(v10.V4H(), v11.V4H(), 16), "ushr v10.4h, v11.4h, #16"); - 
COMPARE(Ushr(v12.V8H(), v13.V8H(), 1), "ushr v12.8h, v13.8h, #1"); - COMPARE(Ushr(v14.V8H(), v15.V8H(), 16), "ushr v14.8h, v15.8h, #16"); - COMPARE(Ushr(v16.V2S(), v17.V2S(), 1), "ushr v16.2s, v17.2s, #1"); - COMPARE(Ushr(v18.V2S(), v19.V2S(), 32), "ushr v18.2s, v19.2s, #32"); - COMPARE(Ushr(v20.V4S(), v21.V4S(), 1), "ushr v20.4s, v21.4s, #1"); - COMPARE(Ushr(v22.V4S(), v23.V4S(), 32), "ushr v22.4s, v23.4s, #32"); - COMPARE(Ushr(v28.V2D(), v29.V2D(), 1), "ushr v28.2d, v29.2d, #1"); - COMPARE(Ushr(v30.V2D(), v31.V2D(), 64), "ushr v30.2d, v31.2d, #64"); + COMPARE(Ushr(v0.V8B(), v1.V8B(), 1), "ushr v0.8b, v1.8b, #1"); + COMPARE(Ushr(v2.V8B(), v3.V8B(), 8), "ushr v2.8b, v3.8b, #8"); + COMPARE(Ushr(v4.V16B(), v5.V16B(), 1), "ushr v4.16b, v5.16b, #1"); + COMPARE(Ushr(v6.V16B(), v7.V16B(), 8), "ushr v6.16b, v7.16b, #8"); + COMPARE(Ushr(v8.V4H(), v9.V4H(), 1), "ushr v8.4h, v9.4h, #1"); + COMPARE(Ushr(v10.V4H(), v11.V4H(), 16), "ushr v10.4h, v11.4h, #16"); + COMPARE(Ushr(v12.V8H(), v13.V8H(), 1), "ushr v12.8h, v13.8h, #1"); + COMPARE(Ushr(v14.V8H(), v15.V8H(), 16), "ushr v14.8h, v15.8h, #16"); + COMPARE(Ushr(v16.V2S(), v17.V2S(), 1), "ushr v16.2s, v17.2s, #1"); + COMPARE(Ushr(v18.V2S(), v19.V2S(), 32), "ushr v18.2s, v19.2s, #32"); + COMPARE(Ushr(v20.V4S(), v21.V4S(), 1), "ushr v20.4s, v21.4s, #1"); + COMPARE(Ushr(v22.V4S(), v23.V4S(), 32), "ushr v22.4s, v23.4s, #32"); + COMPARE(Ushr(v28.V2D(), v29.V2D(), 1), "ushr v28.2d, v29.2d, #1"); + COMPARE(Ushr(v30.V2D(), v31.V2D(), 64), "ushr v30.2d, v31.2d, #64"); COMPARE(Ushr(d0, d1, 7), "ushr d0, d1, #7"); - COMPARE(Srshr(v0.V8B(), v1.V8B(), 1), "srshr v0.8b, v1.8b, #1"); - COMPARE(Srshr(v2.V8B(), v3.V8B(), 8), "srshr v2.8b, v3.8b, #8"); - COMPARE(Srshr(v4.V16B(), v5.V16B(), 1), "srshr v4.16b, v5.16b, #1"); - COMPARE(Srshr(v6.V16B(), v7.V16B(), 8), "srshr v6.16b, v7.16b, #8"); - COMPARE(Srshr(v8.V4H(), v9.V4H(), 1), "srshr v8.4h, v9.4h, #1"); - COMPARE(Srshr(v10.V4H(), v11.V4H(), 16), "srshr v10.4h, v11.4h, #16"); - 
COMPARE(Srshr(v12.V8H(), v13.V8H(), 1), "srshr v12.8h, v13.8h, #1"); - COMPARE(Srshr(v14.V8H(), v15.V8H(), 16), "srshr v14.8h, v15.8h, #16"); - COMPARE(Srshr(v16.V2S(), v17.V2S(), 1), "srshr v16.2s, v17.2s, #1"); - COMPARE(Srshr(v18.V2S(), v19.V2S(), 32), "srshr v18.2s, v19.2s, #32"); - COMPARE(Srshr(v20.V4S(), v21.V4S(), 1), "srshr v20.4s, v21.4s, #1"); - COMPARE(Srshr(v22.V4S(), v23.V4S(), 32), "srshr v22.4s, v23.4s, #32"); - COMPARE(Srshr(v28.V2D(), v29.V2D(), 1), "srshr v28.2d, v29.2d, #1"); - COMPARE(Srshr(v30.V2D(), v31.V2D(), 64), "srshr v30.2d, v31.2d, #64"); + COMPARE(Srshr(v0.V8B(), v1.V8B(), 1), "srshr v0.8b, v1.8b, #1"); + COMPARE(Srshr(v2.V8B(), v3.V8B(), 8), "srshr v2.8b, v3.8b, #8"); + COMPARE(Srshr(v4.V16B(), v5.V16B(), 1), "srshr v4.16b, v5.16b, #1"); + COMPARE(Srshr(v6.V16B(), v7.V16B(), 8), "srshr v6.16b, v7.16b, #8"); + COMPARE(Srshr(v8.V4H(), v9.V4H(), 1), "srshr v8.4h, v9.4h, #1"); + COMPARE(Srshr(v10.V4H(), v11.V4H(), 16), "srshr v10.4h, v11.4h, #16"); + COMPARE(Srshr(v12.V8H(), v13.V8H(), 1), "srshr v12.8h, v13.8h, #1"); + COMPARE(Srshr(v14.V8H(), v15.V8H(), 16), "srshr v14.8h, v15.8h, #16"); + COMPARE(Srshr(v16.V2S(), v17.V2S(), 1), "srshr v16.2s, v17.2s, #1"); + COMPARE(Srshr(v18.V2S(), v19.V2S(), 32), "srshr v18.2s, v19.2s, #32"); + COMPARE(Srshr(v20.V4S(), v21.V4S(), 1), "srshr v20.4s, v21.4s, #1"); + COMPARE(Srshr(v22.V4S(), v23.V4S(), 32), "srshr v22.4s, v23.4s, #32"); + COMPARE(Srshr(v28.V2D(), v29.V2D(), 1), "srshr v28.2d, v29.2d, #1"); + COMPARE(Srshr(v30.V2D(), v31.V2D(), 64), "srshr v30.2d, v31.2d, #64"); COMPARE(Srshr(d0, d1, 7), "srshr d0, d1, #7"); - COMPARE(Urshr(v0.V8B(), v1.V8B(), 1), "urshr v0.8b, v1.8b, #1"); - COMPARE(Urshr(v2.V8B(), v3.V8B(), 8), "urshr v2.8b, v3.8b, #8"); - COMPARE(Urshr(v4.V16B(), v5.V16B(), 1), "urshr v4.16b, v5.16b, #1"); - COMPARE(Urshr(v6.V16B(), v7.V16B(), 8), "urshr v6.16b, v7.16b, #8"); - COMPARE(Urshr(v8.V4H(), v9.V4H(), 1), "urshr v8.4h, v9.4h, #1"); - COMPARE(Urshr(v10.V4H(), v11.V4H(), 16), 
"urshr v10.4h, v11.4h, #16"); - COMPARE(Urshr(v12.V8H(), v13.V8H(), 1), "urshr v12.8h, v13.8h, #1"); - COMPARE(Urshr(v14.V8H(), v15.V8H(), 16), "urshr v14.8h, v15.8h, #16"); - COMPARE(Urshr(v16.V2S(), v17.V2S(), 1), "urshr v16.2s, v17.2s, #1"); - COMPARE(Urshr(v18.V2S(), v19.V2S(), 32), "urshr v18.2s, v19.2s, #32"); - COMPARE(Urshr(v20.V4S(), v21.V4S(), 1), "urshr v20.4s, v21.4s, #1"); - COMPARE(Urshr(v22.V4S(), v23.V4S(), 32), "urshr v22.4s, v23.4s, #32"); - COMPARE(Urshr(v28.V2D(), v29.V2D(), 1), "urshr v28.2d, v29.2d, #1"); - COMPARE(Urshr(v30.V2D(), v31.V2D(), 64), "urshr v30.2d, v31.2d, #64"); + COMPARE(Urshr(v0.V8B(), v1.V8B(), 1), "urshr v0.8b, v1.8b, #1"); + COMPARE(Urshr(v2.V8B(), v3.V8B(), 8), "urshr v2.8b, v3.8b, #8"); + COMPARE(Urshr(v4.V16B(), v5.V16B(), 1), "urshr v4.16b, v5.16b, #1"); + COMPARE(Urshr(v6.V16B(), v7.V16B(), 8), "urshr v6.16b, v7.16b, #8"); + COMPARE(Urshr(v8.V4H(), v9.V4H(), 1), "urshr v8.4h, v9.4h, #1"); + COMPARE(Urshr(v10.V4H(), v11.V4H(), 16), "urshr v10.4h, v11.4h, #16"); + COMPARE(Urshr(v12.V8H(), v13.V8H(), 1), "urshr v12.8h, v13.8h, #1"); + COMPARE(Urshr(v14.V8H(), v15.V8H(), 16), "urshr v14.8h, v15.8h, #16"); + COMPARE(Urshr(v16.V2S(), v17.V2S(), 1), "urshr v16.2s, v17.2s, #1"); + COMPARE(Urshr(v18.V2S(), v19.V2S(), 32), "urshr v18.2s, v19.2s, #32"); + COMPARE(Urshr(v20.V4S(), v21.V4S(), 1), "urshr v20.4s, v21.4s, #1"); + COMPARE(Urshr(v22.V4S(), v23.V4S(), 32), "urshr v22.4s, v23.4s, #32"); + COMPARE(Urshr(v28.V2D(), v29.V2D(), 1), "urshr v28.2d, v29.2d, #1"); + COMPARE(Urshr(v30.V2D(), v31.V2D(), 64), "urshr v30.2d, v31.2d, #64"); COMPARE(Urshr(d0, d1, 7), "urshr d0, d1, #7"); - COMPARE(Srsra(v0.V8B(), v1.V8B(), 1), "srsra v0.8b, v1.8b, #1"); - COMPARE(Srsra(v2.V8B(), v3.V8B(), 8), "srsra v2.8b, v3.8b, #8"); - COMPARE(Srsra(v4.V16B(), v5.V16B(), 1), "srsra v4.16b, v5.16b, #1"); - COMPARE(Srsra(v6.V16B(), v7.V16B(), 8), "srsra v6.16b, v7.16b, #8"); - COMPARE(Srsra(v8.V4H(), v9.V4H(), 1), "srsra v8.4h, v9.4h, #1"); - 
COMPARE(Srsra(v10.V4H(), v11.V4H(), 16), "srsra v10.4h, v11.4h, #16"); - COMPARE(Srsra(v12.V8H(), v13.V8H(), 1), "srsra v12.8h, v13.8h, #1"); - COMPARE(Srsra(v14.V8H(), v15.V8H(), 16), "srsra v14.8h, v15.8h, #16"); - COMPARE(Srsra(v16.V2S(), v17.V2S(), 1), "srsra v16.2s, v17.2s, #1"); - COMPARE(Srsra(v18.V2S(), v19.V2S(), 32), "srsra v18.2s, v19.2s, #32"); - COMPARE(Srsra(v20.V4S(), v21.V4S(), 1), "srsra v20.4s, v21.4s, #1"); - COMPARE(Srsra(v22.V4S(), v23.V4S(), 32), "srsra v22.4s, v23.4s, #32"); - COMPARE(Srsra(v28.V2D(), v29.V2D(), 1), "srsra v28.2d, v29.2d, #1"); - COMPARE(Srsra(v30.V2D(), v31.V2D(), 64), "srsra v30.2d, v31.2d, #64"); + COMPARE(Srsra(v0.V8B(), v1.V8B(), 1), "srsra v0.8b, v1.8b, #1"); + COMPARE(Srsra(v2.V8B(), v3.V8B(), 8), "srsra v2.8b, v3.8b, #8"); + COMPARE(Srsra(v4.V16B(), v5.V16B(), 1), "srsra v4.16b, v5.16b, #1"); + COMPARE(Srsra(v6.V16B(), v7.V16B(), 8), "srsra v6.16b, v7.16b, #8"); + COMPARE(Srsra(v8.V4H(), v9.V4H(), 1), "srsra v8.4h, v9.4h, #1"); + COMPARE(Srsra(v10.V4H(), v11.V4H(), 16), "srsra v10.4h, v11.4h, #16"); + COMPARE(Srsra(v12.V8H(), v13.V8H(), 1), "srsra v12.8h, v13.8h, #1"); + COMPARE(Srsra(v14.V8H(), v15.V8H(), 16), "srsra v14.8h, v15.8h, #16"); + COMPARE(Srsra(v16.V2S(), v17.V2S(), 1), "srsra v16.2s, v17.2s, #1"); + COMPARE(Srsra(v18.V2S(), v19.V2S(), 32), "srsra v18.2s, v19.2s, #32"); + COMPARE(Srsra(v20.V4S(), v21.V4S(), 1), "srsra v20.4s, v21.4s, #1"); + COMPARE(Srsra(v22.V4S(), v23.V4S(), 32), "srsra v22.4s, v23.4s, #32"); + COMPARE(Srsra(v28.V2D(), v29.V2D(), 1), "srsra v28.2d, v29.2d, #1"); + COMPARE(Srsra(v30.V2D(), v31.V2D(), 64), "srsra v30.2d, v31.2d, #64"); COMPARE(Srsra(d0, d1, 7), "srsra d0, d1, #7"); - COMPARE(Ssra(v0.V8B(), v1.V8B(), 1), "ssra v0.8b, v1.8b, #1"); - COMPARE(Ssra(v2.V8B(), v3.V8B(), 8), "ssra v2.8b, v3.8b, #8"); - COMPARE(Ssra(v4.V16B(), v5.V16B(), 1), "ssra v4.16b, v5.16b, #1"); - COMPARE(Ssra(v6.V16B(), v7.V16B(), 8), "ssra v6.16b, v7.16b, #8"); - COMPARE(Ssra(v8.V4H(), v9.V4H(), 1), "ssra 
v8.4h, v9.4h, #1"); - COMPARE(Ssra(v10.V4H(), v11.V4H(), 16), "ssra v10.4h, v11.4h, #16"); - COMPARE(Ssra(v12.V8H(), v13.V8H(), 1), "ssra v12.8h, v13.8h, #1"); - COMPARE(Ssra(v14.V8H(), v15.V8H(), 16), "ssra v14.8h, v15.8h, #16"); - COMPARE(Ssra(v16.V2S(), v17.V2S(), 1), "ssra v16.2s, v17.2s, #1"); - COMPARE(Ssra(v18.V2S(), v19.V2S(), 32), "ssra v18.2s, v19.2s, #32"); - COMPARE(Ssra(v20.V4S(), v21.V4S(), 1), "ssra v20.4s, v21.4s, #1"); - COMPARE(Ssra(v22.V4S(), v23.V4S(), 32), "ssra v22.4s, v23.4s, #32"); - COMPARE(Ssra(v28.V2D(), v29.V2D(), 1), "ssra v28.2d, v29.2d, #1"); - COMPARE(Ssra(v30.V2D(), v31.V2D(), 64), "ssra v30.2d, v31.2d, #64"); + COMPARE(Ssra(v0.V8B(), v1.V8B(), 1), "ssra v0.8b, v1.8b, #1"); + COMPARE(Ssra(v2.V8B(), v3.V8B(), 8), "ssra v2.8b, v3.8b, #8"); + COMPARE(Ssra(v4.V16B(), v5.V16B(), 1), "ssra v4.16b, v5.16b, #1"); + COMPARE(Ssra(v6.V16B(), v7.V16B(), 8), "ssra v6.16b, v7.16b, #8"); + COMPARE(Ssra(v8.V4H(), v9.V4H(), 1), "ssra v8.4h, v9.4h, #1"); + COMPARE(Ssra(v10.V4H(), v11.V4H(), 16), "ssra v10.4h, v11.4h, #16"); + COMPARE(Ssra(v12.V8H(), v13.V8H(), 1), "ssra v12.8h, v13.8h, #1"); + COMPARE(Ssra(v14.V8H(), v15.V8H(), 16), "ssra v14.8h, v15.8h, #16"); + COMPARE(Ssra(v16.V2S(), v17.V2S(), 1), "ssra v16.2s, v17.2s, #1"); + COMPARE(Ssra(v18.V2S(), v19.V2S(), 32), "ssra v18.2s, v19.2s, #32"); + COMPARE(Ssra(v20.V4S(), v21.V4S(), 1), "ssra v20.4s, v21.4s, #1"); + COMPARE(Ssra(v22.V4S(), v23.V4S(), 32), "ssra v22.4s, v23.4s, #32"); + COMPARE(Ssra(v28.V2D(), v29.V2D(), 1), "ssra v28.2d, v29.2d, #1"); + COMPARE(Ssra(v30.V2D(), v31.V2D(), 64), "ssra v30.2d, v31.2d, #64"); COMPARE(Ssra(d0, d1, 7), "ssra d0, d1, #7"); - COMPARE(Ursra(v0.V8B(), v1.V8B(), 1), "ursra v0.8b, v1.8b, #1"); - COMPARE(Ursra(v2.V8B(), v3.V8B(), 8), "ursra v2.8b, v3.8b, #8"); - COMPARE(Ursra(v4.V16B(), v5.V16B(), 1), "ursra v4.16b, v5.16b, #1"); - COMPARE(Ursra(v6.V16B(), v7.V16B(), 8), "ursra v6.16b, v7.16b, #8"); - COMPARE(Ursra(v8.V4H(), v9.V4H(), 1), "ursra v8.4h, v9.4h, 
#1"); - COMPARE(Ursra(v10.V4H(), v11.V4H(), 16), "ursra v10.4h, v11.4h, #16"); - COMPARE(Ursra(v12.V8H(), v13.V8H(), 1), "ursra v12.8h, v13.8h, #1"); - COMPARE(Ursra(v14.V8H(), v15.V8H(), 16), "ursra v14.8h, v15.8h, #16"); - COMPARE(Ursra(v16.V2S(), v17.V2S(), 1), "ursra v16.2s, v17.2s, #1"); - COMPARE(Ursra(v18.V2S(), v19.V2S(), 32), "ursra v18.2s, v19.2s, #32"); - COMPARE(Ursra(v20.V4S(), v21.V4S(), 1), "ursra v20.4s, v21.4s, #1"); - COMPARE(Ursra(v22.V4S(), v23.V4S(), 32), "ursra v22.4s, v23.4s, #32"); - COMPARE(Ursra(v28.V2D(), v29.V2D(), 1), "ursra v28.2d, v29.2d, #1"); - COMPARE(Ursra(v30.V2D(), v31.V2D(), 64), "ursra v30.2d, v31.2d, #64"); + COMPARE(Ursra(v0.V8B(), v1.V8B(), 1), "ursra v0.8b, v1.8b, #1"); + COMPARE(Ursra(v2.V8B(), v3.V8B(), 8), "ursra v2.8b, v3.8b, #8"); + COMPARE(Ursra(v4.V16B(), v5.V16B(), 1), "ursra v4.16b, v5.16b, #1"); + COMPARE(Ursra(v6.V16B(), v7.V16B(), 8), "ursra v6.16b, v7.16b, #8"); + COMPARE(Ursra(v8.V4H(), v9.V4H(), 1), "ursra v8.4h, v9.4h, #1"); + COMPARE(Ursra(v10.V4H(), v11.V4H(), 16), "ursra v10.4h, v11.4h, #16"); + COMPARE(Ursra(v12.V8H(), v13.V8H(), 1), "ursra v12.8h, v13.8h, #1"); + COMPARE(Ursra(v14.V8H(), v15.V8H(), 16), "ursra v14.8h, v15.8h, #16"); + COMPARE(Ursra(v16.V2S(), v17.V2S(), 1), "ursra v16.2s, v17.2s, #1"); + COMPARE(Ursra(v18.V2S(), v19.V2S(), 32), "ursra v18.2s, v19.2s, #32"); + COMPARE(Ursra(v20.V4S(), v21.V4S(), 1), "ursra v20.4s, v21.4s, #1"); + COMPARE(Ursra(v22.V4S(), v23.V4S(), 32), "ursra v22.4s, v23.4s, #32"); + COMPARE(Ursra(v28.V2D(), v29.V2D(), 1), "ursra v28.2d, v29.2d, #1"); + COMPARE(Ursra(v30.V2D(), v31.V2D(), 64), "ursra v30.2d, v31.2d, #64"); COMPARE(Ursra(d0, d1, 7), "ursra d0, d1, #7"); - COMPARE(Usra(v0.V8B(), v1.V8B(), 1), "usra v0.8b, v1.8b, #1"); - COMPARE(Usra(v2.V8B(), v3.V8B(), 8), "usra v2.8b, v3.8b, #8"); - COMPARE(Usra(v4.V16B(), v5.V16B(), 1), "usra v4.16b, v5.16b, #1"); - COMPARE(Usra(v6.V16B(), v7.V16B(), 8), "usra v6.16b, v7.16b, #8"); - COMPARE(Usra(v8.V4H(), v9.V4H(), 
1), "usra v8.4h, v9.4h, #1"); - COMPARE(Usra(v10.V4H(), v11.V4H(), 16), "usra v10.4h, v11.4h, #16"); - COMPARE(Usra(v12.V8H(), v13.V8H(), 1), "usra v12.8h, v13.8h, #1"); - COMPARE(Usra(v14.V8H(), v15.V8H(), 16), "usra v14.8h, v15.8h, #16"); - COMPARE(Usra(v16.V2S(), v17.V2S(), 1), "usra v16.2s, v17.2s, #1"); - COMPARE(Usra(v18.V2S(), v19.V2S(), 32), "usra v18.2s, v19.2s, #32"); - COMPARE(Usra(v20.V4S(), v21.V4S(), 1), "usra v20.4s, v21.4s, #1"); - COMPARE(Usra(v22.V4S(), v23.V4S(), 32), "usra v22.4s, v23.4s, #32"); - COMPARE(Usra(v28.V2D(), v29.V2D(), 1), "usra v28.2d, v29.2d, #1"); - COMPARE(Usra(v30.V2D(), v31.V2D(), 64), "usra v30.2d, v31.2d, #64"); + COMPARE(Usra(v0.V8B(), v1.V8B(), 1), "usra v0.8b, v1.8b, #1"); + COMPARE(Usra(v2.V8B(), v3.V8B(), 8), "usra v2.8b, v3.8b, #8"); + COMPARE(Usra(v4.V16B(), v5.V16B(), 1), "usra v4.16b, v5.16b, #1"); + COMPARE(Usra(v6.V16B(), v7.V16B(), 8), "usra v6.16b, v7.16b, #8"); + COMPARE(Usra(v8.V4H(), v9.V4H(), 1), "usra v8.4h, v9.4h, #1"); + COMPARE(Usra(v10.V4H(), v11.V4H(), 16), "usra v10.4h, v11.4h, #16"); + COMPARE(Usra(v12.V8H(), v13.V8H(), 1), "usra v12.8h, v13.8h, #1"); + COMPARE(Usra(v14.V8H(), v15.V8H(), 16), "usra v14.8h, v15.8h, #16"); + COMPARE(Usra(v16.V2S(), v17.V2S(), 1), "usra v16.2s, v17.2s, #1"); + COMPARE(Usra(v18.V2S(), v19.V2S(), 32), "usra v18.2s, v19.2s, #32"); + COMPARE(Usra(v20.V4S(), v21.V4S(), 1), "usra v20.4s, v21.4s, #1"); + COMPARE(Usra(v22.V4S(), v23.V4S(), 32), "usra v22.4s, v23.4s, #32"); + COMPARE(Usra(v28.V2D(), v29.V2D(), 1), "usra v28.2d, v29.2d, #1"); + COMPARE(Usra(v30.V2D(), v31.V2D(), 64), "usra v30.2d, v31.2d, #64"); COMPARE(Usra(d0, d1, 7), "usra d0, d1, #7"); - COMPARE(Sli(v1.V8B(), v8.V8B(), 1), "sli v1.8b, v8.8b, #1"); + COMPARE(Sli(v1.V8B(), v8.V8B(), 1), "sli v1.8b, v8.8b, #1"); COMPARE(Sli(v2.V16B(), v9.V16B(), 2), "sli v2.16b, v9.16b, #2"); - COMPARE(Sli(v3.V4H(), v1.V4H(), 3), "sli v3.4h, v1.4h, #3"); - COMPARE(Sli(v4.V8H(), v2.V8H(), 4), "sli v4.8h, v2.8h, #4"); - 
COMPARE(Sli(v5.V2S(), v3.V2S(), 5), "sli v5.2s, v3.2s, #5"); - COMPARE(Sli(v6.V4S(), v4.V4S(), 6), "sli v6.4s, v4.4s, #6"); - COMPARE(Sli(v7.V2D(), v5.V2D(), 7), "sli v7.2d, v5.2d, #7"); - COMPARE(Sli(d8, d6, 8), "sli d8, d6, #8"); - - COMPARE(Shl(v1.V8B(), v8.V8B(), 1), "shl v1.8b, v8.8b, #1"); + COMPARE(Sli(v3.V4H(), v1.V4H(), 3), "sli v3.4h, v1.4h, #3"); + COMPARE(Sli(v4.V8H(), v2.V8H(), 4), "sli v4.8h, v2.8h, #4"); + COMPARE(Sli(v5.V2S(), v3.V2S(), 5), "sli v5.2s, v3.2s, #5"); + COMPARE(Sli(v6.V4S(), v4.V4S(), 6), "sli v6.4s, v4.4s, #6"); + COMPARE(Sli(v7.V2D(), v5.V2D(), 7), "sli v7.2d, v5.2d, #7"); + COMPARE(Sli(d8, d6, 8), "sli d8, d6, #8"); + + COMPARE(Shl(v1.V8B(), v8.V8B(), 1), "shl v1.8b, v8.8b, #1"); COMPARE(Shl(v2.V16B(), v9.V16B(), 2), "shl v2.16b, v9.16b, #2"); - COMPARE(Shl(v3.V4H(), v1.V4H(), 3), "shl v3.4h, v1.4h, #3"); - COMPARE(Shl(v4.V8H(), v2.V8H(), 4), "shl v4.8h, v2.8h, #4"); - COMPARE(Shl(v5.V2S(), v3.V2S(), 5), "shl v5.2s, v3.2s, #5"); - COMPARE(Shl(v6.V4S(), v4.V4S(), 6), "shl v6.4s, v4.4s, #6"); - COMPARE(Shl(v7.V2D(), v5.V2D(), 7), "shl v7.2d, v5.2d, #7"); - COMPARE(Shl(d8, d6, 8), "shl d8, d6, #8"); - - COMPARE(Sqshl(v1.V8B(), v8.V8B(), 1), "sqshl v1.8b, v8.8b, #1"); + COMPARE(Shl(v3.V4H(), v1.V4H(), 3), "shl v3.4h, v1.4h, #3"); + COMPARE(Shl(v4.V8H(), v2.V8H(), 4), "shl v4.8h, v2.8h, #4"); + COMPARE(Shl(v5.V2S(), v3.V2S(), 5), "shl v5.2s, v3.2s, #5"); + COMPARE(Shl(v6.V4S(), v4.V4S(), 6), "shl v6.4s, v4.4s, #6"); + COMPARE(Shl(v7.V2D(), v5.V2D(), 7), "shl v7.2d, v5.2d, #7"); + COMPARE(Shl(d8, d6, 8), "shl d8, d6, #8"); + + COMPARE(Sqshl(v1.V8B(), v8.V8B(), 1), "sqshl v1.8b, v8.8b, #1"); COMPARE(Sqshl(v2.V16B(), v9.V16B(), 2), "sqshl v2.16b, v9.16b, #2"); - COMPARE(Sqshl(v3.V4H(), v1.V4H(), 3), "sqshl v3.4h, v1.4h, #3"); - COMPARE(Sqshl(v4.V8H(), v2.V8H(), 4), "sqshl v4.8h, v2.8h, #4"); - COMPARE(Sqshl(v5.V2S(), v3.V2S(), 5), "sqshl v5.2s, v3.2s, #5"); - COMPARE(Sqshl(v6.V4S(), v4.V4S(), 6), "sqshl v6.4s, v4.4s, #6"); - 
COMPARE(Sqshl(v7.V2D(), v5.V2D(), 7), "sqshl v7.2d, v5.2d, #7"); - COMPARE(Sqshl(b8, b7, 1), "sqshl b8, b7, #1"); - COMPARE(Sqshl(h9, h8, 2), "sqshl h9, h8, #2"); - COMPARE(Sqshl(s10, s9, 3), "sqshl s10, s9, #3"); - COMPARE(Sqshl(d11, d10, 4), "sqshl d11, d10, #4"); - - COMPARE(Sqshlu(v1.V8B(), v8.V8B(), 1), "sqshlu v1.8b, v8.8b, #1"); + COMPARE(Sqshl(v3.V4H(), v1.V4H(), 3), "sqshl v3.4h, v1.4h, #3"); + COMPARE(Sqshl(v4.V8H(), v2.V8H(), 4), "sqshl v4.8h, v2.8h, #4"); + COMPARE(Sqshl(v5.V2S(), v3.V2S(), 5), "sqshl v5.2s, v3.2s, #5"); + COMPARE(Sqshl(v6.V4S(), v4.V4S(), 6), "sqshl v6.4s, v4.4s, #6"); + COMPARE(Sqshl(v7.V2D(), v5.V2D(), 7), "sqshl v7.2d, v5.2d, #7"); + COMPARE(Sqshl(b8, b7, 1), "sqshl b8, b7, #1"); + COMPARE(Sqshl(h9, h8, 2), "sqshl h9, h8, #2"); + COMPARE(Sqshl(s10, s9, 3), "sqshl s10, s9, #3"); + COMPARE(Sqshl(d11, d10, 4), "sqshl d11, d10, #4"); + + COMPARE(Sqshlu(v1.V8B(), v8.V8B(), 1), "sqshlu v1.8b, v8.8b, #1"); COMPARE(Sqshlu(v2.V16B(), v9.V16B(), 2), "sqshlu v2.16b, v9.16b, #2"); - COMPARE(Sqshlu(v3.V4H(), v1.V4H(), 3), "sqshlu v3.4h, v1.4h, #3"); - COMPARE(Sqshlu(v4.V8H(), v2.V8H(), 4), "sqshlu v4.8h, v2.8h, #4"); - COMPARE(Sqshlu(v5.V2S(), v3.V2S(), 5), "sqshlu v5.2s, v3.2s, #5"); - COMPARE(Sqshlu(v6.V4S(), v4.V4S(), 6), "sqshlu v6.4s, v4.4s, #6"); - COMPARE(Sqshlu(v7.V2D(), v5.V2D(), 7), "sqshlu v7.2d, v5.2d, #7"); - COMPARE(Sqshlu(b8, b7, 1), "sqshlu b8, b7, #1"); - COMPARE(Sqshlu(h9, h8, 2), "sqshlu h9, h8, #2"); - COMPARE(Sqshlu(s10, s9, 3), "sqshlu s10, s9, #3"); - COMPARE(Sqshlu(d11, d10, 4), "sqshlu d11, d10, #4"); - - COMPARE(Uqshl(v1.V8B(), v8.V8B(), 1), "uqshl v1.8b, v8.8b, #1"); + COMPARE(Sqshlu(v3.V4H(), v1.V4H(), 3), "sqshlu v3.4h, v1.4h, #3"); + COMPARE(Sqshlu(v4.V8H(), v2.V8H(), 4), "sqshlu v4.8h, v2.8h, #4"); + COMPARE(Sqshlu(v5.V2S(), v3.V2S(), 5), "sqshlu v5.2s, v3.2s, #5"); + COMPARE(Sqshlu(v6.V4S(), v4.V4S(), 6), "sqshlu v6.4s, v4.4s, #6"); + COMPARE(Sqshlu(v7.V2D(), v5.V2D(), 7), "sqshlu v7.2d, v5.2d, #7"); + 
COMPARE(Sqshlu(b8, b7, 1), "sqshlu b8, b7, #1"); + COMPARE(Sqshlu(h9, h8, 2), "sqshlu h9, h8, #2"); + COMPARE(Sqshlu(s10, s9, 3), "sqshlu s10, s9, #3"); + COMPARE(Sqshlu(d11, d10, 4), "sqshlu d11, d10, #4"); + + COMPARE(Uqshl(v1.V8B(), v8.V8B(), 1), "uqshl v1.8b, v8.8b, #1"); COMPARE(Uqshl(v2.V16B(), v9.V16B(), 2), "uqshl v2.16b, v9.16b, #2"); - COMPARE(Uqshl(v3.V4H(), v1.V4H(), 3), "uqshl v3.4h, v1.4h, #3"); - COMPARE(Uqshl(v4.V8H(), v2.V8H(), 4), "uqshl v4.8h, v2.8h, #4"); - COMPARE(Uqshl(v5.V2S(), v3.V2S(), 5), "uqshl v5.2s, v3.2s, #5"); - COMPARE(Uqshl(v6.V4S(), v4.V4S(), 6), "uqshl v6.4s, v4.4s, #6"); - COMPARE(Uqshl(v7.V2D(), v5.V2D(), 7), "uqshl v7.2d, v5.2d, #7"); - COMPARE(Uqshl(b8, b7, 1), "uqshl b8, b7, #1"); - COMPARE(Uqshl(h9, h8, 2), "uqshl h9, h8, #2"); - COMPARE(Uqshl(s10, s9, 3), "uqshl s10, s9, #3"); - COMPARE(Uqshl(d11, d10, 4), "uqshl d11, d10, #4"); - - COMPARE(Sshll(v1.V8H(), v8.V8B(), 1), "sshll v1.8h, v8.8b, #1"); - COMPARE(Sshll(v3.V4S(), v1.V4H(), 3), "sshll v3.4s, v1.4h, #3"); - COMPARE(Sshll(v5.V2D(), v3.V2S(), 5), "sshll v5.2d, v3.2s, #5"); + COMPARE(Uqshl(v3.V4H(), v1.V4H(), 3), "uqshl v3.4h, v1.4h, #3"); + COMPARE(Uqshl(v4.V8H(), v2.V8H(), 4), "uqshl v4.8h, v2.8h, #4"); + COMPARE(Uqshl(v5.V2S(), v3.V2S(), 5), "uqshl v5.2s, v3.2s, #5"); + COMPARE(Uqshl(v6.V4S(), v4.V4S(), 6), "uqshl v6.4s, v4.4s, #6"); + COMPARE(Uqshl(v7.V2D(), v5.V2D(), 7), "uqshl v7.2d, v5.2d, #7"); + COMPARE(Uqshl(b8, b7, 1), "uqshl b8, b7, #1"); + COMPARE(Uqshl(h9, h8, 2), "uqshl h9, h8, #2"); + COMPARE(Uqshl(s10, s9, 3), "uqshl s10, s9, #3"); + COMPARE(Uqshl(d11, d10, 4), "uqshl d11, d10, #4"); + + COMPARE(Sshll(v1.V8H(), v8.V8B(), 1), "sshll v1.8h, v8.8b, #1"); + COMPARE(Sshll(v3.V4S(), v1.V4H(), 3), "sshll v3.4s, v1.4h, #3"); + COMPARE(Sshll(v5.V2D(), v3.V2S(), 5), "sshll v5.2d, v3.2s, #5"); COMPARE(Sshll2(v2.V8H(), v9.V16B(), 2), "sshll2 v2.8h, v9.16b, #2"); - COMPARE(Sshll2(v4.V4S(), v2.V8H(), 4), "sshll2 v4.4s, v2.8h, #4"); - COMPARE(Sshll2(v6.V2D(), 
v4.V4S(), 6), "sshll2 v6.2d, v4.4s, #6"); + COMPARE(Sshll2(v4.V4S(), v2.V8H(), 4), "sshll2 v4.4s, v2.8h, #4"); + COMPARE(Sshll2(v6.V2D(), v4.V4S(), 6), "sshll2 v6.2d, v4.4s, #6"); - COMPARE(Sshll(v1.V8H(), v8.V8B(), 0), "sxtl v1.8h, v8.8b"); - COMPARE(Sshll(v3.V4S(), v1.V4H(), 0), "sxtl v3.4s, v1.4h"); - COMPARE(Sshll(v5.V2D(), v3.V2S(), 0), "sxtl v5.2d, v3.2s"); + COMPARE(Sshll(v1.V8H(), v8.V8B(), 0), "sxtl v1.8h, v8.8b"); + COMPARE(Sshll(v3.V4S(), v1.V4H(), 0), "sxtl v3.4s, v1.4h"); + COMPARE(Sshll(v5.V2D(), v3.V2S(), 0), "sxtl v5.2d, v3.2s"); COMPARE(Sshll2(v2.V8H(), v9.V16B(), 0), "sxtl2 v2.8h, v9.16b"); - COMPARE(Sshll2(v4.V4S(), v2.V8H(), 0), "sxtl2 v4.4s, v2.8h"); - COMPARE(Sshll2(v6.V2D(), v4.V4S(), 0), "sxtl2 v6.2d, v4.4s"); + COMPARE(Sshll2(v4.V4S(), v2.V8H(), 0), "sxtl2 v4.4s, v2.8h"); + COMPARE(Sshll2(v6.V2D(), v4.V4S(), 0), "sxtl2 v6.2d, v4.4s"); - COMPARE(Sxtl(v1.V8H(), v8.V8B()), "sxtl v1.8h, v8.8b"); - COMPARE(Sxtl(v3.V4S(), v1.V4H()), "sxtl v3.4s, v1.4h"); - COMPARE(Sxtl(v5.V2D(), v3.V2S()), "sxtl v5.2d, v3.2s"); + COMPARE(Sxtl(v1.V8H(), v8.V8B()), "sxtl v1.8h, v8.8b"); + COMPARE(Sxtl(v3.V4S(), v1.V4H()), "sxtl v3.4s, v1.4h"); + COMPARE(Sxtl(v5.V2D(), v3.V2S()), "sxtl v5.2d, v3.2s"); COMPARE(Sxtl2(v2.V8H(), v9.V16B()), "sxtl2 v2.8h, v9.16b"); - COMPARE(Sxtl2(v4.V4S(), v2.V8H()), "sxtl2 v4.4s, v2.8h"); - COMPARE(Sxtl2(v6.V2D(), v4.V4S()), "sxtl2 v6.2d, v4.4s"); + COMPARE(Sxtl2(v4.V4S(), v2.V8H()), "sxtl2 v4.4s, v2.8h"); + COMPARE(Sxtl2(v6.V2D(), v4.V4S()), "sxtl2 v6.2d, v4.4s"); - COMPARE(Ushll(v1.V8H(), v8.V8B(), 1), "ushll v1.8h, v8.8b, #1"); - COMPARE(Ushll(v3.V4S(), v1.V4H(), 3), "ushll v3.4s, v1.4h, #3"); - COMPARE(Ushll(v5.V2D(), v3.V2S(), 5), "ushll v5.2d, v3.2s, #5"); + COMPARE(Ushll(v1.V8H(), v8.V8B(), 1), "ushll v1.8h, v8.8b, #1"); + COMPARE(Ushll(v3.V4S(), v1.V4H(), 3), "ushll v3.4s, v1.4h, #3"); + COMPARE(Ushll(v5.V2D(), v3.V2S(), 5), "ushll v5.2d, v3.2s, #5"); COMPARE(Ushll2(v2.V8H(), v9.V16B(), 2), "ushll2 v2.8h, v9.16b, #2"); - 
COMPARE(Ushll2(v4.V4S(), v2.V8H(), 4), "ushll2 v4.4s, v2.8h, #4"); - COMPARE(Ushll2(v6.V2D(), v4.V4S(), 6), "ushll2 v6.2d, v4.4s, #6"); + COMPARE(Ushll2(v4.V4S(), v2.V8H(), 4), "ushll2 v4.4s, v2.8h, #4"); + COMPARE(Ushll2(v6.V2D(), v4.V4S(), 6), "ushll2 v6.2d, v4.4s, #6"); - COMPARE(Ushll(v1.V8H(), v8.V8B(), 0), "uxtl v1.8h, v8.8b"); - COMPARE(Ushll(v3.V4S(), v1.V4H(), 0), "uxtl v3.4s, v1.4h"); - COMPARE(Ushll(v5.V2D(), v3.V2S(), 0), "uxtl v5.2d, v3.2s"); + COMPARE(Ushll(v1.V8H(), v8.V8B(), 0), "uxtl v1.8h, v8.8b"); + COMPARE(Ushll(v3.V4S(), v1.V4H(), 0), "uxtl v3.4s, v1.4h"); + COMPARE(Ushll(v5.V2D(), v3.V2S(), 0), "uxtl v5.2d, v3.2s"); COMPARE(Ushll2(v2.V8H(), v9.V16B(), 0), "uxtl2 v2.8h, v9.16b"); - COMPARE(Ushll2(v4.V4S(), v2.V8H(), 0), "uxtl2 v4.4s, v2.8h"); - COMPARE(Ushll2(v6.V2D(), v4.V4S(), 0), "uxtl2 v6.2d, v4.4s"); + COMPARE(Ushll2(v4.V4S(), v2.V8H(), 0), "uxtl2 v4.4s, v2.8h"); + COMPARE(Ushll2(v6.V2D(), v4.V4S(), 0), "uxtl2 v6.2d, v4.4s"); - COMPARE(Uxtl(v1.V8H(), v8.V8B()), "uxtl v1.8h, v8.8b"); - COMPARE(Uxtl(v3.V4S(), v1.V4H()), "uxtl v3.4s, v1.4h"); - COMPARE(Uxtl(v5.V2D(), v3.V2S()), "uxtl v5.2d, v3.2s"); + COMPARE(Uxtl(v1.V8H(), v8.V8B()), "uxtl v1.8h, v8.8b"); + COMPARE(Uxtl(v3.V4S(), v1.V4H()), "uxtl v3.4s, v1.4h"); + COMPARE(Uxtl(v5.V2D(), v3.V2S()), "uxtl v5.2d, v3.2s"); COMPARE(Uxtl2(v2.V8H(), v9.V16B()), "uxtl2 v2.8h, v9.16b"); - COMPARE(Uxtl2(v4.V4S(), v2.V8H()), "uxtl2 v4.4s, v2.8h"); - COMPARE(Uxtl2(v6.V2D(), v4.V4S()), "uxtl2 v6.2d, v4.4s"); + COMPARE(Uxtl2(v4.V4S(), v2.V8H()), "uxtl2 v4.4s, v2.8h"); + COMPARE(Uxtl2(v6.V2D(), v4.V4S()), "uxtl2 v6.2d, v4.4s"); - COMPARE(Sri(v1.V8B(), v8.V8B(), 1), "sri v1.8b, v8.8b, #1"); + COMPARE(Sri(v1.V8B(), v8.V8B(), 1), "sri v1.8b, v8.8b, #1"); COMPARE(Sri(v2.V16B(), v9.V16B(), 2), "sri v2.16b, v9.16b, #2"); - COMPARE(Sri(v3.V4H(), v1.V4H(), 3), "sri v3.4h, v1.4h, #3"); - COMPARE(Sri(v4.V8H(), v2.V8H(), 4), "sri v4.8h, v2.8h, #4"); - COMPARE(Sri(v5.V2S(), v3.V2S(), 5), "sri v5.2s, v3.2s, #5"); - 
COMPARE(Sri(v6.V4S(), v4.V4S(), 6), "sri v6.4s, v4.4s, #6"); - COMPARE(Sri(v7.V2D(), v5.V2D(), 7), "sri v7.2d, v5.2d, #7"); - COMPARE(Sri(d8, d6, 8), "sri d8, d6, #8"); + COMPARE(Sri(v3.V4H(), v1.V4H(), 3), "sri v3.4h, v1.4h, #3"); + COMPARE(Sri(v4.V8H(), v2.V8H(), 4), "sri v4.8h, v2.8h, #4"); + COMPARE(Sri(v5.V2S(), v3.V2S(), 5), "sri v5.2s, v3.2s, #5"); + COMPARE(Sri(v6.V4S(), v4.V4S(), 6), "sri v6.4s, v4.4s, #6"); + COMPARE(Sri(v7.V2D(), v5.V2D(), 7), "sri v7.2d, v5.2d, #7"); + COMPARE(Sri(d8, d6, 8), "sri d8, d6, #8"); COMPARE(Shrn(v0.V8B(), v1.V8H(), 1), "shrn v0.8b, v1.8h, #1"); COMPARE(Shrn(v1.V4H(), v2.V4S(), 2), "shrn v1.4h, v2.4s, #2"); diff --git a/test/aarch64/test-fuzz-aarch64.cc b/test/aarch64/test-fuzz-aarch64.cc index 691efbfc..56c1c1d4 100644 --- a/test/aarch64/test-fuzz-aarch64.cc +++ b/test/aarch64/test-fuzz-aarch64.cc @@ -31,7 +31,7 @@ #include "aarch64/decoder-aarch64.h" #include "aarch64/disasm-aarch64.h" -#define TEST(name) TEST_(AARCH64_FUZZ_##name) +#define TEST(name) TEST_(AARCH64_FUZZ_##name) namespace vixl { @@ -117,5 +117,5 @@ TEST(disasm_pedantic) { } #endif -} // namespace aarch64 -} // namespace vixl +} // namespace aarch64 +} // namespace vixl diff --git a/test/aarch64/test-simulator-aarch64.cc b/test/aarch64/test-simulator-aarch64.cc index 56d17e96..0fa1e723 100644 --- a/test/aarch64/test-simulator-aarch64.cc +++ b/test/aarch64/test-simulator-aarch64.cc @@ -53,76 +53,75 @@ namespace aarch64 { // test-simulator-traces-aarch64.h. #define __ masm. -#define TEST(name) TEST_(AARCH64_SIM_##name) +#define TEST(name) TEST_(AARCH64_SIM_##name) #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 -#define SETUP() \ - MacroAssembler masm; \ - Decoder decoder; \ - Simulator* simulator = Test::run_debugger() ? 
new Debugger(&decoder) \ - : new Simulator(&decoder); \ - simulator->SetColouredTrace(Test::coloured_trace()); \ - simulator->SetInstructionStats(Test::instruction_stats()); \ - -#define START() \ - masm.Reset(); \ - simulator->ResetState(); \ - __ PushCalleeSavedRegisters(); \ - if (Test::trace_reg()) { \ - __ Trace(LOG_STATE, TRACE_ENABLE); \ - } \ - if (Test::trace_write()) { \ - __ Trace(LOG_WRITE, TRACE_ENABLE); \ - } \ - if (Test::trace_sim()) { \ - __ Trace(LOG_DISASM, TRACE_ENABLE); \ - } \ - if (Test::instruction_stats()) { \ - __ EnableInstrumentation(); \ +#define SETUP() \ + MacroAssembler masm; \ + Decoder decoder; \ + Simulator* simulator = \ + Test::run_debugger() ? new Debugger(&decoder) : new Simulator(&decoder); \ + simulator->SetColouredTrace(Test::coloured_trace()); \ + simulator->SetInstructionStats(Test::instruction_stats()); + +#define START() \ + masm.Reset(); \ + simulator->ResetState(); \ + __ PushCalleeSavedRegisters(); \ + if (Test::trace_reg()) { \ + __ Trace(LOG_STATE, TRACE_ENABLE); \ + } \ + if (Test::trace_write()) { \ + __ Trace(LOG_WRITE, TRACE_ENABLE); \ + } \ + if (Test::trace_sim()) { \ + __ Trace(LOG_DISASM, TRACE_ENABLE); \ + } \ + if (Test::instruction_stats()) { \ + __ EnableInstrumentation(); \ } -#define END() \ - if (Test::instruction_stats()) { \ - __ DisableInstrumentation(); \ - } \ - __ Trace(LOG_ALL, TRACE_DISABLE); \ - __ PopCalleeSavedRegisters(); \ - __ Ret(); \ +#define END() \ + if (Test::instruction_stats()) { \ + __ DisableInstrumentation(); \ + } \ + __ Trace(LOG_ALL, TRACE_DISABLE); \ + __ PopCalleeSavedRegisters(); \ + __ Ret(); \ masm.FinalizeCode() -#define RUN() \ +#define RUN() \ simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()) -#define TEARDOWN() \ - delete simulator; +#define TEARDOWN() delete simulator; -#else // VIXL_INCLUDE_SIMULATOR_AARCH64 +#else // VIXL_INCLUDE_SIMULATOR_AARCH64 -#define SETUP() \ - MacroAssembler masm; \ +#define SETUP() \ + MacroAssembler masm; \ 
CPU::SetUp() -#define START() \ - masm.Reset(); \ +#define START() \ + masm.Reset(); \ __ PushCalleeSavedRegisters() -#define END() \ - __ PopCalleeSavedRegisters(); \ - __ Ret(); \ +#define END() \ + __ PopCalleeSavedRegisters(); \ + __ Ret(); \ masm.FinalizeCode() -#define RUN() \ - { \ - masm.GetBuffer()->SetExecutable(); \ - ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \ - masm.GetSizeOfCodeGenerated()); \ - masm.GetBuffer()->SetWritable(); \ +#define RUN() \ + { \ + masm.GetBuffer()->SetExecutable(); \ + ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \ + masm.GetSizeOfCodeGenerated()); \ + masm.GetBuffer()->SetWritable(); \ } #define TEARDOWN() -#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 +#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 // The maximum number of errors to report in detail for each test. @@ -131,13 +130,9 @@ static const unsigned kErrorReportLimit = 8; // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the // templated test functions. -static float rawbits_to_fp(uint32_t bits) { - return RawbitsToFloat(bits); -} +static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); } -static double rawbits_to_fp(uint64_t bits) { - return RawbitsToDouble(bits); -} +static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); } // MacroAssembler member function pointers to pass to the test dispatchers. @@ -164,22 +159,26 @@ typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd, int fbits); // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be // consolidated into one routine. 
-typedef void (MacroAssembler::*Test1OpNEONHelper_t)( - const VRegister& vd, const VRegister& vn); -typedef void (MacroAssembler::*Test2OpNEONHelper_t)( - const VRegister& vd, const VRegister& vn, const VRegister& vm); -typedef void (MacroAssembler::*TestByElementNEONHelper_t)( - const VRegister& vd, const VRegister& vn, const VRegister& vm, int vm_index); +typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd, + const VRegister& vn); +typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd, + const VRegister& vn, + const VRegister& vm); +typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( - const VRegister& vd, int imm1, const VRegister& vn, int imm2); + const VRegister& vd, int imm1, const VRegister& vn, int imm2); // This helps using the same typename for both the function pointer // and the array of immediates passed to helper routines. template <typename T> class Test2OpImmediateNEONHelper_t { public: - typedef void (MacroAssembler::*mnemonic)( - const VRegister& vd, const VRegister& vn, T imm); + typedef void (MacroAssembler::*mnemonic)(const VRegister& vd, + const VRegister& vn, + T imm); }; @@ -195,9 +194,12 @@ static unsigned MaxHexCharCount() { // Standard test dispatchers. -static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs, - unsigned inputs_length, uintptr_t results, - unsigned d_size, unsigned n_size) { +static void Test1Op_Helper(Test1OpFPHelper_t helper, + uintptr_t inputs, + unsigned inputs_length, + uintptr_t results, + unsigned d_size, + unsigned n_size) { VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize)); VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize)); @@ -246,26 +248,34 @@ static void Test1Op_Helper(Test1OpFPHelper_t helper, uintptr_t inputs, // rawbits representations of doubles or floats. 
This ensures that exact bit // comparisons can be performed. template <typename Tn, typename Td> -static void Test1Op(const char * name, Test1OpFPHelper_t helper, - const Tn inputs[], unsigned inputs_length, - const Td expected[], unsigned expected_length) { +static void Test1Op(const char* name, + Test1OpFPHelper_t helper, + const Tn inputs[], + unsigned inputs_length, + const Td expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length; - Td * results = new Td[results_length]; + Td* results = new Td[results_length]; const unsigned d_bits = sizeof(Td) * 8; const unsigned n_bits = sizeof(Tn) * 8; - Test1Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), d_bits, n_bits); + Test1Op_Helper(helper, + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + d_bits, + n_bits); if (Test::generate_test_trace()) { // Print the results. printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name); for (unsigned d = 0; d < results_length; d++) { printf(" 0x%0*" PRIx64 ",\n", - d_bits / 4, static_cast<uint64_t>(results[d])); + d_bits / 4, + static_cast<uint64_t>(results[d])); } printf("};\n"); printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); @@ -279,13 +289,18 @@ static void Test1Op(const char * name, Test1OpFPHelper_t helper, if (++error_count > kErrorReportLimit) continue; printf("%s 0x%0*" PRIx64 " (%s %g):\n", - name, n_bits / 4, static_cast<uint64_t>(inputs[n]), - name, rawbits_to_fp(inputs[n])); + name, + n_bits / 4, + static_cast<uint64_t>(inputs[n]), + name, + rawbits_to_fp(inputs[n])); printf(" Expected: 0x%0*" PRIx64 " (%g)\n", - d_bits / 4, static_cast<uint64_t>(expected[d]), + d_bits / 4, + static_cast<uint64_t>(expected[d]), rawbits_to_fp(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%g)\n", - d_bits / 4, static_cast<uint64_t>(results[d]), + d_bits / 4, + 
static_cast<uint64_t>(results[d]), rawbits_to_fp(results[d])); printf("\n"); } @@ -301,8 +316,10 @@ static void Test1Op(const char * name, Test1OpFPHelper_t helper, static void Test2Op_Helper(Test2OpFPHelper_t helper, - uintptr_t inputs, unsigned inputs_length, - uintptr_t results, unsigned reg_size) { + uintptr_t inputs, + unsigned inputs_length, + uintptr_t results, + unsigned reg_size) { VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); SETUP(); @@ -341,7 +358,7 @@ static void Test2Op_Helper(Test2OpFPHelper_t helper, SingleEmissionCheckScope guard(&masm); (masm.*helper)(fd, fn, fm); } - __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex)); + __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex)); __ Add(index_m, index_m, 1); __ Cmp(index_m, inputs_length); @@ -361,25 +378,32 @@ static void Test2Op_Helper(Test2OpFPHelper_t helper, // rawbits representations of doubles or floats. This ensures that exact bit // comparisons can be performed. template <typename T> -static void Test2Op(const char * name, Test2OpFPHelper_t helper, - const T inputs[], unsigned inputs_length, - const T expected[], unsigned expected_length) { +static void Test2Op(const char* name, + Test2OpFPHelper_t helper, + const T inputs[], + unsigned inputs_length, + const T expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length * inputs_length; - T * results = new T[results_length]; + T* results = new T[results_length]; const unsigned bits = sizeof(T) * 8; - Test2Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), bits); + Test2Op_Helper(helper, + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + bits); if (Test::generate_test_trace()) { // Print the results. 
printf("const uint%u_t kExpected_%s[] = {\n", bits, name); for (unsigned d = 0; d < results_length; d++) { printf(" 0x%0*" PRIx64 ",\n", - bits / 4, static_cast<uint64_t>(results[d])); + bits / 4, + static_cast<uint64_t>(results[d])); } printf("};\n"); printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); @@ -395,16 +419,20 @@ static void Test2Op(const char * name, Test2OpFPHelper_t helper, printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", name, - bits / 4, static_cast<uint64_t>(inputs[n]), - bits / 4, static_cast<uint64_t>(inputs[m]), + bits / 4, + static_cast<uint64_t>(inputs[n]), + bits / 4, + static_cast<uint64_t>(inputs[m]), name, rawbits_to_fp(inputs[n]), rawbits_to_fp(inputs[m])); printf(" Expected: 0x%0*" PRIx64 " (%g)\n", - bits / 4, static_cast<uint64_t>(expected[d]), + bits / 4, + static_cast<uint64_t>(expected[d]), rawbits_to_fp(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%g)\n", - bits / 4, static_cast<uint64_t>(results[d]), + bits / 4, + static_cast<uint64_t>(results[d]), rawbits_to_fp(results[d])); printf("\n"); } @@ -421,8 +449,10 @@ static void Test2Op(const char * name, Test2OpFPHelper_t helper, static void Test3Op_Helper(Test3OpFPHelper_t helper, - uintptr_t inputs, unsigned inputs_length, - uintptr_t results, unsigned reg_size) { + uintptr_t inputs, + unsigned inputs_length, + uintptr_t results, + unsigned reg_size) { VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); SETUP(); @@ -491,25 +521,32 @@ static void Test3Op_Helper(Test3OpFPHelper_t helper, // rawbits representations of doubles or floats. This ensures that exact bit // comparisons can be performed. 
template <typename T> -static void Test3Op(const char * name, Test3OpFPHelper_t helper, - const T inputs[], unsigned inputs_length, - const T expected[], unsigned expected_length) { +static void Test3Op(const char* name, + Test3OpFPHelper_t helper, + const T inputs[], + unsigned inputs_length, + const T expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length * inputs_length * inputs_length; - T * results = new T[results_length]; + T* results = new T[results_length]; const unsigned bits = sizeof(T) * 8; - Test3Op_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), bits); + Test3Op_Helper(helper, + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + bits); if (Test::generate_test_trace()) { // Print the results. printf("const uint%u_t kExpected_%s[] = {\n", bits, name); for (unsigned d = 0; d < results_length; d++) { printf(" 0x%0*" PRIx64 ",\n", - bits / 4, static_cast<uint64_t>(results[d])); + bits / 4, + static_cast<uint64_t>(results[d])); } printf("};\n"); printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length); @@ -527,18 +564,23 @@ static void Test3Op(const char * name, Test3OpFPHelper_t helper, printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g %g):\n", name, - bits / 4, static_cast<uint64_t>(inputs[n]), - bits / 4, static_cast<uint64_t>(inputs[m]), - bits / 4, static_cast<uint64_t>(inputs[a]), + bits / 4, + static_cast<uint64_t>(inputs[n]), + bits / 4, + static_cast<uint64_t>(inputs[m]), + bits / 4, + static_cast<uint64_t>(inputs[a]), name, rawbits_to_fp(inputs[n]), rawbits_to_fp(inputs[m]), rawbits_to_fp(inputs[a])); printf(" Expected: 0x%0*" PRIx64 " (%g)\n", - bits / 4, static_cast<uint64_t>(expected[d]), + bits / 4, + static_cast<uint64_t>(expected[d]), rawbits_to_fp(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%g)\n", - bits / 4, 
static_cast<uint64_t>(results[d]), + bits / 4, + static_cast<uint64_t>(results[d]), rawbits_to_fp(results[d])); printf("\n"); } @@ -556,8 +598,10 @@ static void Test3Op(const char * name, Test3OpFPHelper_t helper, static void TestCmp_Helper(TestFPCmpHelper_t helper, - uintptr_t inputs, unsigned inputs_length, - uintptr_t results, unsigned reg_size) { + uintptr_t inputs, + unsigned inputs_length, + uintptr_t results, + unsigned reg_size) { VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); SETUP(); @@ -618,18 +662,24 @@ static void TestCmp_Helper(TestFPCmpHelper_t helper, // rawbits representations of doubles or floats. This ensures that exact bit // comparisons can be performed. template <typename T> -static void TestCmp(const char * name, TestFPCmpHelper_t helper, - const T inputs[], unsigned inputs_length, - const uint8_t expected[], unsigned expected_length) { +static void TestCmp(const char* name, + TestFPCmpHelper_t helper, + const T inputs[], + unsigned inputs_length, + const uint8_t expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length * inputs_length; - uint8_t * results = new uint8_t[results_length]; + uint8_t* results = new uint8_t[results_length]; const unsigned bits = sizeof(T) * 8; - TestCmp_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), bits); + TestCmp_Helper(helper, + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + bits); if (Test::generate_test_trace()) { // Print the results. 
@@ -653,8 +703,10 @@ static void TestCmp(const char * name, TestFPCmpHelper_t helper, printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n", name, - bits / 4, static_cast<uint64_t>(inputs[n]), - bits / 4, static_cast<uint64_t>(inputs[m]), + bits / 4, + static_cast<uint64_t>(inputs[n]), + bits / 4, + static_cast<uint64_t>(inputs[m]), name, rawbits_to_fp(inputs[n]), rawbits_to_fp(inputs[m])); @@ -685,8 +737,10 @@ static void TestCmp(const char * name, TestFPCmpHelper_t helper, static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, - uintptr_t inputs, unsigned inputs_length, - uintptr_t results, unsigned reg_size) { + uintptr_t inputs, + unsigned inputs_length, + uintptr_t results, + unsigned reg_size) { VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize)); SETUP(); @@ -737,18 +791,24 @@ static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper, // rawbits representations of doubles or floats. This ensures that exact bit // comparisons can be performed. template <typename T> -static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper, - const T inputs[], unsigned inputs_length, - const uint8_t expected[], unsigned expected_length) { +static void TestCmpZero(const char* name, + TestFPCmpZeroHelper_t helper, + const T inputs[], + unsigned inputs_length, + const uint8_t expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length; - uint8_t * results = new uint8_t[results_length]; + uint8_t* results = new uint8_t[results_length]; const unsigned bits = sizeof(T) * 8; - TestCmpZero_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), bits); + TestCmpZero_Helper(helper, + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + bits); if (Test::generate_test_trace()) { // Print the results. 
@@ -771,8 +831,10 @@ static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper, printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n", name, - bits / 4, static_cast<uint64_t>(inputs[n]), - bits / 4, 0, + bits / 4, + static_cast<uint64_t>(inputs[n]), + bits / 4, + 0, name, rawbits_to_fp(inputs[n])); printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n", @@ -801,9 +863,11 @@ static void TestCmpZero(const char * name, TestFPCmpZeroHelper_t helper, static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper, - uintptr_t inputs, unsigned inputs_length, + uintptr_t inputs, + unsigned inputs_length, uintptr_t results, - unsigned d_size, unsigned n_size) { + unsigned d_size, + unsigned n_size) { VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize)); VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize)); @@ -850,9 +914,12 @@ static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper, } -static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs, - unsigned inputs_length, uintptr_t results, - unsigned d_size, unsigned n_size) { +static void TestFPToInt_Helper(TestFPToIntHelper_t helper, + uintptr_t inputs, + unsigned inputs_length, + uintptr_t results, + unsigned d_size, + unsigned n_size) { VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize)); VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize)); @@ -903,19 +970,26 @@ static void TestFPToInt_Helper(TestFPToIntHelper_t helper, uintptr_t inputs, // performed. // - The expected[] array should be an array of signed integers. 
template <typename Tn, typename Td> -static void TestFPToS(const char * name, TestFPToIntHelper_t helper, - const Tn inputs[], unsigned inputs_length, - const Td expected[], unsigned expected_length) { +static void TestFPToS(const char* name, + TestFPToIntHelper_t helper, + const Tn inputs[], + unsigned inputs_length, + const Td expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length; - Td * results = new Td[results_length]; + Td* results = new Td[results_length]; const unsigned d_bits = sizeof(Td) * 8; const unsigned n_bits = sizeof(Tn) * 8; - TestFPToInt_Helper(helper, reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), d_bits, n_bits); + TestFPToInt_Helper(helper, + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + d_bits, + n_bits); if (Test::generate_test_trace()) { // Print the results. @@ -925,7 +999,7 @@ static void TestFPToS(const char * name, TestFPToIntHelper_t helper, // Deriving int_d_min in this way (rather than just checking INT64_MIN and // the like) avoids warnings about comparing values with differing ranges. 
const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1; - const int64_t int_d_min = -(int_d_max) - 1; + const int64_t int_d_min = -(int_d_max)-1; for (unsigned d = 0; d < results_length; d++) { if (results[d] == int_d_min) { printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max); @@ -953,13 +1027,18 @@ static void TestFPToS(const char * name, TestFPToIntHelper_t helper, if (++error_count > kErrorReportLimit) continue; printf("%s 0x%0*" PRIx64 " (%s %g):\n", - name, n_bits / 4, static_cast<uint64_t>(inputs[n]), - name, rawbits_to_fp(inputs[n])); + name, + n_bits / 4, + static_cast<uint64_t>(inputs[n]), + name, + rawbits_to_fp(inputs[n])); printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n", - d_bits / 4, static_cast<uint64_t>(expected[d]), + d_bits / 4, + static_cast<uint64_t>(expected[d]), static_cast<int64_t>(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n", - d_bits / 4, static_cast<uint64_t>(results[d]), + d_bits / 4, + static_cast<uint64_t>(results[d]), static_cast<int64_t>(results[d])); printf("\n"); } @@ -980,20 +1059,26 @@ static void TestFPToS(const char * name, TestFPToIntHelper_t helper, // performed. // - The expected[] array should be an array of unsigned integers. 
template <typename Tn, typename Td> -static void TestFPToU(const char * name, TestFPToIntHelper_t helper, - const Tn inputs[], unsigned inputs_length, - const Td expected[], unsigned expected_length) { +static void TestFPToU(const char* name, + TestFPToIntHelper_t helper, + const Tn inputs[], + unsigned inputs_length, + const Td expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned results_length = inputs_length; - Td * results = new Td[results_length]; + Td* results = new Td[results_length]; const unsigned d_bits = sizeof(Td) * 8; const unsigned n_bits = sizeof(Tn) * 8; TestFPToInt_Helper(helper, - reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), d_bits, n_bits); + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + d_bits, + n_bits); if (Test::generate_test_trace()) { // Print the results. @@ -1013,13 +1098,18 @@ static void TestFPToU(const char * name, TestFPToIntHelper_t helper, if (++error_count > kErrorReportLimit) continue; printf("%s 0x%0*" PRIx64 " (%s %g):\n", - name, n_bits / 4, static_cast<uint64_t>(inputs[n]), - name, rawbits_to_fp(inputs[n])); + name, + n_bits / 4, + static_cast<uint64_t>(inputs[n]), + name, + rawbits_to_fp(inputs[n])); printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", - d_bits / 4, static_cast<uint64_t>(expected[d]), + d_bits / 4, + static_cast<uint64_t>(expected[d]), static_cast<uint64_t>(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", - d_bits / 4, static_cast<uint64_t>(results[d]), + d_bits / 4, + static_cast<uint64_t>(results[d]), static_cast<uint64_t>(results[d])); printf("\n"); } @@ -1040,20 +1130,26 @@ static void TestFPToU(const char * name, TestFPToIntHelper_t helper, // performed. // - The expected[] array should be an array of signed integers. 
template <typename Tn, typename Td> -static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper, - const Tn inputs[], unsigned inputs_length, - const Td expected[], unsigned expected_length) { +static void TestFPToFixedS(const char* name, + TestFPToFixedHelper_t helper, + const Tn inputs[], + unsigned inputs_length, + const Td expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned d_bits = sizeof(Td) * 8; const unsigned n_bits = sizeof(Tn) * 8; const unsigned results_length = inputs_length * (d_bits + 1); - Td * results = new Td[results_length]; + Td* results = new Td[results_length]; TestFPToFixed_Helper(helper, - reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), d_bits, n_bits); + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + d_bits, + n_bits); if (Test::generate_test_trace()) { // Print the results. @@ -1063,7 +1159,7 @@ static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper, // Deriving int_d_min in this way (rather than just checking INT64_MIN and // the like) avoids warnings about comparing values with differing ranges. 
const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1; - const int64_t int_d_min = -(int_d_max) - 1; + const int64_t int_d_min = -(int_d_max)-1; for (unsigned d = 0; d < results_length; d++) { if (results[d] == int_d_min) { printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max); @@ -1092,13 +1188,20 @@ static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper, if (++error_count > kErrorReportLimit) continue; printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", - name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits, - name, rawbits_to_fp(inputs[n]), fbits); + name, + n_bits / 4, + static_cast<uint64_t>(inputs[n]), + fbits, + name, + rawbits_to_fp(inputs[n]), + fbits); printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n", - d_bits / 4, static_cast<uint64_t>(expected[d]), + d_bits / 4, + static_cast<uint64_t>(expected[d]), static_cast<int64_t>(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n", - d_bits / 4, static_cast<uint64_t>(results[d]), + d_bits / 4, + static_cast<uint64_t>(results[d]), static_cast<int64_t>(results[d])); printf("\n"); } @@ -1120,20 +1223,26 @@ static void TestFPToFixedS(const char * name, TestFPToFixedHelper_t helper, // performed. // - The expected[] array should be an array of unsigned integers. 
template <typename Tn, typename Td> -static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper, - const Tn inputs[], unsigned inputs_length, - const Td expected[], unsigned expected_length) { +static void TestFPToFixedU(const char* name, + TestFPToFixedHelper_t helper, + const Tn inputs[], + unsigned inputs_length, + const Td expected[], + unsigned expected_length) { VIXL_ASSERT(inputs_length > 0); const unsigned d_bits = sizeof(Td) * 8; const unsigned n_bits = sizeof(Tn) * 8; const unsigned results_length = inputs_length * (d_bits + 1); - Td * results = new Td[results_length]; + Td* results = new Td[results_length]; TestFPToFixed_Helper(helper, - reinterpret_cast<uintptr_t>(inputs), inputs_length, - reinterpret_cast<uintptr_t>(results), d_bits, n_bits); + reinterpret_cast<uintptr_t>(inputs), + inputs_length, + reinterpret_cast<uintptr_t>(results), + d_bits, + n_bits); if (Test::generate_test_trace()) { // Print the results. @@ -1154,13 +1263,20 @@ static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper, if (++error_count > kErrorReportLimit) continue; printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n", - name, n_bits / 4, static_cast<uint64_t>(inputs[n]), fbits, - name, rawbits_to_fp(inputs[n]), fbits); + name, + n_bits / 4, + static_cast<uint64_t>(inputs[n]), + fbits, + name, + rawbits_to_fp(inputs[n]), + fbits); printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n", - d_bits / 4, static_cast<uint64_t>(expected[d]), + d_bits / 4, + static_cast<uint64_t>(expected[d]), static_cast<uint64_t>(expected[d])); printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n", - d_bits / 4, static_cast<uint64_t>(results[d]), + d_bits / 4, + static_cast<uint64_t>(results[d]), static_cast<uint64_t>(results[d])); printf("\n"); } @@ -1180,7 +1296,8 @@ static void TestFPToFixedU(const char * name, TestFPToFixedHelper_t helper, static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, - uintptr_t inputs_n, unsigned inputs_n_length, + uintptr_t inputs_n, + 
unsigned inputs_n_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form) { @@ -1233,8 +1350,8 @@ static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, __ Mov(index_n, 0); __ Bind(&loop_n); - __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL, - vn_lane_bytes_log2)); + __ Ldr(vntmp_single, + MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); __ Ext(vn, vn, vntmp, vn_lane_bytes); // Set the destination to zero. @@ -1263,9 +1380,12 @@ static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, // arrays of rawbit representation of input values. This ensures that // exact bit comparisons can be performed. template <typename Td, typename Tn> -static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper, - const Tn inputs_n[], unsigned inputs_n_length, - const Td expected[], unsigned expected_length, +static void Test1OpNEON(const char* name, + Test1OpNEONHelper_t helper, + const Tn inputs_n[], + unsigned inputs_n_length, + const Td expected[], + unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form) { VIXL_ASSERT(inputs_n_length > 0); @@ -1283,7 +1403,8 @@ static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, reinterpret_cast<uintptr_t>(results), - vd_form, vn_form); + vd_form, + vn_form); if (Test::generate_test_trace()) { // Print the results. 
@@ -1326,27 +1447,29 @@ static void Test1OpNEON(const char * name, Test1OpNEONHelper_t helper, if (error_in_vector && (++error_count <= kErrorReportLimit)) { printf("%s\n", name); printf(" Vn%.*s| Vd%.*s| Expected\n", - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding); + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding); const unsigned first_index_n = - inputs_n_length - (16 / vn_lane_bytes) + n + 1; + inputs_n_length - (16 / vn_lane_bytes) + n + 1; - for (unsigned lane = 0; - lane < std::max(vd_lane_count, vn_lane_count); + for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); lane++) { unsigned output_index = (n * vd_lane_count) + lane; unsigned input_index_n = (first_index_n + lane) % inputs_n_length; - printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " " - "| 0x%0*" PRIx64 "\n", - results[output_index] != expected[output_index] ? '*' : ' ', - lane_len_in_hex, - static_cast<uint64_t>(inputs_n[input_index_n]), - lane_len_in_hex, - static_cast<uint64_t>(results[output_index]), - lane_len_in_hex, - static_cast<uint64_t>(expected[output_index])); + printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 + " " + "| 0x%0*" PRIx64 "\n", + results[output_index] != expected[output_index] ? '*' : ' ', + lane_len_in_hex, + static_cast<uint64_t>(inputs_n[input_index_n]), + lane_len_in_hex, + static_cast<uint64_t>(results[output_index]), + lane_len_in_hex, + static_cast<uint64_t>(expected[output_index])); } } } @@ -1432,8 +1555,8 @@ static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, __ Mov(index_n, 0); __ Bind(&loop_n); - __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL, - vn_lane_bytes_log2)); + __ Ldr(vntmp_single, + MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); if (destructive) { @@ -1460,9 +1583,12 @@ static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, // arrays of rawbit representation of input values. 
This ensures that // exact bit comparisons can be performed. template <typename Td, typename Tn> -static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper, - const Tn inputs_n[], unsigned inputs_n_length, - const Td expected[], unsigned expected_length, +static void Test1OpAcrossNEON(const char* name, + Test1OpNEONHelper_t helper, + const Tn inputs_n[], + unsigned inputs_n_length, + const Td expected[], + unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form) { VIXL_ASSERT(inputs_n_length > 0); @@ -1479,7 +1605,8 @@ static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, reinterpret_cast<uintptr_t>(results), - vd_form, vn_form); + vd_form, + vn_form); if (Test::generate_test_trace()) { // Print the results. @@ -1534,8 +1661,10 @@ static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper, printf("%s\n", name); printf(" Vn%.*s| Vd%.*s| Expected\n", - lane_len_in_hex + 1, padding, - lane_len_in_hex + 1, padding); + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding); // TODO: In case of an error, all tests print out as many elements as // there are lanes in the output or input vectors. This way @@ -1546,9 +1675,11 @@ static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper, // This output for the 'Across' category has the required // modifications. 
for (unsigned lane = 0; lane < vn_lane_count; lane++) { - unsigned results_index = (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); - unsigned input_index_n = (inputs_n_length - vn_lane_count + - n + 1 + lane) % inputs_n_length; + unsigned results_index = + (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); + unsigned input_index_n = + (inputs_n_length - vn_lane_count + n + 1 + lane) % + inputs_n_length; Td expect = 0; if ((vn_lane_count - 1) == lane) { @@ -1584,8 +1715,10 @@ static void Test1OpAcrossNEON(const char * name, Test1OpNEONHelper_t helper, static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, - uintptr_t inputs_n, unsigned inputs_n_length, - uintptr_t inputs_m, unsigned inputs_m_length, + uintptr_t inputs_n, + unsigned inputs_n_length, + uintptr_t inputs_m, + unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, VectorFormat vn_form, @@ -1659,15 +1792,15 @@ static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, __ Mov(index_n, 0); __ Bind(&loop_n); - __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL, - vn_lane_bytes_log2)); + __ Ldr(vntmp_single, + MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); __ Ext(vn, vn, vntmp, vn_lane_bytes); __ Mov(index_m, 0); __ Bind(&loop_m); - __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL, - vm_lane_bytes_log2)); + __ Ldr(vmtmp_single, + MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); __ Ext(vm, vm, vmtmp, vm_lane_bytes); __ Mov(vres, vd); @@ -1695,11 +1828,15 @@ static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, // arrays of rawbit representation of input values. This ensures that // exact bit comparisons can be performed. 
template <typename Td, typename Tn, typename Tm> -static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper, +static void Test2OpNEON(const char* name, + Test2OpNEONHelper_t helper, const Td inputs_d[], - const Tn inputs_n[], unsigned inputs_n_length, - const Tm inputs_m[], unsigned inputs_m_length, - const Td expected[], unsigned expected_length, + const Tn inputs_n[], + unsigned inputs_n_length, + const Tm inputs_m[], + unsigned inputs_m_length, + const Td expected[], + unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form) { @@ -1714,10 +1851,14 @@ static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper, Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d), - reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, - reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, + reinterpret_cast<uintptr_t>(inputs_n), + inputs_n_length, + reinterpret_cast<uintptr_t>(inputs_m), + inputs_m_length, reinterpret_cast<uintptr_t>(results), - vd_form, vn_form, vm_form); + vd_form, + vn_form, + vm_form); if (Test::generate_test_trace()) { // Print the results. 
@@ -1751,7 +1892,7 @@ static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper, for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = (n * inputs_m_length * vd_lane_count) + - (m * vd_lane_count) + lane; + (m * vd_lane_count) + lane; if (results[output_index] != expected[output_index]) { error_in_vector = true; @@ -1762,20 +1903,27 @@ static void Test2OpNEON(const char * name, Test2OpNEONHelper_t helper, if (error_in_vector && (++error_count <= kErrorReportLimit)) { printf("%s\n", name); printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n", - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding); + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding); for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = (n * inputs_m_length * vd_lane_count) + - (m * vd_lane_count) + lane; - unsigned input_index_n = (inputs_n_length - vd_lane_count + - n + 1 + lane) % inputs_n_length; - unsigned input_index_m = (inputs_m_length - vd_lane_count + - m + 1 + lane) % inputs_m_length; - - printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " " + (m * vd_lane_count) + lane; + unsigned input_index_n = + (inputs_n_length - vd_lane_count + n + 1 + lane) % + inputs_n_length; + unsigned input_index_m = + (inputs_m_length - vd_lane_count + m + 1 + lane) % + inputs_m_length; + + printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 + " " "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", results[output_index] != expected[output_index] ? 
'*' : ' ', lane_len_in_hex, @@ -1885,15 +2033,15 @@ static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, __ Mov(index_n, 0); __ Bind(&loop_n); - __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL, - vn_lane_bytes_log2)); + __ Ldr(vntmp_single, + MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); __ Ext(vn, vn, vntmp, vn_lane_bytes); __ Mov(index_m, 0); __ Bind(&loop_m); - __ Ldr(vmtmp_single, MemOperand(inputs_m_base, index_m, LSL, - vm_lane_bytes_log2)); + __ Ldr(vmtmp_single, + MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); __ Ext(vm, vm, vmtmp, vm_lane_bytes); __ Mov(vres, vd); @@ -1921,18 +2069,21 @@ static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, } - // Test NEON instructions. The inputs_*[] and expected[] arrays should be // arrays of rawbit representation of input values. This ensures that // exact bit comparisons can be performed. template <typename Td, typename Tn, typename Tm> -static void TestByElementNEON(const char *name, +static void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, const Td inputs_d[], - const Tn inputs_n[], unsigned inputs_n_length, - const Tm inputs_m[], unsigned inputs_m_length, - const int indices[], unsigned indices_length, - const Td expected[], unsigned expected_length, + const Tn inputs_n[], + unsigned inputs_n_length, + const Tm inputs_m[], + unsigned inputs_m_length, + const int indices[], + unsigned indices_length, + const Td expected[], + unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form, VectorFormat vm_form) { @@ -1942,19 +2093,24 @@ static void TestByElementNEON(const char *name, const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); - const unsigned results_length = inputs_n_length * inputs_m_length * - indices_length; + const unsigned results_length = + inputs_n_length * inputs_m_length * indices_length; Td* results = new Td[results_length * vd_lane_count]; const unsigned lane_bit = sizeof(Td) * 
8; const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); TestByElementNEON_Helper(helper, - reinterpret_cast<uintptr_t>(inputs_d), - reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, - reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, - indices, indices_length, - reinterpret_cast<uintptr_t>(results), - vd_form, vn_form, vm_form); + reinterpret_cast<uintptr_t>(inputs_d), + reinterpret_cast<uintptr_t>(inputs_n), + inputs_n_length, + reinterpret_cast<uintptr_t>(inputs_m), + inputs_m_length, + indices, + indices_length, + reinterpret_cast<uintptr_t>(results), + vd_form, + vn_form, + vm_form); if (Test::generate_test_trace()) { // Print the results. @@ -1990,8 +2146,8 @@ static void TestByElementNEON(const char *name, for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = (n * inputs_m_length * indices_length * vd_lane_count) + - (m * indices_length * vd_lane_count) + - (index * vd_lane_count) + lane; + (m * indices_length * vd_lane_count) + (index * vd_lane_count) + + lane; if (results[output_index] != expected[output_index]) { error_in_vector = true; @@ -2002,35 +2158,43 @@ static void TestByElementNEON(const char *name, if (error_in_vector && (++error_count <= kErrorReportLimit)) { printf("%s\n", name); printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n", - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding, - lane_len_in_hex+1, padding); + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding, + lane_len_in_hex + 1, + padding); for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = (n * inputs_m_length * indices_length * vd_lane_count) + (m * indices_length * vd_lane_count) + (index * vd_lane_count) + lane; - unsigned input_index_n = (inputs_n_length - vd_lane_count + - n + 1 + lane) % inputs_n_length; - unsigned input_index_m = (inputs_m_length - vd_lane_count + - m + 1 + lane) % inputs_m_length; - - 
printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " " - "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", - results[output_index] != expected[output_index] ? '*' : ' ', - lane_len_in_hex, - static_cast<uint64_t>(inputs_d[lane]), - lane_len_in_hex, - static_cast<uint64_t>(inputs_n[input_index_n]), - lane_len_in_hex, - static_cast<uint64_t>(inputs_m[input_index_m]), - indices[index], - lane_len_in_hex, - static_cast<uint64_t>(results[output_index]), - lane_len_in_hex, - static_cast<uint64_t>(expected[output_index])); + unsigned input_index_n = + (inputs_n_length - vd_lane_count + n + 1 + lane) % + inputs_n_length; + unsigned input_index_m = + (inputs_m_length - vd_lane_count + m + 1 + lane) % + inputs_m_length; + + printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 + " " + "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", + results[output_index] != expected[output_index] ? '*' + : ' ', + lane_len_in_hex, + static_cast<uint64_t>(inputs_d[lane]), + lane_len_in_hex, + static_cast<uint64_t>(inputs_n[input_index_n]), + lane_len_in_hex, + static_cast<uint64_t>(inputs_m[input_index_m]), + indices[index], + lane_len_in_hex, + static_cast<uint64_t>(results[output_index]), + lane_len_in_hex, + static_cast<uint64_t>(expected[output_index])); } } } @@ -2059,8 +2223,7 @@ void Test2OpImmNEON_Helper( uintptr_t results, VectorFormat vd_form, VectorFormat vn_form) { - VIXL_ASSERT(vd_form != kFormatUndefined && - vn_form != kFormatUndefined); + VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined); SETUP(); START(); @@ -2108,8 +2271,8 @@ void Test2OpImmNEON_Helper( __ Mov(index_n, 0); __ Bind(&loop_n); - __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL, - vn_lane_bytes_log2)); + __ Ldr(vntmp_single, + MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); __ Ext(vn, vn, vntmp, vn_lane_bytes); // Set the destination to zero for tests such as '[r]shrn2'. 
@@ -2142,11 +2305,14 @@ void Test2OpImmNEON_Helper( // exact bit comparisons can be performed. template <typename Td, typename Tn, typename Tm> static void Test2OpImmNEON( - const char * name, + const char* name, typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, - const Tn inputs_n[], unsigned inputs_n_length, - const Tm inputs_m[], unsigned inputs_m_length, - const Td expected[], unsigned expected_length, + const Tn inputs_n[], + unsigned inputs_n_length, + const Tm inputs_m[], + unsigned inputs_m_length, + const Td expected[], + unsigned expected_length, VectorFormat vd_form, VectorFormat vn_form) { VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0); @@ -2161,10 +2327,13 @@ static void Test2OpImmNEON( const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); Test2OpImmNEON_Helper(helper, - reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, - inputs_m, inputs_m_length, + reinterpret_cast<uintptr_t>(inputs_n), + inputs_n_length, + inputs_m, + inputs_m_length, reinterpret_cast<uintptr_t>(results), - vd_form, vn_form); + vd_form, + vn_form); if (Test::generate_test_trace()) { // Print the results. 
@@ -2198,7 +2367,7 @@ static void Test2OpImmNEON( for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = (n * inputs_m_length * vd_lane_count) + - (m * vd_lane_count) + lane; + (m * vd_lane_count) + lane; if (results[output_index] != expected[output_index]) { error_in_vector = true; @@ -2209,22 +2378,25 @@ static void Test2OpImmNEON( if (error_in_vector && (++error_count <= kErrorReportLimit)) { printf("%s\n", name); printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", - lane_len_in_hex+1, padding, - lane_len_in_hex, padding, - lane_len_in_hex+1, padding); + lane_len_in_hex + 1, + padding, + lane_len_in_hex, + padding, + lane_len_in_hex + 1, + padding); - const unsigned first_index_n = - inputs_n_length - (16 / vn_lane_bytes) + n + 1; + const unsigned first_index_n = + inputs_n_length - (16 / vn_lane_bytes) + n + 1; - for (unsigned lane = 0; - lane < std::max(vd_lane_count, vn_lane_count); - lane++) { + for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); + lane++) { unsigned output_index = (n * inputs_m_length * vd_lane_count) + - (m * vd_lane_count) + lane; + (m * vd_lane_count) + lane; unsigned input_index_n = (first_index_n + lane) % inputs_n_length; unsigned input_index_m = m; - printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " " + printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 + " " "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", results[output_index] != expected[output_index] ? '*' : ' ', lane_len_in_hex, @@ -2252,14 +2424,17 @@ static void Test2OpImmNEON( // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. 
==== -static void TestOpImmOpImmNEON_Helper( - TestOpImmOpImmVdUpdateNEONHelper_t helper, - uintptr_t inputs_d, - const int inputs_imm1[], unsigned inputs_imm1_length, - uintptr_t inputs_n, unsigned inputs_n_length, - const int inputs_imm2[], unsigned inputs_imm2_length, - uintptr_t results, - VectorFormat vd_form, VectorFormat vn_form) { +static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, + uintptr_t inputs_d, + const int inputs_imm1[], + unsigned inputs_imm1_length, + uintptr_t inputs_n, + unsigned inputs_n_length, + const int inputs_imm2[], + unsigned inputs_imm2_length, + uintptr_t results, + VectorFormat vd_form, + VectorFormat vn_form) { VIXL_ASSERT(vd_form != kFormatUndefined); VIXL_ASSERT(vn_form != kFormatUndefined); @@ -2318,13 +2493,14 @@ static void TestOpImmOpImmNEON_Helper( __ Mov(index_n, 0); __ Bind(&loop_n); - __ Ldr(vntmp_single, MemOperand(inputs_n_base, index_n, LSL, - vn_lane_bytes_log2)); + __ Ldr(vntmp_single, + MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); { EmissionCheckScope guard(&masm, - kInstructionSize * inputs_imm1_length * inputs_imm2_length * 3); + kInstructionSize * inputs_imm1_length * + inputs_imm2_length * 3); for (unsigned i = 0; i < inputs_imm1_length; i++) { for (unsigned j = 0; j < inputs_imm2_length; j++) { __ Mov(vres, vd); @@ -2348,7 +2524,7 @@ static void TestOpImmOpImmNEON_Helper( // arrays of rawbit representation of input values. This ensures that // exact bit comparisons can be performed. 
template <typename Td, typename Tn> -static void TestOpImmOpImmNEON(const char * name, +static void TestOpImmOpImmNEON(const char* name, TestOpImmOpImmVdUpdateNEONHelper_t helper, const Td inputs_d[], const int inputs_imm1[], @@ -2367,8 +2543,8 @@ static void TestOpImmOpImmNEON(const char * name, const unsigned vd_lane_count = LaneCountFromFormat(vd_form); - const unsigned results_length = inputs_n_length * - inputs_imm1_length * inputs_imm2_length; + const unsigned results_length = + inputs_n_length * inputs_imm1_length * inputs_imm2_length; Td* results = new Td[results_length * vd_lane_count]; const unsigned lane_bit = sizeof(Td) * 8; @@ -2383,7 +2559,8 @@ static void TestOpImmOpImmNEON(const char * name, inputs_imm2, inputs_imm2_length, reinterpret_cast<uintptr_t>(results), - vd_form, vn_form); + vd_form, + vn_form); if (Test::generate_test_trace()) { // Print the results. @@ -2420,8 +2597,7 @@ static void TestOpImmOpImmNEON(const char * name, for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = - (n * inputs_imm1_length * - inputs_imm2_length * vd_lane_count) + + (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + (imm1 * inputs_imm2_length * vd_lane_count) + (imm2 * vd_lane_count) + lane; @@ -2434,39 +2610,46 @@ static void TestOpImmOpImmNEON(const char * name, if (error_in_vector && (++error_count <= kErrorReportLimit)) { printf("%s\n", name); printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", - lane_len_in_hex+1, padding, - lane_len_in_hex, padding, - lane_len_in_hex+1, padding, - lane_len_in_hex, padding, - lane_len_in_hex+1, padding); + lane_len_in_hex + 1, + padding, + lane_len_in_hex, + padding, + lane_len_in_hex + 1, + padding, + lane_len_in_hex, + padding, + lane_len_in_hex + 1, + padding); for (unsigned lane = 0; lane < vd_lane_count; lane++) { unsigned output_index = - (n * inputs_imm1_length * - inputs_imm2_length * vd_lane_count) + - (imm1 * inputs_imm2_length * vd_lane_count) + - (imm2 * 
vd_lane_count) + lane; - unsigned input_index_n = (inputs_n_length - vd_lane_count + - n + 1 + lane) % inputs_n_length; + (n * inputs_imm1_length * inputs_imm2_length * + vd_lane_count) + + (imm1 * inputs_imm2_length * vd_lane_count) + + (imm2 * vd_lane_count) + lane; + unsigned input_index_n = + (inputs_n_length - vd_lane_count + n + 1 + lane) % + inputs_n_length; unsigned input_index_imm1 = imm1; unsigned input_index_imm2 = imm2; - printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 " " - "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", - results[output_index] != - expected[output_index] ? '*' : ' ', - lane_len_in_hex, - static_cast<uint64_t>(inputs_d[lane]), - lane_len_in_hex, - static_cast<uint64_t>(inputs_imm1[input_index_imm1]), - lane_len_in_hex, - static_cast<uint64_t>(inputs_n[input_index_n]), - lane_len_in_hex, - static_cast<uint64_t>(inputs_imm2[input_index_imm2]), - lane_len_in_hex, - static_cast<uint64_t>(results[output_index]), - lane_len_in_hex, - static_cast<uint64_t>(expected[output_index])); + printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 + " " + "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", + results[output_index] != expected[output_index] ? '*' + : ' ', + lane_len_in_hex, + static_cast<uint64_t>(inputs_d[lane]), + lane_len_in_hex, + static_cast<uint64_t>(inputs_imm1[input_index_imm1]), + lane_len_in_hex, + static_cast<uint64_t>(inputs_n[input_index_n]), + lane_len_in_hex, + static_cast<uint64_t>(inputs_imm2[input_index_imm2]), + lane_len_in_hex, + static_cast<uint64_t>(results[output_index]), + lane_len_in_hex, + static_cast<uint64_t>(expected[output_index])); } } } @@ -2489,20 +2672,21 @@ static void TestOpImmOpImmNEON(const char * name, // operations. 
#define STRINGIFY(s) #s -#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \ - Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \ - &MacroAssembler::mnemonic, \ - input, sizeof(input) / sizeof(input[0]), \ - kExpected_##mnemonic##_##variant, \ - kExpectedCount_##mnemonic##_##variant) - -#define DEFINE_TEST_FP(mnemonic, type, input) \ - TEST(mnemonic##_d) { \ - CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \ - } \ - TEST(mnemonic##_s) { \ - CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input); \ - } +#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \ + Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \ + &MacroAssembler::mnemonic, \ + input, \ + sizeof(input) / sizeof(input[0]), \ + kExpected_##mnemonic##_##variant, \ + kExpectedCount_##mnemonic##_##variant) + +#define DEFINE_TEST_FP(mnemonic, type, input) \ + TEST(mnemonic##_d) { \ + CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \ + } \ + TEST(mnemonic##_s) { \ + CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input); \ + } // TODO: Test with a newer version of valgrind. 
// @@ -2544,19 +2728,19 @@ TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); } TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); } TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); } -#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input) \ - TEST(mnemonic##_xd) { \ - CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \ - } \ - TEST(mnemonic##_xs) { \ - CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input); \ - } \ - TEST(mnemonic##_wd) { \ - CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \ - } \ - TEST(mnemonic##_ws) { \ - CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input); \ - } +#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input) \ + TEST(mnemonic##_xd) { \ + CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \ + } \ + TEST(mnemonic##_xs) { \ + CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input); \ + } \ + TEST(mnemonic##_wd) { \ + CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \ + } \ + TEST(mnemonic##_ws) { \ + CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input); \ + } DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions) DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions) @@ -2578,1139 +2762,1307 @@ DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions) // ==== NEON Tests. 
==== -#define CALL_TEST_NEON_HELPER_1Op(mnemonic, \ - vdform, vnform, \ - input_n) \ - Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ - &MacroAssembler::mnemonic, \ - input_n, \ - (sizeof(input_n) / sizeof(input_n[0])), \ - kExpected_NEON_##mnemonic##_##vdform, \ - kExpectedCount_NEON_##mnemonic##_##vdform, \ - kFormat##vdform, \ - kFormat##vnform) - -#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, \ - vdform, vnform, \ - input_n) \ - Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \ - "_" STRINGIFY(vnform), \ - &MacroAssembler::mnemonic, \ - input_n, \ - (sizeof(input_n) / sizeof(input_n[0])), \ - kExpected_NEON_##mnemonic##_##vdform##_##vnform, \ - kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \ - kFormat##vdform, \ - kFormat##vnform) - -#define CALL_TEST_NEON_HELPER_2Op(mnemonic, \ - vdform, vnform, vmform, \ - input_d, input_n, input_m) \ - Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ - &MacroAssembler::mnemonic, \ - input_d, \ - input_n, \ - (sizeof(input_n) / sizeof(input_n[0])), \ - input_m, \ - (sizeof(input_m) / sizeof(input_m[0])), \ - kExpected_NEON_##mnemonic##_##vdform, \ - kExpectedCount_NEON_##mnemonic##_##vdform, \ - kFormat##vdform, \ - kFormat##vnform, \ - kFormat##vmform) - -#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ - vdform, vnform, \ - input_n, input_m) \ - Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \ - &MacroAssembler::mnemonic, \ - input_n, \ - (sizeof(input_n) / sizeof(input_n[0])), \ - input_m, \ - (sizeof(input_m) / sizeof(input_m[0])), \ - kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \ - kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \ - kFormat##vdform, \ - kFormat##vnform) - -#define CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ - vdform, vnform, vmform, \ - input_d, input_n, input_m, indices) \ - TestByElementNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) \ - "_" STRINGIFY(vnform) "_" STRINGIFY(vmform), \ - &MacroAssembler::mnemonic, \ - input_d, \ - 
input_n, \ - (sizeof(input_n) / sizeof(input_n[0])), \ - input_m, \ - (sizeof(input_m) / sizeof(input_m[0])), \ - indices, \ - (sizeof(indices) / sizeof(indices[0])), \ - kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ - kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ - kFormat##vdform, \ - kFormat##vnform, \ - kFormat##vmform) - -#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, \ - mnemonic, \ - vdform, vnform, \ - input_d, input_imm1, \ - input_n, input_imm2) \ - TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ - helper, \ - input_d, \ - input_imm1, \ - (sizeof(input_imm1) / sizeof(input_imm1[0])), \ - input_n, \ - (sizeof(input_n) / sizeof(input_n[0])), \ - input_imm2, \ - (sizeof(input_imm2) / sizeof(input_imm2[0])), \ - kExpected_NEON_##mnemonic##_##vdform, \ - kExpectedCount_NEON_##mnemonic##_##vdform, \ - kFormat##vdform, \ - kFormat##vnform) - -#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \ - CALL_TEST_NEON_HELPER_1Op(mnemonic, \ - variant, variant, \ - input) - -#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ - TEST(mnemonic##_8B) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \ - } \ - TEST(mnemonic##_16B) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \ - } - -#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \ - TEST(mnemonic##_4H) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \ - } \ - TEST(mnemonic##_8H) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \ - } - -#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \ - } \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \ - } +#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \ + Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ + &MacroAssembler::mnemonic, \ + input_n, \ + 
(sizeof(input_n) / sizeof(input_n[0])), \ + kExpected_NEON_##mnemonic##_##vdform, \ + kExpectedCount_NEON_##mnemonic##_##vdform, \ + kFormat##vdform, \ + kFormat##vnform) + +#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n) \ + Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \ + vnform), \ + &MacroAssembler::mnemonic, \ + input_n, \ + (sizeof(input_n) / sizeof(input_n[0])), \ + kExpected_NEON_##mnemonic##_##vdform##_##vnform, \ + kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \ + kFormat##vdform, \ + kFormat##vnform) + +#define CALL_TEST_NEON_HELPER_2Op( \ + mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \ + Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ + &MacroAssembler::mnemonic, \ + input_d, \ + input_n, \ + (sizeof(input_n) / sizeof(input_n[0])), \ + input_m, \ + (sizeof(input_m) / sizeof(input_m[0])), \ + kExpected_NEON_##mnemonic##_##vdform, \ + kExpectedCount_NEON_##mnemonic##_##vdform, \ + kFormat##vdform, \ + kFormat##vnform, \ + kFormat##vmform) + +#define CALL_TEST_NEON_HELPER_2OpImm( \ + mnemonic, vdform, vnform, input_n, input_m) \ + Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \ + &MacroAssembler::mnemonic, \ + input_n, \ + (sizeof(input_n) / sizeof(input_n[0])), \ + input_m, \ + (sizeof(input_m) / sizeof(input_m[0])), \ + kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \ + kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \ + kFormat##vdform, \ + kFormat##vnform) + +#define CALL_TEST_NEON_HELPER_ByElement( \ + mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \ + TestByElementNEON( \ + STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \ + vnform) "_" STRINGIFY(vmform), \ + &MacroAssembler::mnemonic, \ + input_d, \ + input_n, \ + (sizeof(input_n) / sizeof(input_n[0])), \ + input_m, \ + (sizeof(input_m) / sizeof(input_m[0])), \ + indices, \ + (sizeof(indices) / sizeof(indices[0])), \ + 
kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ + kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \ + kFormat##vdform, \ + kFormat##vnform, \ + kFormat##vmform) + +#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, \ + mnemonic, \ + vdform, \ + vnform, \ + input_d, \ + input_imm1, \ + input_n, \ + input_imm2) \ + TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \ + helper, \ + input_d, \ + input_imm1, \ + (sizeof(input_imm1) / sizeof(input_imm1[0])), \ + input_n, \ + (sizeof(input_n) / sizeof(input_n[0])), \ + input_imm2, \ + (sizeof(input_imm2) / sizeof(input_imm2[0])), \ + kExpected_NEON_##mnemonic##_##vdform, \ + kExpectedCount_NEON_##mnemonic##_##vdform, \ + kFormat##vdform, \ + kFormat##vnform) + +#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \ + CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input) + +#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ + TEST(mnemonic##_8B) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \ + } \ + TEST(mnemonic##_16B) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \ + } -#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) +#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \ + TEST(mnemonic##_4H) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \ + } \ + TEST(mnemonic##_8H) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \ + } -#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) +#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \ + } \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_2SAME(mnemonic, 
input) \ - DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ - } -#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) -#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \ - } \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \ - } \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) -#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \ - } \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_2SAME(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ + } +#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ - TEST(mnemonic##_B) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \ - } -#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ - TEST(mnemonic##_H) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \ - } -#define 
DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \ - } -#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \ + } \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \ + } \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \ + } -#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) - -#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ - DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) - - -#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \ - CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, \ - vd_form, vn_form, \ - input_n) - -#define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \ - TEST(mnemonic##_B_8B) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \ - } \ - TEST(mnemonic##_B_16B) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \ - } \ - TEST(mnemonic##_H_4H) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \ - } \ - TEST(mnemonic##_H_8H) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \ - } \ - TEST(mnemonic##_S_4S) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \ - } +#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \ + } \ + 
TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \ + } -#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \ - TEST(mnemonic##_H_8B) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \ - } \ - TEST(mnemonic##_H_16B) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \ - } \ - TEST(mnemonic##_S_4H) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \ - } \ - TEST(mnemonic##_S_8H) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \ - } \ - TEST(mnemonic##_D_4S) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \ - } +#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ + TEST(mnemonic##_B) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \ + } +#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ + TEST(mnemonic##_H) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \ + } +#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \ + } +#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \ + TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \ - TEST(mnemonic##_S_4S) { \ - CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \ - } +#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) + +#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ + DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) + + +#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \ + 
CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n) + +#define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \ + TEST(mnemonic##_B_8B) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \ + } \ + TEST(mnemonic##_B_16B) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \ + } \ + TEST(mnemonic##_H_4H) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \ + } \ + TEST(mnemonic##_H_8H) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \ + } \ + TEST(mnemonic##_S_4S) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \ + } -#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, \ - vdform, vnform, \ - input_n) \ - CALL_TEST_NEON_HELPER_1Op(mnemonic, \ - vdform, vnform, \ - input_n) - -#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \ - TEST(mnemonic##_4H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \ - } \ - TEST(mnemonic##_8H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \ - } \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \ - } \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \ - } \ - TEST(mnemonic##_1D) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \ - } \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \ - } +#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \ + TEST(mnemonic##_H_8B) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \ + } \ + TEST(mnemonic##_H_16B) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \ + } \ + TEST(mnemonic##_S_4H) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \ + } \ + TEST(mnemonic##_S_8H) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \ + } \ + TEST(mnemonic##_D_4S) { \ + 
CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \ - TEST(mnemonic##_8B) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ - } \ - TEST(mnemonic##_4H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ - } \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ - } \ - TEST(mnemonic##2_16B) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input);\ - } \ - TEST(mnemonic##2_8H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \ + TEST(mnemonic##_S_4S) { \ + CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \ + } -#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ - } \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input);\ - } \ - TEST(mnemonic##2_2D) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ - } +#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \ + CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) + +#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \ + TEST(mnemonic##_4H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \ + } \ + TEST(mnemonic##_8H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \ + } \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \ + } \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, 
kInput16bits##input); \ + } \ + TEST(mnemonic##_1D) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \ + } \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ - TEST(mnemonic##_4H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ - } \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ - } \ - TEST(mnemonic##2_8H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \ + TEST(mnemonic##_8B) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ + } \ + TEST(mnemonic##_4H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ + } \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ + } \ + TEST(mnemonic##2_16B) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \ + } \ + TEST(mnemonic##2_8H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ + } \ + TEST(mnemonic##2_4S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ + } \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ + } \ + TEST(mnemonic##2_4S) { \ + 
CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \ + } \ + TEST(mnemonic##2_2D) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ + } -#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ - TEST(mnemonic##_B) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ - } \ - TEST(mnemonic##_H) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ - } \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ + TEST(mnemonic##_4H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ + } \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ + } \ + TEST(mnemonic##2_8H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ + } \ + TEST(mnemonic##2_4S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ + } -#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ - } \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ + } \ + TEST(mnemonic##2_4S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ + } -#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) { \ - CALL_TEST_NEON_HELPER_2Op(mnemonic, \ - variant, variant, variant, \ - input_d, input_nm, input_nm); \ - } +#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ + TEST(mnemonic##_B) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ + } \ + TEST(mnemonic##_H) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, 
kInput32bits##input); \ + } \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ - TEST(mnemonic##_8B) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, \ - kInput8bitsAccDestination, \ - kInput8bits##input); \ - } \ - TEST(mnemonic##_16B) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, \ - kInput8bitsAccDestination, \ - kInput8bits##input); \ - } \ - -#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ - TEST(mnemonic##_4H) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, \ - kInput16bitsAccDestination, \ - kInput16bits##input); \ - } \ - TEST(mnemonic##_8H) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, \ - kInput16bitsAccDestination, \ - kInput16bits##input); \ - } \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \ - kInput32bitsAccDestination, \ - kInput32bits##input); \ - } \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \ - kInput32bitsAccDestination, \ - kInput32bits##input); \ - } +#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ + } \ + TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ + } -#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ - DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ - DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) +#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ + { \ + CALL_TEST_NEON_HELPER_2Op(mnemonic, \ + variant, \ + variant, \ + variant, \ + input_d, \ + input_nm, \ + input_nm); \ + } -#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ - DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \ - kInput64bitsAccDestination, \ - kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ + TEST(mnemonic##_8B) { \ + 
CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 8B, \ + kInput8bitsAccDestination, \ + kInput8bits##input); \ + } \ + TEST(mnemonic##_16B) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 16B, \ + kInput8bitsAccDestination, \ + kInput8bits##input); \ + } -#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, \ - kInputFloatAccDestination, \ - kInputFloat##input); \ - } \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, \ - kInputFloatAccDestination, \ - kInputFloat##input); \ - } \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, \ - kInputDoubleAccDestination, \ - kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ + TEST(mnemonic##_4H) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 4H, \ + kInput16bitsAccDestination, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##_8H) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 8H, \ + kInput16bitsAccDestination, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 2S, \ + kInput32bitsAccDestination, \ + kInput32bits##input); \ + } \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 4S, \ + kInput32bitsAccDestination, \ + kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ - kInput64bitsAccDestination, \ - kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ + DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ + DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) + +#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ + DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 2D, \ + kInput64bitsAccDestination, \ + kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ - TEST(mnemonic##_H) { \ - 
CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \ - kInput16bitsAccDestination, \ - kInput16bits##input); \ - } \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ - kInput32bitsAccDestination, \ - kInput32bits##input); \ - } \ - -#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ - TEST(mnemonic##_B) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, \ - kInput8bitsAccDestination, \ - kInput8bits##input); \ - } \ - TEST(mnemonic##_H) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, \ - kInput16bitsAccDestination, \ - kInput16bits##input); \ - } \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ - kInput32bitsAccDestination, \ - kInput32bits##input); \ - } \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ - kInput64bitsAccDestination, \ - kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 2S, \ + kInputFloatAccDestination, \ + kInputFloat##input); \ + } \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 4S, \ + kInputFloatAccDestination, \ + kInputFloat##input); \ + } \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + 2D, \ + kInputDoubleAccDestination, \ + kInputDouble##input); \ + } -#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, \ - kInputFloatAccDestination, \ - kInputFloat##input); \ - } \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, \ - kInputDoubleAccDestination, \ - kInputDouble##input); \ - } +#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ + TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + D, \ + kInput64bitsAccDestination, \ + kInput64bits##input); \ + } -#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ - vdform, vnform, vmform, \ - input_d, input_n, input_m) { \ - CALL_TEST_NEON_HELPER_2Op(mnemonic, \ - vdform, vnform, vmform, \ 
- input_d, input_n, input_m); \ - } +#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ + TEST(mnemonic##_H) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + H, \ + kInput16bitsAccDestination, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + S, \ + kInput32bitsAccDestination, \ + kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ - TEST(mnemonic##_8H) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \ - kInput16bitsAccDestination, \ - kInput8bits##input, kInput8bits##input); \ - } \ - TEST(mnemonic##2_8H) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \ - kInput16bitsAccDestination, \ - kInput8bits##input, kInput8bits##input); \ - } +#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ + TEST(mnemonic##_B) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + B, \ + kInput8bitsAccDestination, \ + kInput8bits##input); \ + } \ + TEST(mnemonic##_H) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + H, \ + kInput16bitsAccDestination, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + S, \ + kInput32bitsAccDestination, \ + kInput32bits##input); \ + } \ + TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + D, \ + kInput64bitsAccDestination, \ + kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \ - kInput32bitsAccDestination, \ - kInput16bits##input, kInput16bits##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \ - kInput32bitsAccDestination, \ - kInput16bits##input, kInput16bits##input); \ - } +#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + S, \ + kInputFloatAccDestination, \ + kInputFloat##input); \ + } \ + TEST(mnemonic##_D) { \ + 
CALL_TEST_NEON_HELPER_3SAME(mnemonic, \ + D, \ + kInputDoubleAccDestination, \ + kInputDouble##input); \ + } -#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \ - kInput64bitsAccDestination, \ - kInput32bits##input, kInput32bits##input); \ - } \ - TEST(mnemonic##2_2D) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \ - kInput64bitsAccDestination, \ - kInput32bits##input, kInput32bits##input); \ - } +#define CALL_TEST_NEON_HELPER_3DIFF( \ + mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \ + { \ + CALL_TEST_NEON_HELPER_2Op(mnemonic, \ + vdform, \ + vnform, \ + vmform, \ + input_d, \ + input_n, \ + input_m); \ + } -#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) - -#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) - -#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, \ - kInput32bitsAccDestination, \ - kInput16bits##input, \ - kInput16bits##input); \ - } +#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ + TEST(mnemonic##_8H) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 8H, \ + 8B, \ + 8B, \ + kInput16bitsAccDestination, \ + kInput8bits##input, \ + kInput8bits##input); \ + } \ + TEST(mnemonic##2_8H) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 8H, \ + 16B, \ + 16B, \ + kInput16bitsAccDestination, \ + kInput8bits##input, \ + kInput8bits##input); \ + } -#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, \ - kInput64bitsAccDestination, \ - kInput32bits##input, \ - kInput32bits##input); \ - } +#define 
DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 4S, \ + 4H, \ + 4H, \ + kInput32bitsAccDestination, \ + kInput16bits##input, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##2_4S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 4S, \ + 8H, \ + 8H, \ + kInput32bitsAccDestination, \ + kInput16bits##input, \ + kInput16bits##input); \ + } -#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ - DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) - -#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ - TEST(mnemonic##_8H) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \ - kInput16bitsAccDestination, \ - kInput16bits##input, kInput8bits##input); \ - } \ - TEST(mnemonic##_4S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \ - kInput32bitsAccDestination, \ - kInput32bits##input, kInput16bits##input); \ - } \ - TEST(mnemonic##_2D) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \ - kInput64bitsAccDestination, \ - kInput64bits##input, kInput32bits##input); \ - } \ - TEST(mnemonic##2_8H) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \ - kInput16bitsAccDestination, \ - kInput16bits##input, kInput8bits##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \ - kInput32bitsAccDestination, \ - kInput32bits##input, kInput16bits##input); \ - } \ - TEST(mnemonic##2_2D) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \ - kInput64bitsAccDestination, \ - kInput64bits##input, kInput32bits##input); \ - } +#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ + TEST(mnemonic##_2D) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 2D, \ + 2S, \ + 2S, \ + kInput64bitsAccDestination, \ + kInput32bits##input, \ + kInput32bits##input); \ + } \ + TEST(mnemonic##2_2D) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 2D, \ + 4S, \ + 4S, \ + 
kInput64bitsAccDestination, \ + kInput32bits##input, \ + kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ - TEST(mnemonic##_8B) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \ - kInput8bitsAccDestination, \ - kInput16bits##input, kInput16bits##input); \ - } \ - TEST(mnemonic##_4H) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \ - kInput16bitsAccDestination, \ - kInput32bits##input, kInput32bits##input); \ - } \ - TEST(mnemonic##_2S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \ - kInput32bitsAccDestination, \ - kInput64bits##input, kInput64bits##input); \ - } \ - TEST(mnemonic##2_16B) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \ - kInput8bitsAccDestination, \ - kInput16bits##input, kInput16bits##input); \ - } \ - TEST(mnemonic##2_8H) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \ - kInput16bitsAccDestination, \ - kInput32bits##input, kInput32bits##input); \ - } \ - TEST(mnemonic##2_4S) { \ - CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \ - kInput32bitsAccDestination, \ - kInput64bits##input, kInput64bits##input); \ - } +#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) + +#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) + +#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + S, \ + H, \ + H, \ + kInput32bitsAccDestination, \ + kInput16bits##input, \ + kInput16bits##input); \ + } -#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - vdform, vnform, \ - input_n, \ - input_imm) { \ - CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ - vdform, vnform, \ - input_n, input_imm); \ - } +#define 
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ + TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + D, \ + S, \ + S, \ + kInput64bitsAccDestination, \ + kInput32bits##input, \ + kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ - TEST(mnemonic##_8B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 8B, 8B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_16B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 16B, 16B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4H, 4H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_8H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 8H, 8H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2S, 2S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4S, 4S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2D, 2D, \ - kInput64bits##input, \ - kInput64bitsImm##input_imm); \ - } +#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ + DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) + +#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ + TEST(mnemonic##_8H) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 8H, \ + 8H, \ + 8B, \ + kInput16bitsAccDestination, \ + kInput16bits##input, \ + kInput8bits##input); \ + } \ + TEST(mnemonic##_4S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 4S, \ + 4S, \ + 4H, \ + kInput32bitsAccDestination, \ + kInput32bits##input, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##_2D) { \ + 
CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 2D, \ + 2D, \ + 2S, \ + kInput64bitsAccDestination, \ + kInput64bits##input, \ + kInput32bits##input); \ + } \ + TEST(mnemonic##2_8H) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 8H, \ + 8H, \ + 16B, \ + kInput16bitsAccDestination, \ + kInput16bits##input, \ + kInput8bits##input); \ + } \ + TEST(mnemonic##2_4S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 4S, \ + 4S, \ + 8H, \ + kInput32bitsAccDestination, \ + kInput32bits##input, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##2_2D) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 2D, \ + 2D, \ + 4S, \ + kInput64bitsAccDestination, \ + kInput64bits##input, \ + kInput32bits##input); \ + } -#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ - TEST(mnemonic##_8B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 8B, B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_16B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 16B, B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4H, H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_8H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 8H, H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2S, S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4S, S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2D, D, \ - kInput64bits##input, \ - kInput64bitsImm##input_imm); \ - } +#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ + TEST(mnemonic##_8B) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 8B, \ + 8H, \ + 8H, \ + 
kInput8bitsAccDestination, \ + kInput16bits##input, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##_4H) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 4H, \ + 4S, \ + 4S, \ + kInput16bitsAccDestination, \ + kInput32bits##input, \ + kInput32bits##input); \ + } \ + TEST(mnemonic##_2S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \ + 2S, \ + 2D, \ + 2D, \ + kInput32bitsAccDestination, \ + kInput64bits##input, \ + kInput64bits##input); \ + } \ + TEST(mnemonic##2_16B) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 16B, \ + 8H, \ + 8H, \ + kInput8bitsAccDestination, \ + kInput16bits##input, \ + kInput16bits##input); \ + } \ + TEST(mnemonic##2_8H) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 8H, \ + 4S, \ + 4S, \ + kInput16bitsAccDestination, \ + kInput32bits##input, \ + kInput32bits##input); \ + } \ + TEST(mnemonic##2_4S) { \ + CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \ + 4S, \ + 2D, \ + 2D, \ + kInput32bitsAccDestination, \ + kInput64bits##input, \ + kInput64bits##input); \ + } -#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ - TEST(mnemonic##_8B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 8B, 8H, \ - kInput16bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4H, 4S, \ - kInput32bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2S, 2D, \ - kInput64bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##2_16B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ - 16B, 8H, \ - kInput16bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##2_8H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ - 8H, 4S, \ - kInput32bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##2_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ - 4S, 2D, \ - kInput64bits##input, \ - kInput32bitsImm##input_imm); \ - } 
+#define CALL_TEST_NEON_HELPER_2OPIMM( \ + mnemonic, vdform, vnform, input_n, input_imm) \ + { \ + CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \ + vdform, \ + vnform, \ + input_n, \ + input_imm); \ + } -#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ - TEST(mnemonic##_B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - B, H, \ - kInput16bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - H, S, \ - kInput32bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - S, D, \ - kInput64bits##input, \ - kInput32bitsImm##input_imm); \ - } +#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ + TEST(mnemonic##_8B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 8B, \ + 8B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_16B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 16B, \ + 16B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4H, \ + 4H, \ + kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_8H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 8H, \ + 8H, \ + kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2S, \ + 2S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4S, \ + 4S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2D, \ + 2D, \ + kInput64bits##input, \ + kInput64bitsImm##input_imm); \ + } -#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ - TEST(mnemonic##_2S_2OPIMM) { \ - 
CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - 2S, 2S, \ - kInputFloat##Basic, \ - kInputDoubleImm##input_imm) \ - } \ - TEST(mnemonic##_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - 4S, 4S, \ - kInputFloat##input, \ - kInputDoubleImm##input_imm); \ - } \ - TEST(mnemonic##_2D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - 2D, 2D, \ - kInputDouble##input, \ - kInputDoubleImm##input_imm); \ - } +#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ + TEST(mnemonic##_8B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 8B, \ + B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_16B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 16B, \ + B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4H, \ + H, \ + kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_8H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 8H, \ + H, \ + kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2S, \ + S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4S, \ + S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2D, \ + D, \ + kInput64bits##input, \ + kInput64bitsImm##input_imm); \ + } -#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ - TEST(mnemonic##_2S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - 2S, 2S, \ - kInputFloat##Basic, \ - kInput32bitsImm##input_imm) \ - } \ - TEST(mnemonic##_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - 4S, 4S, \ - kInputFloat##input, \ - kInput32bitsImm##input_imm) \ - } \ - TEST(mnemonic##_2D_2OPIMM) { \ - 
CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - 2D, 2D, \ - kInputDouble##input, \ - kInput64bitsImm##input_imm) \ - } +#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ + TEST(mnemonic##_8B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 8B, \ + 8H, \ + kInput16bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4H, \ + 4S, \ + kInput32bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2S, \ + 2D, \ + kInput64bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##2_16B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ + 16B, \ + 8H, \ + kInput16bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##2_8H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ + 8H, \ + 4S, \ + kInput32bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##2_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ + 4S, \ + 2D, \ + kInput64bits##input, \ + kInput32bitsImm##input_imm); \ + } -#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ - TEST(mnemonic##_S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - S, S, \ - kInputFloat##Basic, \ - kInput32bitsImm##input_imm) \ - } \ - TEST(mnemonic##_D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM( \ - mnemonic, \ - D, D, \ - kInputDouble##input, \ - kInput64bitsImm##input_imm) \ - } +#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ + TEST(mnemonic##_B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + B, \ + H, \ + kInput16bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + H, \ + S, \ + kInput32bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + S, \ + D, \ + 
kInput64bits##input, \ + kInput32bitsImm##input_imm); \ + } -#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ - TEST(mnemonic##_2S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2S, 2S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4S, 4S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2D, 2D, \ - kInput64bits##input, \ - kInput64bitsImm##input_imm); \ - } +#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ + TEST(mnemonic##_2S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2S, \ + 2S, \ + kInputFloat##Basic, \ + kInputDoubleImm##input_imm) \ + } \ + TEST(mnemonic##_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4S, \ + 4S, \ + kInputFloat##input, \ + kInputDoubleImm##input_imm); \ + } \ + TEST(mnemonic##_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2D, \ + 2D, \ + kInputDouble##input, \ + kInputDoubleImm##input_imm); \ + } -#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ - TEST(mnemonic##_D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - D, D, \ - kInput64bits##input, \ - kInput64bitsImm##input_imm); \ - } +#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ + TEST(mnemonic##_2S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2S, \ + 2S, \ + kInputFloat##Basic, \ + kInput32bitsImm##input_imm) \ + } \ + TEST(mnemonic##_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4S, \ + 4S, \ + kInputFloat##input, \ + kInput32bitsImm##input_imm) \ + } \ + TEST(mnemonic##_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2D, \ + 2D, \ + kInputDouble##input, \ + kInput64bitsImm##input_imm) \ + } -#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ - TEST(mnemonic##_S_2OPIMM) { \ - 
CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - S, S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } \ - DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) - -#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ - TEST(mnemonic##_D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - D, D, \ - kInputDouble##input, \ - kInputDoubleImm##input_imm); \ - } +#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ + TEST(mnemonic##_S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + S, \ + S, \ + kInputFloat##Basic, \ + kInput32bitsImm##input_imm) \ + } \ + TEST(mnemonic##_D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + D, \ + D, \ + kInputDouble##input, \ + kInput64bitsImm##input_imm) \ + } -#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ - TEST(mnemonic##_S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - S, S, \ - kInputFloat##input, \ - kInputDoubleImm##input_imm); \ - } \ - DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) - -#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ - TEST(mnemonic##_B_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - B, B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - H, H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) - -#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ - TEST(mnemonic##_8H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 8H, 8B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 4S, 4H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##_2D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ - 2D, 2S, \ - kInput32bits##input, \ - 
kInput32bitsImm##input_imm); \ - } \ - TEST(mnemonic##2_8H_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ - 8H, 16B, \ - kInput8bits##input, \ - kInput8bitsImm##input_imm); \ - } \ - TEST(mnemonic##2_4S_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ - 4S, 8H, \ - kInput16bits##input, \ - kInput16bitsImm##input_imm); \ - } \ - TEST(mnemonic##2_2D_2OPIMM) { \ - CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ - 2D, 4S, \ - kInput32bits##input, \ - kInput32bitsImm##input_imm); \ - } +#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ + TEST(mnemonic##_2S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2S, \ + 2S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4S, \ + 4S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2D, \ + 2D, \ + kInput64bits##input, \ + kInput64bitsImm##input_imm); \ + } -#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - vdform, vnform, vmform, \ - input_d, input_n, \ - input_m, indices) { \ - CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ - vdform, vnform, vmform, \ - input_d, input_n, \ - input_m, indices); \ - } +#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ + TEST(mnemonic##_D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + D, \ + D, \ + kInput64bits##input, \ + kInput64bitsImm##input_imm); \ + } -#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ - TEST(mnemonic##_4H_4H_H) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 4H, 4H, H, \ - kInput16bits##input_d, \ - kInput16bits##input_n, \ - kInput16bits##input_m, \ - kInputHIndices); \ - } \ - TEST(mnemonic##_8H_8H_H) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 8H, 8H, H, \ - kInput16bits##input_d, \ - kInput16bits##input_n, \ - kInput16bits##input_m, \ - kInputHIndices); \ - } \ - 
TEST(mnemonic##_2S_2S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 2S, 2S, S, \ - kInput32bits##input_d, \ - kInput32bits##input_n, \ - kInput32bits##input_m, \ - kInputSIndices); \ - } \ - TEST(mnemonic##_4S_4S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 4S, 4S, S, \ - kInput32bits##input_d, \ - kInput32bits##input_n, \ - kInput32bits##input_m, \ - kInputSIndices); \ - } +#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ + TEST(mnemonic##_S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + S, \ + S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) + +#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ + TEST(mnemonic##_D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + D, \ + D, \ + kInputDouble##input, \ + kInputDoubleImm##input_imm); \ + } -#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, \ - input_d, input_n, input_m) \ - TEST(mnemonic##_H_H_H) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - H, H, H, \ - kInput16bits##input_d, \ - kInput16bits##input_n, \ - kInput16bits##input_m, \ - kInputHIndices); \ - } \ - TEST(mnemonic##_S_S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - S, S, S, \ - kInput32bits##input_d, \ - kInput32bits##input_n, \ - kInput32bits##input_m, \ - kInputSIndices); \ - } +#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ + TEST(mnemonic##_S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + S, \ + S, \ + kInputFloat##input, \ + kInputDoubleImm##input_imm); \ + } \ + DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) + +#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ + TEST(mnemonic##_B_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + B, \ + B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + H, \ + H, \ + 
kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) + +#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ + TEST(mnemonic##_8H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 8H, \ + 8B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 4S, \ + 4H, \ + kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \ + 2D, \ + 2S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } \ + TEST(mnemonic##2_8H_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ + 8H, \ + 16B, \ + kInput8bits##input, \ + kInput8bitsImm##input_imm); \ + } \ + TEST(mnemonic##2_4S_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ + 4S, \ + 8H, \ + kInput16bits##input, \ + kInput16bitsImm##input_imm); \ + } \ + TEST(mnemonic##2_2D_2OPIMM) { \ + CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \ + 2D, \ + 4S, \ + kInput32bits##input, \ + kInput32bitsImm##input_imm); \ + } -#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ - TEST(mnemonic##_2S_2S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 2S, 2S, S, \ - kInputFloat##input_d, \ - kInputFloat##input_n, \ - kInputFloat##input_m, \ - kInputSIndices); \ - } \ - TEST(mnemonic##_4S_4S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 4S, 4S, S, \ - kInputFloat##input_d, \ - kInputFloat##input_n, \ - kInputFloat##input_m, \ - kInputSIndices); \ - } \ - TEST(mnemonic##_2D_2D_D) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 2D, 2D, D, \ - kInputDouble##input_d, \ - kInputDouble##input_n, \ - kInputDouble##input_m, \ - kInputDIndices); \ - } \ - -#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ - TEST(mnemonic##_S_S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - S, S, S, \ - 
kInputFloat##inp_d, \ - kInputFloat##inp_n, \ - kInputFloat##inp_m, \ - kInputSIndices); \ - } \ - TEST(mnemonic##_D_D_D) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - D, D, D, \ - kInputDouble##inp_d, \ - kInputDouble##inp_n, \ - kInputDouble##inp_m, \ - kInputDIndices); \ - } \ +#define CALL_TEST_NEON_HELPER_BYELEMENT( \ + mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \ + { \ + CALL_TEST_NEON_HELPER_ByElement(mnemonic, \ + vdform, \ + vnform, \ + vmform, \ + input_d, \ + input_n, \ + input_m, \ + indices); \ + } +#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ + TEST(mnemonic##_4H_4H_H) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 4H, \ + 4H, \ + H, \ + kInput16bits##input_d, \ + kInput16bits##input_n, \ + kInput16bits##input_m, \ + kInputHIndices); \ + } \ + TEST(mnemonic##_8H_8H_H) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 8H, \ + 8H, \ + H, \ + kInput16bits##input_d, \ + kInput16bits##input_n, \ + kInput16bits##input_m, \ + kInputHIndices); \ + } \ + TEST(mnemonic##_2S_2S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 2S, \ + 2S, \ + S, \ + kInput32bits##input_d, \ + kInput32bits##input_n, \ + kInput32bits##input_m, \ + kInputSIndices); \ + } \ + TEST(mnemonic##_4S_4S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 4S, \ + 4S, \ + S, \ + kInput32bits##input_d, \ + kInput32bits##input_n, \ + kInput32bits##input_m, \ + kInputSIndices); \ + } -#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ - TEST(mnemonic##_4S_4H_H) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 4S, 4H, H, \ - kInput32bits##input_d, \ - kInput16bits##input_n, \ - kInput16bits##input_m, \ - kInputHIndices); \ - } \ - TEST(mnemonic##2_4S_8H_H) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ - 4S, 8H, H, \ - kInput32bits##input_d, \ - kInput16bits##input_n, \ - kInput16bits##input_m, \ - kInputHIndices); \ - } \ - TEST(mnemonic##_2D_2S_S) { \ - 
CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - 2D, 2S, S, \ - kInput64bits##input_d, \ - kInput32bits##input_n, \ - kInput32bits##input_m, \ - kInputSIndices); \ - } \ - TEST(mnemonic##2_2D_4S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ - 2D, 4S, S, \ - kInput64bits##input_d, \ - kInput32bits##input_n, \ - kInput32bits##input_m, \ - kInputSIndices); \ - } +#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ + TEST(mnemonic##_H_H_H) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + H, \ + H, \ + H, \ + kInput16bits##input_d, \ + kInput16bits##input_n, \ + kInput16bits##input_m, \ + kInputHIndices); \ + } \ + TEST(mnemonic##_S_S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + S, \ + S, \ + S, \ + kInput32bits##input_d, \ + kInput32bits##input_n, \ + kInput32bits##input_m, \ + kInputSIndices); \ + } -#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, \ - input_d, input_n, input_m) \ - TEST(mnemonic##_S_H_H) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - S, H, H, \ - kInput32bits##input_d, \ - kInput16bits##input_n, \ - kInput16bits##input_m, \ - kInputHIndices); \ - } \ - TEST(mnemonic##_D_S_S) { \ - CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ - D, S, S, \ - kInput64bits##input_d, \ - kInput32bits##input_n, \ - kInput32bits##input_m, \ - kInputSIndices); \ - } +#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ + TEST(mnemonic##_2S_2S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 2S, \ + 2S, \ + S, \ + kInputFloat##input_d, \ + kInputFloat##input_n, \ + kInputFloat##input_m, \ + kInputSIndices); \ + } \ + TEST(mnemonic##_4S_4S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 4S, \ + 4S, \ + S, \ + kInputFloat##input_d, \ + kInputFloat##input_n, \ + kInputFloat##input_m, \ + kInputSIndices); \ + } \ + TEST(mnemonic##_2D_2D_D) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 2D, \ + 2D, \ + D, \ + kInputDouble##input_d, \ + kInputDouble##input_n, \ + 
kInputDouble##input_m, \ + kInputDIndices); \ + } +#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ + TEST(mnemonic##_S_S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + S, \ + S, \ + S, \ + kInputFloat##inp_d, \ + kInputFloat##inp_n, \ + kInputFloat##inp_m, \ + kInputSIndices); \ + } \ + TEST(mnemonic##_D_D_D) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + D, \ + D, \ + D, \ + kInputDouble##inp_d, \ + kInputDouble##inp_n, \ + kInputDouble##inp_m, \ + kInputDIndices); \ + } -#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ - variant, \ - input_d, \ - input_imm1, \ - input_n, \ - input_imm2) { \ - CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \ - mnemonic, \ - variant, variant, \ - input_d, input_imm1, \ - input_n, input_imm2); \ - } -#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, \ - input_d, input_imm1, \ - input_n, input_imm2) \ - TEST(mnemonic##_B) { \ - CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ - 16B, \ - kInput8bits##input_d, \ - kInput8bitsImm##input_imm1, \ - kInput8bits##input_n, \ - kInput8bitsImm##input_imm2); \ - } \ - TEST(mnemonic##_H) { \ - CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ - 8H, \ - kInput16bits##input_d, \ - kInput16bitsImm##input_imm1, \ +#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ + TEST(mnemonic##_4S_4H_H) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 4S, \ + 4H, \ + H, \ + kInput32bits##input_d, \ kInput16bits##input_n, \ - kInput16bitsImm##input_imm2); \ - } \ - TEST(mnemonic##_S) { \ - CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ + kInput16bits##input_m, \ + kInputHIndices); \ + } \ + TEST(mnemonic##2_4S_8H_H) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 4S, \ + 8H, \ + H, \ kInput32bits##input_d, \ - kInput32bitsImm##input_imm1, \ + kInput16bits##input_n, \ + kInput16bits##input_m, \ + kInputHIndices); \ + } \ + TEST(mnemonic##_2D_2S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + 2D, \ + 2S, \ + S, \ + kInput64bits##input_d, \ 
kInput32bits##input_n, \ - kInput32bitsImm##input_imm2); \ - } \ - TEST(mnemonic##_D) { \ - CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ + kInput32bits##input_m, \ + kInputSIndices); \ + } \ + TEST(mnemonic##2_2D_4S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \ 2D, \ + 4S, \ + S, \ kInput64bits##input_d, \ - kInput64bitsImm##input_imm1, \ - kInput64bits##input_n, \ - kInput64bitsImm##input_imm2); \ - } + kInput32bits##input_n, \ + kInput32bits##input_m, \ + kInputSIndices); \ + } + +#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \ + mnemonic, input_d, input_n, input_m) \ + TEST(mnemonic##_S_H_H) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + S, \ + H, \ + H, \ + kInput32bits##input_d, \ + kInput16bits##input_n, \ + kInput16bits##input_m, \ + kInputHIndices); \ + } \ + TEST(mnemonic##_D_S_S) { \ + CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \ + D, \ + S, \ + S, \ + kInput64bits##input_d, \ + kInput32bits##input_n, \ + kInput32bits##input_m, \ + kInputSIndices); \ + } + + +#define CALL_TEST_NEON_HELPER_2OP2IMM( \ + mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \ + { \ + CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \ + mnemonic, \ + variant, \ + variant, \ + input_d, \ + input_imm1, \ + input_n, \ + input_imm2); \ + } + +#define DEFINE_TEST_NEON_2OP2IMM( \ + mnemonic, input_d, input_imm1, input_n, input_imm2) \ + TEST(mnemonic##_B) { \ + CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ + 16B, \ + kInput8bits##input_d, \ + kInput8bitsImm##input_imm1, \ + kInput8bits##input_n, \ + kInput8bitsImm##input_imm2); \ + } \ + TEST(mnemonic##_H) { \ + CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ + 8H, \ + kInput16bits##input_d, \ + kInput16bitsImm##input_imm1, \ + kInput16bits##input_n, \ + kInput16bitsImm##input_imm2); \ + } \ + TEST(mnemonic##_S) { \ + CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ + 4S, \ + kInput32bits##input_d, \ + kInput32bitsImm##input_imm1, \ + kInput32bits##input_n, \ + kInput32bitsImm##input_imm2); \ + } \ + 
TEST(mnemonic##_D) { \ + CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \ + 2D, \ + kInput64bits##input_d, \ + kInput64bitsImm##input_imm1, \ + kInput64bits##input_n, \ + kInput64bitsImm##input_imm2); \ + } // Advanced SIMD copy. -DEFINE_TEST_NEON_2OP2IMM(ins, - Basic, LaneCountFromZero, - Basic, LaneCountFromZero) +DEFINE_TEST_NEON_2OP2IMM( + ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero) DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) @@ -3890,7 +4242,8 @@ DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero) -DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, \ +DEFINE_TEST_NEON_2OPIMM_SD(scvtf, + FixedPointConversions, TypeWidthFromZeroToWidth) DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth) DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth) @@ -3906,7 +4259,8 @@ DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero) -DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, \ +DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, + FixedPointConversions, TypeWidthFromZeroToWidth) DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth) @@ -3920,7 +4274,8 @@ DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero) DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero) DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth) -DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, \ +DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, + FixedPointConversions, TypeWidthFromZeroToWidth) DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth) 
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth) @@ -3935,7 +4290,8 @@ DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth) DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth) -DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, \ +DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, + FixedPointConversions, TypeWidthFromZeroToWidth) DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth) diff --git a/test/aarch64/test-simulator-inputs-aarch64.h b/test/aarch64/test-simulator-inputs-aarch64.h index a398625a..049ce23a 100644 --- a/test/aarch64/test-simulator-inputs-aarch64.h +++ b/test/aarch64/test-simulator-inputs-aarch64.h @@ -41,6 +41,7 @@ extern "C" { #endif #define VIXL_AARCH64_TEST_SIMULATOR_INPUTS_AARCH64_H_ +// clang-format off // Double values, stored as uint64_t representations. This ensures exact bit // representation, and avoids the loss of NaNs and suchlike through C++ casts. @@ -849,137 +850,100 @@ extern "C" { 0xfffffffffffffffe, \ 0xffffffffffffffff +// clang-format on // For most 2- and 3-op instructions, use only basic inputs. Because every // combination is tested, the length of the output trace is very sensitive to // the length of this list. -static const uint64_t kInputDoubleBasic[] = { INPUT_DOUBLE_BASIC }; -static const uint32_t kInputFloatBasic[] = { INPUT_FLOAT_BASIC }; +static const uint64_t kInputDoubleBasic[] = {INPUT_DOUBLE_BASIC}; +static const uint32_t kInputFloatBasic[] = {INPUT_FLOAT_BASIC}; // TODO: Define different values when the traces file is split. 
#define INPUT_DOUBLE_ACC_DESTINATION INPUT_DOUBLE_BASIC #define INPUT_FLOAT_ACC_DESTINATION INPUT_FLOAT_BASIC static const uint64_t kInputDoubleAccDestination[] = { - INPUT_DOUBLE_ACC_DESTINATION -}; + INPUT_DOUBLE_ACC_DESTINATION}; static const uint32_t kInputFloatAccDestination[] = { - INPUT_FLOAT_ACC_DESTINATION -}; + INPUT_FLOAT_ACC_DESTINATION}; // For conversions, include several extra inputs. static const uint64_t kInputDoubleConversions[] = { - INPUT_DOUBLE_BASIC - INPUT_DOUBLE_CONVERSIONS -}; + INPUT_DOUBLE_BASIC INPUT_DOUBLE_CONVERSIONS}; static const uint32_t kInputFloatConversions[] = { - INPUT_FLOAT_BASIC - INPUT_FLOAT_CONVERSIONS -}; + INPUT_FLOAT_BASIC INPUT_FLOAT_CONVERSIONS}; -static const uint64_t kInput64bitsFixedPointConversions[] = { - INPUT_64BITS_BASIC, - INPUT_64BITS_FIXEDPOINT_CONVERSIONS -}; +static const uint64_t kInput64bitsFixedPointConversions[] = + {INPUT_64BITS_BASIC, INPUT_64BITS_FIXEDPOINT_CONVERSIONS}; -static const uint32_t kInput32bitsFixedPointConversions[] = { - INPUT_32BITS_BASIC, - INPUT_32BITS_FIXEDPOINT_CONVERSIONS -}; +static const uint32_t kInput32bitsFixedPointConversions[] = + {INPUT_32BITS_BASIC, INPUT_32BITS_FIXEDPOINT_CONVERSIONS}; static const uint16_t kInputFloat16Conversions[] = { - INPUT_FLOAT16_BASIC - INPUT_FLOAT16_CONVERSIONS -}; + INPUT_FLOAT16_BASIC INPUT_FLOAT16_CONVERSIONS}; -static const uint8_t kInput8bitsBasic[] = { - INPUT_8BITS_BASIC -}; +static const uint8_t kInput8bitsBasic[] = {INPUT_8BITS_BASIC}; -static const uint16_t kInput16bitsBasic[] = { - INPUT_16BITS_BASIC -}; +static const uint16_t kInput16bitsBasic[] = {INPUT_16BITS_BASIC}; -static const uint32_t kInput32bitsBasic[] = { - INPUT_32BITS_BASIC -}; +static const uint32_t kInput32bitsBasic[] = {INPUT_32BITS_BASIC}; -static const uint64_t kInput64bitsBasic[] = { - INPUT_64BITS_BASIC -}; +static const uint64_t kInput64bitsBasic[] = {INPUT_64BITS_BASIC}; -static const int kInput8bitsImmTypeWidth[] = { - INPUT_8BITS_IMM_TYPEWIDTH -}; +static 
const int kInput8bitsImmTypeWidth[] = {INPUT_8BITS_IMM_TYPEWIDTH}; -static const int kInput16bitsImmTypeWidth[] = { - INPUT_16BITS_IMM_TYPEWIDTH -}; +static const int kInput16bitsImmTypeWidth[] = {INPUT_16BITS_IMM_TYPEWIDTH}; -static const int kInput32bitsImmTypeWidth[] = { - INPUT_32BITS_IMM_TYPEWIDTH -}; +static const int kInput32bitsImmTypeWidth[] = {INPUT_32BITS_IMM_TYPEWIDTH}; -static const int kInput64bitsImmTypeWidth[] = { - INPUT_64BITS_IMM_TYPEWIDTH -}; +static const int kInput64bitsImmTypeWidth[] = {INPUT_64BITS_IMM_TYPEWIDTH}; static const int kInput8bitsImmTypeWidthFromZero[] = { - INPUT_8BITS_IMM_TYPEWIDTH_FROMZERO -}; + INPUT_8BITS_IMM_TYPEWIDTH_FROMZERO}; static const int kInput16bitsImmTypeWidthFromZero[] = { - INPUT_16BITS_IMM_TYPEWIDTH_FROMZERO -}; + INPUT_16BITS_IMM_TYPEWIDTH_FROMZERO}; static const int kInput32bitsImmTypeWidthFromZero[] = { - INPUT_32BITS_IMM_TYPEWIDTH_FROMZERO -}; + INPUT_32BITS_IMM_TYPEWIDTH_FROMZERO}; static const int kInput64bitsImmTypeWidthFromZero[] = { - INPUT_64BITS_IMM_TYPEWIDTH_FROMZERO -}; + INPUT_64BITS_IMM_TYPEWIDTH_FROMZERO}; static const int kInput32bitsImmTypeWidthFromZeroToWidth[] = { - INPUT_32BITS_IMM_TYPEWIDTH_FROMZERO_TOWIDTH -}; + INPUT_32BITS_IMM_TYPEWIDTH_FROMZERO_TOWIDTH}; static const int kInput64bitsImmTypeWidthFromZeroToWidth[] = { - INPUT_64BITS_IMM_TYPEWIDTH_FROMZERO_TOWIDTH -}; + INPUT_64BITS_IMM_TYPEWIDTH_FROMZERO_TOWIDTH}; // These immediate values are used only in 'shll{2}' tests. 
-static const int kInput8bitsImmSHLL[] = { 8 }; -static const int kInput16bitsImmSHLL[] = { 16 }; -static const int kInput32bitsImmSHLL[] = { 32 }; +static const int kInput8bitsImmSHLL[] = {8}; +static const int kInput16bitsImmSHLL[] = {16}; +static const int kInput32bitsImmSHLL[] = {32}; -static const double kInputDoubleImmZero[] = { 0.0 }; +static const double kInputDoubleImmZero[] = {0.0}; -static const int kInput8bitsImmZero[] = { 0 }; +static const int kInput8bitsImmZero[] = {0}; -static const int kInput16bitsImmZero[] = { 0 }; +static const int kInput16bitsImmZero[] = {0}; -static const int kInput32bitsImmZero[] = { 0 }; +static const int kInput32bitsImmZero[] = {0}; -static const int kInput64bitsImmZero[] = { 0 }; +static const int kInput64bitsImmZero[] = {0}; static const int kInput8bitsImmLaneCountFromZero[] = { - INPUT_8BITS_IMM_LANECOUNT_FROMZERO -}; + INPUT_8BITS_IMM_LANECOUNT_FROMZERO}; static const int kInput16bitsImmLaneCountFromZero[] = { - INPUT_16BITS_IMM_LANECOUNT_FROMZERO -}; + INPUT_16BITS_IMM_LANECOUNT_FROMZERO}; static const int kInput32bitsImmLaneCountFromZero[] = { - INPUT_32BITS_IMM_LANECOUNT_FROMZERO -}; + INPUT_32BITS_IMM_LANECOUNT_FROMZERO}; static const int kInput64bitsImmLaneCountFromZero[] = { - INPUT_64BITS_IMM_LANECOUNT_FROMZERO -}; + INPUT_64BITS_IMM_LANECOUNT_FROMZERO}; // TODO: Define different values when the traces file is split. 
#define INPUT_8BITS_ACC_DESTINATION INPUT_8BITS_BASIC @@ -988,30 +952,19 @@ static const int kInput64bitsImmLaneCountFromZero[] = { #define INPUT_64BITS_ACC_DESTINATION INPUT_64BITS_BASIC static const uint8_t kInput8bitsAccDestination[] = { - INPUT_8BITS_ACC_DESTINATION -}; + INPUT_8BITS_ACC_DESTINATION}; static const uint16_t kInput16bitsAccDestination[] = { - INPUT_16BITS_ACC_DESTINATION -}; + INPUT_16BITS_ACC_DESTINATION}; static const uint32_t kInput32bitsAccDestination[] = { - INPUT_32BITS_ACC_DESTINATION -}; + INPUT_32BITS_ACC_DESTINATION}; static const uint64_t kInput64bitsAccDestination[] = { - INPUT_64BITS_ACC_DESTINATION -}; + INPUT_64BITS_ACC_DESTINATION}; -static const int kInputHIndices[] = { - 0, 1, 2, 3, - 4, 5, 6, 7 -}; +static const int kInputHIndices[] = {0, 1, 2, 3, 4, 5, 6, 7}; -static const int kInputSIndices[] = { - 0, 1, 2, 3 -}; +static const int kInputSIndices[] = {0, 1, 2, 3}; -static const int kInputDIndices[] = { - 0, 1 -}; +static const int kInputDIndices[] = {0, 1}; diff --git a/test/aarch64/test-simulator-traces-aarch64.h b/test/aarch64/test-simulator-traces-aarch64.h index e9a95628..e374b0b5 100644 --- a/test/aarch64/test-simulator-traces-aarch64.h +++ b/test/aarch64/test-simulator-traces-aarch64.h @@ -49,10 +49,10 @@ extern "C" { // --------------------------------------------------------------------- // ADD DUMMY ARRAYS FOR NEW SIMULATOR TEST HERE. 
// --------------------------------------------------------------------- -const uint64_t kExpected_dummy_64[] = { 0 }; +const uint64_t kExpected_dummy_64[] = {0}; const size_t kExpectedCount_dummy_64 = 0; -const uint32_t kExpected_dummy_32[] = { 0 }; +const uint32_t kExpected_dummy_32[] = {0}; const size_t kExpectedCount_dummy_32 = 0; // --------------------------------------------------------------------- diff --git a/test/aarch64/test-trace-aarch64.cc b/test/aarch64/test-trace-aarch64.cc index a1a79277..bf767b6a 100644 --- a/test/aarch64/test-trace-aarch64.cc +++ b/test/aarch64/test-trace-aarch64.cc @@ -775,9 +775,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ext(v1.V16B(), v26.V16B(), v6.V16B(), 1); __ ext(v2.V8B(), v30.V8B(), v1.V8B(), 1); __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); - __ ld1(v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), + __ ld1(v23.V16B(), + v24.V16B(), + v25.V16B(), + v26.V16B(), MemOperand(x1, x2, PostIndex)); - __ ld1(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), + __ ld1(v5.V16B(), + v6.V16B(), + v7.V16B(), + v8.V16B(), MemOperand(x1, 64, PostIndex)); __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), MemOperand(x0)); __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), MemOperand(x1, x2, PostIndex)); @@ -789,9 +795,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v21.V16B(), MemOperand(x1, x2, PostIndex)); __ ld1(v4.V16B(), MemOperand(x1, 16, PostIndex)); __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), MemOperand(x0)); - __ ld1(v17.V1D(), v18.V1D(), v19.V1D(), v20.V1D(), + __ ld1(v17.V1D(), + v18.V1D(), + v19.V1D(), + v20.V1D(), MemOperand(x1, x2, PostIndex)); - __ ld1(v28.V1D(), v29.V1D(), v30.V1D(), v31.V1D(), + __ ld1(v28.V1D(), + v29.V1D(), + v30.V1D(), + v31.V1D(), MemOperand(x1, 32, PostIndex)); __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), MemOperand(x0)); __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), MemOperand(x1, x2, PostIndex)); @@ -803,9 +815,15 @@ static void 
GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v11.V1D(), MemOperand(x1, x2, PostIndex)); __ ld1(v29.V1D(), MemOperand(x1, 8, PostIndex)); __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), MemOperand(x0)); - __ ld1(v8.V2D(), v9.V2D(), v10.V2D(), v11.V2D(), + __ ld1(v8.V2D(), + v9.V2D(), + v10.V2D(), + v11.V2D(), MemOperand(x1, x2, PostIndex)); - __ ld1(v14.V2D(), v15.V2D(), v16.V2D(), v17.V2D(), + __ ld1(v14.V2D(), + v15.V2D(), + v16.V2D(), + v17.V2D(), MemOperand(x1, 64, PostIndex)); __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), MemOperand(x0)); __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); @@ -817,9 +835,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v6.V2D(), MemOperand(x1, x2, PostIndex)); __ ld1(v15.V2D(), MemOperand(x1, 16, PostIndex)); __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x0)); - __ ld1(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), + __ ld1(v24.V2S(), + v25.V2S(), + v26.V2S(), + v27.V2S(), MemOperand(x1, x2, PostIndex)); - __ ld1(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), + __ ld1(v27.V2S(), + v28.V2S(), + v29.V2S(), + v30.V2S(), MemOperand(x1, 32, PostIndex)); __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x0)); __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), MemOperand(x1, x2, PostIndex)); @@ -831,7 +855,10 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v0.V2S(), MemOperand(x1, x2, PostIndex)); __ ld1(v11.V2S(), MemOperand(x1, 8, PostIndex)); __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); - __ ld1(v24.V4H(), v25.V4H(), v26.V4H(), v27.V4H(), + __ ld1(v24.V4H(), + v25.V4H(), + v26.V4H(), + v27.V4H(), MemOperand(x1, x2, PostIndex)); __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), MemOperand(x0)); @@ -844,7 +871,10 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v1.V4H(), MemOperand(x1, x2, PostIndex)); __ ld1(v14.V4H(), MemOperand(x1, 8, PostIndex)); __ 
ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), MemOperand(x0)); - __ ld1(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), + __ ld1(v28.V4S(), + v29.V4S(), + v30.V4S(), + v31.V4S(), MemOperand(x1, x2, PostIndex)); __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), MemOperand(x1, 64, PostIndex)); __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), MemOperand(x0)); @@ -858,7 +888,10 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v0.V4S(), MemOperand(x1, 16, PostIndex)); __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), MemOperand(x0)); __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, x2, PostIndex)); - __ ld1(v9.V8B(), v10.V8B(), v11.V8B(), v12.V8B(), + __ ld1(v9.V8B(), + v10.V8B(), + v11.V8B(), + v12.V8B(), MemOperand(x1, 32, PostIndex)); __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), MemOperand(x0)); __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x1, x2, PostIndex)); @@ -871,7 +904,10 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld1(v28.V8B(), MemOperand(x1, 8, PostIndex)); __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), MemOperand(x0)); __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); - __ ld1(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), + __ ld1(v10.V8H(), + v11.V8H(), + v12.V8H(), + v13.V8H(), MemOperand(x1, 64, PostIndex)); __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0)); __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); @@ -1033,34 +1069,61 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x1, x2, PostIndex)); __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), MemOperand(x1, 6, PostIndex)); __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), MemOperand(x0)); - __ ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), + __ ld4(v2.V16B(), + v3.V16B(), + v4.V16B(), + v5.V16B(), MemOperand(x1, x2, PostIndex)); - __ ld4(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), + __ ld4(v5.V16B(), + v6.V16B(), + v7.V16B(), + v8.V16B(), MemOperand(x1, 64, 
PostIndex)); __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x0)); __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), MemOperand(x1, x2, PostIndex)); - __ ld4(v29.V2D(), v30.V2D(), v31.V2D(), v0.V2D(), + __ ld4(v29.V2D(), + v30.V2D(), + v31.V2D(), + v0.V2D(), MemOperand(x1, 64, PostIndex)); __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), MemOperand(x0)); - __ ld4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), + __ ld4(v24.V2S(), + v25.V2S(), + v26.V2S(), + v27.V2S(), MemOperand(x1, x2, PostIndex)); __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), MemOperand(x1, 32, PostIndex)); __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), MemOperand(x0)); - __ ld4(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), + __ ld4(v23.V4H(), + v24.V4H(), + v25.V4H(), + v26.V4H(), MemOperand(x1, x2, PostIndex)); __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), MemOperand(x1, 32, PostIndex)); __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), MemOperand(x0)); - __ ld4(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), + __ ld4(v28.V4S(), + v29.V4S(), + v30.V4S(), + v31.V4S(), MemOperand(x1, x2, PostIndex)); - __ ld4(v29.V4S(), v30.V4S(), v31.V4S(), v0.V4S(), + __ ld4(v29.V4S(), + v30.V4S(), + v31.V4S(), + v0.V4S(), MemOperand(x1, 64, PostIndex)); __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), MemOperand(x0)); - __ ld4(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), + __ ld4(v27.V8B(), + v28.V8B(), + v29.V8B(), + v30.V8B(), MemOperand(x1, x2, PostIndex)); __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), MemOperand(x1, 32, PostIndex)); __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), MemOperand(x0)); __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), MemOperand(x1, x2, PostIndex)); - __ ld4(v20.V8H(), v21.V8H(), v22.V8H(), v23.V8H(), + __ ld4(v20.V8H(), + v21.V8H(), + v22.V8H(), + v23.V8H(), MemOperand(x1, 64, PostIndex)); __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, MemOperand(x0)); __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, MemOperand(x1, x2, PostIndex)); @@ -1075,44 +1138,92 @@ static void 
GenerateTestSequenceNEON(MacroAssembler* masm) { __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, MemOperand(x1, x2, PostIndex)); __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, MemOperand(x1, 16, PostIndex)); __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), MemOperand(x0)); - __ ld4r(v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), + __ ld4r(v13.V16B(), + v14.V16B(), + v15.V16B(), + v16.V16B(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v9.V16B(), v10.V16B(), v11.V16B(), v12.V16B(), + __ ld4r(v9.V16B(), + v10.V16B(), + v11.V16B(), + v12.V16B(), MemOperand(x1, 4, PostIndex)); __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), MemOperand(x0)); - __ ld4r(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), + __ ld4r(v4.V1D(), + v5.V1D(), + v6.V1D(), + v7.V1D(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v26.V1D(), v27.V1D(), v28.V1D(), v29.V1D(), + __ ld4r(v26.V1D(), + v27.V1D(), + v28.V1D(), + v29.V1D(), MemOperand(x1, 32, PostIndex)); __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), MemOperand(x0)); - __ ld4r(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), + __ ld4r(v28.V2D(), + v29.V2D(), + v30.V2D(), + v31.V2D(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v15.V2D(), v16.V2D(), v17.V2D(), v18.V2D(), + __ ld4r(v15.V2D(), + v16.V2D(), + v17.V2D(), + v18.V2D(), MemOperand(x1, 32, PostIndex)); __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x0)); - __ ld4r(v28.V2S(), v29.V2S(), v30.V2S(), v31.V2S(), + __ ld4r(v28.V2S(), + v29.V2S(), + v30.V2S(), + v31.V2S(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v11.V2S(), v12.V2S(), v13.V2S(), v14.V2S(), + __ ld4r(v11.V2S(), + v12.V2S(), + v13.V2S(), + v14.V2S(), MemOperand(x1, 16, PostIndex)); __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), MemOperand(x0)); - __ ld4r(v22.V4H(), v23.V4H(), v24.V4H(), v25.V4H(), + __ ld4r(v22.V4H(), + v23.V4H(), + v24.V4H(), + v25.V4H(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v20.V4H(), v21.V4H(), v22.V4H(), v23.V4H(), + __ ld4r(v20.V4H(), + v21.V4H(), + v22.V4H(), + v23.V4H(), MemOperand(x1, 8, 
PostIndex)); __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x0)); - __ ld4r(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), + __ ld4r(v25.V4S(), + v26.V4S(), + v27.V4S(), + v28.V4S(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v23.V4S(), v24.V4S(), v25.V4S(), v26.V4S(), + __ ld4r(v23.V4S(), + v24.V4S(), + v25.V4S(), + v26.V4S(), MemOperand(x1, 16, PostIndex)); __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), MemOperand(x0)); - __ ld4r(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), + __ ld4r(v27.V8B(), + v28.V8B(), + v29.V8B(), + v30.V8B(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v29.V8B(), v30.V8B(), v31.V8B(), v0.V8B(), + __ ld4r(v29.V8B(), + v30.V8B(), + v31.V8B(), + v0.V8B(), MemOperand(x1, 4, PostIndex)); __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), MemOperand(x0)); - __ ld4r(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), + __ ld4r(v25.V8H(), + v26.V8H(), + v27.V8H(), + v28.V8H(), MemOperand(x1, x2, PostIndex)); - __ ld4r(v22.V8H(), v23.V8H(), v24.V8H(), v25.V8H(), + __ ld4r(v22.V8H(), + v23.V8H(), + v24.V8H(), + v25.V8H(), MemOperand(x1, 8, PostIndex)); __ mla(v29.V16B(), v7.V16B(), v26.V16B()); __ mla(v6.V2S(), v4.V2S(), v14.V2S()); @@ -1675,9 +1786,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ ssubw2(v31.V4S(), v11.V4S(), v15.V8H()); __ ssubw2(v4.V8H(), v8.V8H(), v16.V16B()); __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), MemOperand(x0)); - __ st1(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B(), + __ st1(v10.V16B(), + v11.V16B(), + v12.V16B(), + v13.V16B(), MemOperand(x1, x2, PostIndex)); - __ st1(v27.V16B(), v28.V16B(), v29.V16B(), v30.V16B(), + __ st1(v27.V16B(), + v28.V16B(), + v29.V16B(), + v30.V16B(), MemOperand(x1, 64, PostIndex)); __ st1(v16.V16B(), v17.V16B(), v18.V16B(), MemOperand(x0)); __ st1(v21.V16B(), v22.V16B(), v23.V16B(), MemOperand(x1, x2, PostIndex)); @@ -1689,9 +1806,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v28.V16B(), MemOperand(x1, x2, PostIndex)); __ 
st1(v2.V16B(), MemOperand(x1, 16, PostIndex)); __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), MemOperand(x0)); - __ st1(v12.V1D(), v13.V1D(), v14.V1D(), v15.V1D(), + __ st1(v12.V1D(), + v13.V1D(), + v14.V1D(), + v15.V1D(), MemOperand(x1, x2, PostIndex)); - __ st1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), + __ st1(v30.V1D(), + v31.V1D(), + v0.V1D(), + v1.V1D(), MemOperand(x1, 32, PostIndex)); __ st1(v16.V1D(), v17.V1D(), v18.V1D(), MemOperand(x0)); __ st1(v3.V1D(), v4.V1D(), v5.V1D(), MemOperand(x1, x2, PostIndex)); @@ -1703,9 +1826,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v27.V1D(), MemOperand(x1, x2, PostIndex)); __ st1(v23.V1D(), MemOperand(x1, 8, PostIndex)); __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), MemOperand(x0)); - __ st1(v22.V2D(), v23.V2D(), v24.V2D(), v25.V2D(), + __ st1(v22.V2D(), + v23.V2D(), + v24.V2D(), + v25.V2D(), MemOperand(x1, x2, PostIndex)); - __ st1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), + __ st1(v28.V2D(), + v29.V2D(), + v30.V2D(), + v31.V2D(), MemOperand(x1, 64, PostIndex)); __ st1(v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); __ st1(v16.V2D(), v17.V2D(), v18.V2D(), MemOperand(x1, x2, PostIndex)); @@ -1717,9 +1846,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v29.V2D(), MemOperand(x1, x2, PostIndex)); __ st1(v20.V2D(), MemOperand(x1, 16, PostIndex)); __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x0)); - __ st1(v8.V2S(), v9.V2S(), v10.V2S(), v11.V2S(), + __ st1(v8.V2S(), + v9.V2S(), + v10.V2S(), + v11.V2S(), MemOperand(x1, x2, PostIndex)); - __ st1(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), + __ st1(v15.V2S(), + v16.V2S(), + v17.V2S(), + v18.V2S(), MemOperand(x1, 32, PostIndex)); __ st1(v2.V2S(), v3.V2S(), v4.V2S(), MemOperand(x0)); __ st1(v23.V2S(), v24.V2S(), v25.V2S(), MemOperand(x1, x2, PostIndex)); @@ -1731,9 +1866,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v11.V2S(), MemOperand(x1, x2, PostIndex)); __ 
st1(v17.V2S(), MemOperand(x1, 8, PostIndex)); __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), MemOperand(x0)); - __ st1(v9.V4H(), v10.V4H(), v11.V4H(), v12.V4H(), + __ st1(v9.V4H(), + v10.V4H(), + v11.V4H(), + v12.V4H(), MemOperand(x1, x2, PostIndex)); - __ st1(v25.V4H(), v26.V4H(), v27.V4H(), v28.V4H(), + __ st1(v25.V4H(), + v26.V4H(), + v27.V4H(), + v28.V4H(), MemOperand(x1, 32, PostIndex)); __ st1(v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x0)); __ st1(v10.V4H(), v11.V4H(), v12.V4H(), MemOperand(x1, x2, PostIndex)); @@ -1745,7 +1886,10 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v8.V4H(), MemOperand(x1, x2, PostIndex)); __ st1(v30.V4H(), MemOperand(x1, 8, PostIndex)); __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0)); - __ st1(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), + __ st1(v25.V4S(), + v26.V4S(), + v27.V4S(), + v28.V4S(), MemOperand(x1, x2, PostIndex)); __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), MemOperand(x1, 64, PostIndex)); __ st1(v31.V4S(), v0.V4S(), v1.V4S(), MemOperand(x0)); @@ -1758,9 +1902,15 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v15.V4S(), MemOperand(x1, x2, PostIndex)); __ st1(v13.V4S(), MemOperand(x1, 16, PostIndex)); __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); - __ st1(v10.V8B(), v11.V8B(), v12.V8B(), v13.V8B(), + __ st1(v10.V8B(), + v11.V8B(), + v12.V8B(), + v13.V8B(), MemOperand(x1, x2, PostIndex)); - __ st1(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), + __ st1(v15.V8B(), + v16.V8B(), + v17.V8B(), + v18.V8B(), MemOperand(x1, 32, PostIndex)); __ st1(v19.V8B(), v20.V8B(), v21.V8B(), MemOperand(x0)); __ st1(v31.V8B(), v0.V8B(), v1.V8B(), MemOperand(x1, x2, PostIndex)); @@ -1773,7 +1923,10 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st1(v31.V8B(), MemOperand(x1, 8, PostIndex)); __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x0)); __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), MemOperand(x1, x2, PostIndex)); - __ 
st1(v26.V8H(), v27.V8H(), v28.V8H(), v29.V8H(), + __ st1(v26.V8H(), + v27.V8H(), + v28.V8H(), + v29.V8H(), MemOperand(x1, 64, PostIndex)); __ st1(v10.V8H(), v11.V8H(), v12.V8H(), MemOperand(x0)); __ st1(v21.V8H(), v22.V8H(), v23.V8H(), MemOperand(x1, x2, PostIndex)); @@ -1863,37 +2016,73 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ st3(v11.S(), v12.S(), v13.S(), 1, MemOperand(x1, x2, PostIndex)); __ st3(v15.S(), v16.S(), v17.S(), 0, MemOperand(x1, 12, PostIndex)); __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), MemOperand(x0)); - __ st4(v24.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), + __ st4(v24.V16B(), + v25.V16B(), + v26.V16B(), + v27.V16B(), MemOperand(x1, x2, PostIndex)); - __ st4(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), + __ st4(v15.V16B(), + v16.V16B(), + v17.V16B(), + v18.V16B(), MemOperand(x1, 64, PostIndex)); __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), MemOperand(x0)); - __ st4(v17.V2D(), v18.V2D(), v19.V2D(), v20.V2D(), + __ st4(v17.V2D(), + v18.V2D(), + v19.V2D(), + v20.V2D(), MemOperand(x1, x2, PostIndex)); - __ st4(v9.V2D(), v10.V2D(), v11.V2D(), v12.V2D(), + __ st4(v9.V2D(), + v10.V2D(), + v11.V2D(), + v12.V2D(), MemOperand(x1, 64, PostIndex)); __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), MemOperand(x0)); - __ st4(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), + __ st4(v15.V2S(), + v16.V2S(), + v17.V2S(), + v18.V2S(), MemOperand(x1, x2, PostIndex)); - __ st4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), + __ st4(v24.V2S(), + v25.V2S(), + v26.V2S(), + v27.V2S(), MemOperand(x1, 32, PostIndex)); __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), MemOperand(x0)); - __ st4(v18.V4H(), v19.V4H(), v20.V4H(), v21.V4H(), + __ st4(v18.V4H(), + v19.V4H(), + v20.V4H(), + v21.V4H(), MemOperand(x1, x2, PostIndex)); __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), MemOperand(x1, 32, PostIndex)); __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), MemOperand(x0)); __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), MemOperand(x1, 
x2, PostIndex)); - __ st4(v15.V4S(), v16.V4S(), v17.V4S(), v18.V4S(), + __ st4(v15.V4S(), + v16.V4S(), + v17.V4S(), + v18.V4S(), MemOperand(x1, 64, PostIndex)); __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), MemOperand(x0)); - __ st4(v25.V8B(), v26.V8B(), v27.V8B(), v28.V8B(), + __ st4(v25.V8B(), + v26.V8B(), + v27.V8B(), + v28.V8B(), MemOperand(x1, x2, PostIndex)); - __ st4(v19.V8B(), v20.V8B(), v21.V8B(), v22.V8B(), + __ st4(v19.V8B(), + v20.V8B(), + v21.V8B(), + v22.V8B(), MemOperand(x1, 32, PostIndex)); __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), MemOperand(x0)); - __ st4(v15.V8H(), v16.V8H(), v17.V8H(), v18.V8H(), + __ st4(v15.V8H(), + v16.V8H(), + v17.V8H(), + v18.V8H(), MemOperand(x1, x2, PostIndex)); - __ st4(v31.V8H(), v0.V8H(), v1.V8H(), v2.V8H(), + __ st4(v31.V8H(), + v0.V8H(), + v1.V8H(), + v2.V8H(), MemOperand(x1, 64, PostIndex)); __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, MemOperand(x0)); __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, MemOperand(x1, x2, PostIndex)); @@ -1938,7 +2127,11 @@ static void GenerateTestSequenceNEON(MacroAssembler* masm) { __ sxtl2(v6.V2D(), v7.V4S()); __ sxtl2(v9.V4S(), v27.V8H()); __ sxtl2(v16.V8H(), v16.V16B()); - __ tbl(v25.V16B(), v17.V16B(), v18.V16B(), v19.V16B(), v20.V16B(), + __ tbl(v25.V16B(), + v17.V16B(), + v18.V16B(), + v19.V16B(), + v20.V16B(), v22.V16B()); __ tbl(v28.V16B(), v13.V16B(), v14.V16B(), v15.V16B(), v4.V16B()); __ tbl(v3.V16B(), v0.V16B(), v1.V16B(), v2.V16B()); @@ -2539,9 +2732,9 @@ static void GenerateTestSequenceNEONFP(MacroAssembler* masm) { static void MaskAddresses(const char* trace) { - // Hexadecimal expressions of the form `\xab` do not work out-of-the box with - // BSD `sed`. So we use ANSI-C quoting to have the regular expressions below - // work both on Linux and BSD (and macOS). +// Hexadecimal expressions of the form `\xab` do not work out-of-the box with +// BSD `sed`. So we use ANSI-C quoting to have the regular expressions below +// work both on Linux and BSD (and macOS). 
#ifdef __APPLE__ #define MAYBE_ANSI_C_QUOTE "$" #define HEX(val) "\\x" #val @@ -2557,26 +2750,29 @@ static void MaskAddresses(const char* trace) { struct { const char* search; const char* replace; - } patterns[] = { - // Mask registers that hold addresses that change from run to run. - {"((x0|x1|x2|sp): " COLOUR "0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, - // Mask accessed memory addresses. - {"((<-|->) " COLOUR "0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, - // Mask instruction addresses. - {"^0x[0-9a-f]{16}", "0x~~~~~~~~~~~~~~~~"}, - // Mask branch targets. - {"(Branch" COLOUR " to 0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, - {"addr 0x[0-9a-f]+", "addr 0x~~~~~~~~~~~~~~~~"} - }; + } patterns[] = + {// Mask registers that hold addresses that change from run to run. + {"((x0|x1|x2|sp): " COLOUR "0x)[0-9a-f]{16}", + ESCAPE(1) "~~~~~~~~~~~~~~~~"}, + // Mask accessed memory addresses. + {"((<-|->) " COLOUR "0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, + // Mask instruction addresses. + {"^0x[0-9a-f]{16}", "0x~~~~~~~~~~~~~~~~"}, + // Mask branch targets. + {"(Branch" COLOUR " to 0x)[0-9a-f]{16}", ESCAPE(1) "~~~~~~~~~~~~~~~~"}, + {"addr 0x[0-9a-f]+", "addr 0x~~~~~~~~~~~~~~~~"}}; const size_t patterns_length = sizeof(patterns) / sizeof(patterns[0]); // Rewrite `trace`, masking addresses and other values that legitimately vary // from run to run. 
char command[1024]; for (size_t i = 0; i < patterns_length; i++) { - size_t length = - snprintf(command, sizeof(command), - "sed %s " MAYBE_ANSI_C_QUOTE "'s/%s/%s/' '%s'", - sed_options, patterns[i].search, patterns[i].replace, trace); + size_t length = snprintf(command, + sizeof(command), + "sed %s " MAYBE_ANSI_C_QUOTE "'s/%s/%s/' '%s'", + sed_options, + patterns[i].search, + patterns[i].replace, + trace); VIXL_CHECK(length < sizeof(command)); VIXL_CHECK(system(command) == 0); } @@ -2601,8 +2797,7 @@ static void TraceTestHelper(bool coloured_trace, const int kScratchSize = 64 * KBytes; const int kScratchGuardSize = 128; char scratch_buffer[kScratchSize + kScratchGuardSize]; - for (size_t i = 0; - i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0])); + for (size_t i = 0; i < (sizeof(scratch_buffer) / sizeof(scratch_buffer[0])); i++) { scratch_buffer[i] = i & 0xff; } @@ -2611,7 +2806,7 @@ static void TraceTestHelper(bool coloured_trace, // Used for pre-/post-index addressing. simulator.WriteRegister(1, scratch_buffer); - const int kPostIndexRegisterStep = 13; // Arbitrary interesting value. + const int kPostIndexRegisterStep = 13; // Arbitrary interesting value. // Used for post-index offsets. simulator.WriteRegister(2, kPostIndexRegisterStep); @@ -2666,8 +2861,11 @@ static void TraceTestHelper(bool coloured_trace, } else { // Check trace_stream against ref_file. char command[1024]; - size_t length = snprintf(command, sizeof(command), - "diff -u %s %s", ref_file, trace_stream_filename); + size_t length = snprintf(command, + sizeof(command), + "diff -u %s %s", + ref_file, + trace_stream_filename); VIXL_CHECK(length < sizeof(command)); trace_matched_reference = (system(command) == 0); } @@ -2687,44 +2885,24 @@ static void TraceTestHelper(bool coloured_trace, #define REF(name) "test/test-trace-reference/" name // Test individual options. 
-TEST(disasm) { - TraceTestHelper(false, LOG_DISASM, REF("log-disasm")); -} -TEST(regs) { - TraceTestHelper(false, LOG_REGS, REF("log-regs")); -} -TEST(vregs) { - TraceTestHelper(false, LOG_VREGS, REF("log-vregs")); -} -TEST(sysregs) { - TraceTestHelper(false, LOG_SYSREGS, REF("log-sysregs")); -} -TEST(write) { - TraceTestHelper(false, LOG_WRITE, REF("log-write")); -} -TEST(branch) { - TraceTestHelper(false, LOG_WRITE, REF("log-branch")); -} +TEST(disasm) { TraceTestHelper(false, LOG_DISASM, REF("log-disasm")); } +TEST(regs) { TraceTestHelper(false, LOG_REGS, REF("log-regs")); } +TEST(vregs) { TraceTestHelper(false, LOG_VREGS, REF("log-vregs")); } +TEST(sysregs) { TraceTestHelper(false, LOG_SYSREGS, REF("log-sysregs")); } +TEST(write) { TraceTestHelper(false, LOG_WRITE, REF("log-write")); } +TEST(branch) { TraceTestHelper(false, LOG_WRITE, REF("log-branch")); } // Test standard combinations. -TEST(none) { - TraceTestHelper(false, LOG_NONE, REF("log-none")); -} -TEST(state) { - TraceTestHelper(false, LOG_STATE, REF("log-state")); -} -TEST(all) { - TraceTestHelper(false, LOG_ALL, REF("log-all")); -} +TEST(none) { TraceTestHelper(false, LOG_NONE, REF("log-none")); } +TEST(state) { TraceTestHelper(false, LOG_STATE, REF("log-state")); } +TEST(all) { TraceTestHelper(false, LOG_ALL, REF("log-all")); } // Test individual options (with colour). TEST(disasm_colour) { TraceTestHelper(true, LOG_DISASM, REF("log-disasm-colour")); } -TEST(regs_colour) { - TraceTestHelper(true, LOG_REGS, REF("log-regs-colour")); -} +TEST(regs_colour) { TraceTestHelper(true, LOG_REGS, REF("log-regs-colour")); } TEST(vregs_colour) { TraceTestHelper(true, LOG_VREGS, REF("log-vregs-colour")); } @@ -2739,15 +2917,11 @@ TEST(branch_colour) { } // Test standard combinations (with colour). 
-TEST(none_colour) { - TraceTestHelper(true, LOG_NONE, REF("log-none-colour")); -} +TEST(none_colour) { TraceTestHelper(true, LOG_NONE, REF("log-none-colour")); } TEST(state_colour) { TraceTestHelper(true, LOG_STATE, REF("log-state-colour")); } -TEST(all_colour) { - TraceTestHelper(true, LOG_ALL, REF("log-all-colour")); -} +TEST(all_colour) { TraceTestHelper(true, LOG_ALL, REF("log-all-colour")); } #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 diff --git a/test/aarch64/test-utils-aarch64.cc b/test/aarch64/test-utils-aarch64.cc index 881f18df..eafe2730 100644 --- a/test/aarch64/test-utils-aarch64.cc +++ b/test/aarch64/test-utils-aarch64.cc @@ -42,8 +42,7 @@ namespace aarch64 { // This value is a signalling NaN as both a double and as a float (taking the // least-significant word). -const double kFP64SignallingNaN = - RawbitsToDouble(UINT64_C(0x7ff000007f800001)); +const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f800001)); const float kFP32SignallingNaN = RawbitsToFloat(0x7f800001); // A similar value, but as a quiet NaN. 
@@ -54,7 +53,8 @@ const float kFP32QuietNaN = RawbitsToFloat(0x7fc00001); bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) { if (result != expected) { printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n", - expected, result); + expected, + result); } return expected == result; @@ -64,7 +64,8 @@ bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) { bool Equal64(uint64_t expected, const RegisterDump*, uint64_t result) { if (result != expected) { printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n", - expected, result); + expected, + result); } return expected == result; @@ -73,9 +74,13 @@ bool Equal64(uint64_t expected, const RegisterDump*, uint64_t result) { bool Equal128(vec128_t expected, const RegisterDump*, vec128_t result) { if ((result.h != expected.h) || (result.l != expected.l)) { - printf("Expected 0x%016" PRIx64 "%016" PRIx64 "\t " + printf("Expected 0x%016" PRIx64 "%016" PRIx64 + "\t " "Found 0x%016" PRIx64 "%016" PRIx64 "\n", - expected.h, expected.l, result.h, result.l); + expected.h, + expected.l, + result.h, + result.l); } return ((expected.h == result.h) && (expected.l == result.l)); @@ -88,12 +93,16 @@ bool EqualFP32(float expected, const RegisterDump*, float result) { } else { if (std::isnan(expected) || (expected == 0.0)) { printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n", - FloatToRawbits(expected), FloatToRawbits(result)); + FloatToRawbits(expected), + FloatToRawbits(result)); } else { - printf("Expected %.9f (0x%08" PRIx32 ")\t " + printf("Expected %.9f (0x%08" PRIx32 + ")\t " "Found %.9f (0x%08" PRIx32 ")\n", - expected, FloatToRawbits(expected), - result, FloatToRawbits(result)); + expected, + FloatToRawbits(expected), + result, + FloatToRawbits(result)); } return false; } @@ -107,12 +116,16 @@ bool EqualFP64(double expected, const RegisterDump*, double result) { if (std::isnan(expected) || (expected == 0.0)) { printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n", - 
DoubleToRawbits(expected), DoubleToRawbits(result)); + DoubleToRawbits(expected), + DoubleToRawbits(result)); } else { - printf("Expected %.17f (0x%016" PRIx64 ")\t " + printf("Expected %.17f (0x%016" PRIx64 + ")\t " "Found %.17f (0x%016" PRIx64 ")\n", - expected, DoubleToRawbits(expected), - result, DoubleToRawbits(result)); + expected, + DoubleToRawbits(expected), + result, + DoubleToRawbits(result)); } return false; } @@ -125,7 +138,8 @@ bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) { int64_t result_x = core->xreg(reg.GetCode()); if ((result_x & 0xffffffff00000000) != 0) { printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n", - expected, result_x); + expected, + result_x); return false; } uint32_t result_w = core->wreg(reg.GetCode()); @@ -160,7 +174,9 @@ bool EqualFP32(float expected, uint64_t result_64 = core->dreg_bits(fpreg.GetCode()); if ((result_64 & 0xffffffff00000000) != 0) { printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n", - FloatToRawbits(expected), expected, result_64); + FloatToRawbits(expected), + expected, + result_64); return false; } @@ -195,24 +211,16 @@ bool Equal64(uint64_t expected, } -static char FlagN(uint32_t flags) { - return (flags & NFlag) ? 'N' : 'n'; -} +static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; } -static char FlagZ(uint32_t flags) { - return (flags & ZFlag) ? 'Z' : 'z'; -} +static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; } -static char FlagC(uint32_t flags) { - return (flags & CFlag) ? 'C' : 'c'; -} +static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; } -static char FlagV(uint32_t flags) { - return (flags & VFlag) ? 'V' : 'v'; -} +static char FlagV(uint32_t flags) { return (flags & VFlag) ? 
'V' : 'v'; } bool EqualNzcv(uint32_t expected, uint32_t result) { @@ -220,8 +228,14 @@ bool EqualNzcv(uint32_t expected, uint32_t result) { VIXL_ASSERT((result & ~NZCVFlag) == 0); if (result != expected) { printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n", - FlagN(expected), FlagZ(expected), FlagC(expected), FlagV(expected), - FlagN(result), FlagZ(result), FlagC(result), FlagV(result)); + FlagN(expected), + FlagZ(expected), + FlagC(expected), + FlagV(expected), + FlagN(result), + FlagZ(result), + FlagC(result), + FlagV(result)); return false; } @@ -233,7 +247,9 @@ bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) { for (unsigned i = 0; i < kNumberOfRegisters; i++) { if (a->xreg(i) != b->xreg(i)) { printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n", - i, a->xreg(i), b->xreg(i)); + i, + a->xreg(i), + b->xreg(i)); return false; } } @@ -243,7 +259,9 @@ bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) { uint64_t b_bits = b->dreg_bits(i); if (a_bits != b_bits) { printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n", - i, a_bits, b_bits); + i, + a_bits, + b_bits); return false; } } @@ -252,8 +270,12 @@ bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) { } -RegList PopulateRegisterArray(Register* w, Register* x, Register* r, - int reg_size, int reg_count, RegList allowed) { +RegList PopulateRegisterArray(Register* w, + Register* x, + Register* r, + int reg_size, + int reg_count, + RegList allowed) { RegList list = 0; int i = 0; for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) { @@ -279,8 +301,12 @@ RegList PopulateRegisterArray(Register* w, Register* x, Register* r, } -RegList PopulateFPRegisterArray(FPRegister* s, FPRegister* d, FPRegister* v, - int reg_size, int reg_count, RegList allowed) { +RegList PopulateFPRegisterArray(FPRegister* s, + FPRegister* d, + FPRegister* v, + int reg_size, + int reg_count, + RegList allowed) { RegList list = 0; int i = 0; for (unsigned n = 
0; (n < kNumberOfFPRegisters) && (i < reg_count); n++) { @@ -403,35 +429,40 @@ void RegisterDump::Dump(MacroAssembler* masm) { // Dump X registers. __ Add(dump, dump_base, x_offset); for (unsigned i = 0; i < kNumberOfRegisters; i += 2) { - __ Stp(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i + 1), + __ Stp(Register::GetXRegFromCode(i), + Register::GetXRegFromCode(i + 1), MemOperand(dump, i * kXRegSizeInBytes)); } // Dump W registers. __ Add(dump, dump_base, w_offset); for (unsigned i = 0; i < kNumberOfRegisters; i += 2) { - __ Stp(Register::GetWRegFromCode(i), Register::GetWRegFromCode(i + 1), + __ Stp(Register::GetWRegFromCode(i), + Register::GetWRegFromCode(i + 1), MemOperand(dump, i * kWRegSizeInBytes)); } // Dump D registers. __ Add(dump, dump_base, d_offset); for (unsigned i = 0; i < kNumberOfFPRegisters; i += 2) { - __ Stp(FPRegister::GetDRegFromCode(i), FPRegister::GetDRegFromCode(i + 1), + __ Stp(FPRegister::GetDRegFromCode(i), + FPRegister::GetDRegFromCode(i + 1), MemOperand(dump, i * kDRegSizeInBytes)); } // Dump S registers. __ Add(dump, dump_base, s_offset); for (unsigned i = 0; i < kNumberOfFPRegisters; i += 2) { - __ Stp(FPRegister::GetSRegFromCode(i), FPRegister::GetSRegFromCode(i + 1), + __ Stp(FPRegister::GetSRegFromCode(i), + FPRegister::GetSRegFromCode(i + 1), MemOperand(dump, i * kSRegSizeInBytes)); } // Dump Q registers. 
__ Add(dump, dump_base, q_offset); for (unsigned i = 0; i < kNumberOfVRegisters; i += 2) { - __ Stp(VRegister::GetQRegFromCode(i), VRegister::GetQRegFromCode(i + 1), + __ Stp(VRegister::GetQRegFromCode(i), + VRegister::GetQRegFromCode(i + 1), MemOperand(dump, i * kQRegSizeInBytes)); } diff --git a/test/aarch64/test-utils-aarch64.h b/test/aarch64/test-utils-aarch64.h index b33c0721..f12bdb0e 100644 --- a/test/aarch64/test-utils-aarch64.h +++ b/test/aarch64/test-utils-aarch64.h @@ -113,9 +113,7 @@ class RegisterDump { return RawbitsToDouble(dreg_bits(code)); } - inline vec128_t qreg(unsigned code) const { - return dump_.q_[code]; - } + inline vec128_t qreg(unsigned code) const { return dump_.q_[code]; } // Stack pointer accessors. inline int64_t spreg() const { @@ -135,9 +133,7 @@ class RegisterDump { return dump_.flags_ & Flags_mask; } - inline bool IsComplete() const { - return completed_; - } + inline bool IsComplete() const { return completed_; } private: // Indicate whether the dump operation has been completed. 
@@ -202,17 +198,24 @@ bool EqualFP64(double expected, const RegisterDump*, double result); bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg); bool Equal64(uint64_t expected, const RegisterDump* core, const Register& reg); -bool Equal64(uint64_t expected, const RegisterDump* core, const VRegister& vreg); +bool Equal64(uint64_t expected, + const RegisterDump* core, + const VRegister& vreg); -bool EqualFP32(float expected, const RegisterDump* core, +bool EqualFP32(float expected, + const RegisterDump* core, const FPRegister& fpreg); -bool EqualFP64(double expected, const RegisterDump* core, +bool EqualFP64(double expected, + const RegisterDump* core, const FPRegister& fpreg); -bool Equal64(const Register& reg0, const RegisterDump* core, +bool Equal64(const Register& reg0, + const RegisterDump* core, const Register& reg1); -bool Equal128(uint64_t expected_h, uint64_t expected_l, - const RegisterDump* core, const VRegister& reg); +bool Equal128(uint64_t expected_h, + uint64_t expected_l, + const RegisterDump* core, + const VRegister& reg); bool EqualNzcv(uint32_t expected, uint32_t result); @@ -228,12 +231,20 @@ bool EqualRegisters(const RegisterDump* a, const RegisterDump* b); // Any of w, x, or r can be NULL if they are not required. // // The return value is a RegList indicating which registers were allocated. -RegList PopulateRegisterArray(Register* w, Register* x, Register* r, - int reg_size, int reg_count, RegList allowed); +RegList PopulateRegisterArray(Register* w, + Register* x, + Register* r, + int reg_size, + int reg_count, + RegList allowed); // As PopulateRegisterArray, but for floating-point registers. -RegList PopulateFPRegisterArray(FPRegister* s, FPRegister* d, FPRegister* v, - int reg_size, int reg_count, RegList allowed); +RegList PopulateFPRegisterArray(FPRegister* s, + FPRegister* d, + FPRegister* v, + int reg_size, + int reg_count, + RegList allowed); // Ovewrite the contents of the specified registers. 
This enables tests to // check that register contents are written in cases where it's likely that the @@ -243,11 +254,13 @@ RegList PopulateFPRegisterArray(FPRegister* s, FPRegister* d, FPRegister* v, // registers, a subsequent write into an aliased W register should clear the // top word anyway, so clobbering the full X registers should make tests more // rigorous. -void Clobber(MacroAssembler* masm, RegList reg_list, +void Clobber(MacroAssembler* masm, + RegList reg_list, uint64_t const value = 0xfedcba9876543210); // As Clobber, but for FP registers. -void ClobberFP(MacroAssembler* masm, RegList reg_list, +void ClobberFP(MacroAssembler* masm, + RegList reg_list, double const value = kFP64SignallingNaN); // As Clobber, but for a CPURegList with either FP or integer registers. When |