Snap for 11181721 from 3259758f9a1a85933bcf4c4136fe280b21198b7b to mainline-os-statsd-release aml_sta_341615000 aml_sta_341511040 aml_sta_341410000 android14-mainline-os-statsd-release

Change-Id: Ie5d88eaf8138349d0d5a32b5999abd8b454462b9
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-12-06 00:16:52 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-12-06 00:16:52 +0000
commit: 4ca6ab327e527d617af254a2fe98bc9b8a5f5eb9 (patch)
tree: 756b70279c1f9558e435faa2e82fda941424ae04
parent: b1fe831e62beccc2e7a11a62ed00ad68058f216f (diff)
parent: 3259758f9a1a85933bcf4c4136fe280b21198b7b (diff)
download: libgav1-android14-mainline-os-statsd-release.tar.gz
62 files changed, 554 insertions, 272 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52b1b32..73f27a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -144,7 +144,7 @@ else()
         " examples & tests and libgav1 when LIBGAV1_THREADPOOL_USE_STD_MUTEX is"
         " not defined. To continue, download the Abseil repository to"
         " third_party/abseil-cpp:\n  git \\\n    -C ${libgav1_root} \\\n"
-        "    clone \\\n"
+        "    clone -b 20220623.0 --depth 1 \\\n"
         "    https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp")
   endif()
 endif()
diff --git a/METADATA b/METADATA
index 3710207..38ae79a 100644
--- a/METADATA
+++ b/METADATA
@@ -1,7 +1,9 @@
-name: "libgav1"
-description:
-    "Google's decoder implementation of the AV1 video codec."
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update libgav1
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
 
+name: "libgav1"
+description: "Google\'s decoder implementation of the AV1 video codec."
 third_party {
   url {
     type: HOMEPAGE
@@ -11,11 +13,11 @@ third_party {
     type: GIT
     value: "https://chromium.googlesource.com/codecs/libgav1"
   }
-  version: "dc2ae123784cf1a9504d6b4eba112170574e31e0"
+  version: "v0.19.0"
   license_type: NOTICE
   last_upgrade_date {
-    year: 2022
+    year: 2023
     month: 10
-    day: 4
+    day: 31
   }
 }
diff --git a/README.md b/README.md
index 04c6a94..bdf598c 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,8 @@ compliant AV1 decoder. More information on the AV1 video format can be found at
     From within the libgav1 directory:
 
     ```shell
-    $ git clone https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp
+    $ git clone -b 20220623.0 --depth 1 \
+      https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp
     ```
 
     Note: Abseil is required by the examples and tests. libgav1 will depend on
@@ -31,7 +32,8 @@ compliant AV1 decoder. More information on the AV1 video format can be found at
     From within the libgav1 directory:
 
     ```shell
-    $ git clone https://github.com/google/googletest.git third_party/googletest
+    $ git clone -b release-1.12.1 --depth 1 \
+      https://github.com/google/googletest.git third_party/googletest
     ```
 
 ### Compile
@@ -44,8 +46,8 @@ compliant AV1 decoder. More information on the AV1 video format can be found at
 
 Configuration options:
 
-*   `LIBGAV1_MAX_BITDEPTH`: defines the maximum supported bitdepth (8, 10;
-    default: 10).
+*   `LIBGAV1_MAX_BITDEPTH`: defines the maximum supported bitdepth (8, 10, 12;
+    default: 12).
 *   `LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS`: define to a non-zero value to disable
     [symbol reduction](#symbol-reduction) in an optimized build to keep all
     versions of dsp functions available. Automatically defined in
diff --git a/README.version b/README.version
index 860af1a..4712a93 100644
--- a/README.version
+++ b/README.version
@@ -1,5 +1,4 @@
 URL: https://chromium.googlesource.com/codecs/libgav1
-Version: v0.18.0
+Version: v0.19.0
 BugComponent: 324837
 Local Modifications:
-* Backport av1c generation - cl/463412386
diff --git a/cmake/libgav1_build_definitions.cmake b/cmake/libgav1_build_definitions.cmake
index 95c17be..1465679 100644
--- a/cmake/libgav1_build_definitions.cmake
+++ b/cmake/libgav1_build_definitions.cmake
@@ -141,7 +141,7 @@ macro(libgav1_set_build_definitions)
   endif()
 
   if(NOT LIBGAV1_MAX_BITDEPTH)
-    set(LIBGAV1_MAX_BITDEPTH 10)
+    set(LIBGAV1_MAX_BITDEPTH 12)
   elseif(NOT LIBGAV1_MAX_BITDEPTH EQUAL 8
          AND NOT LIBGAV1_MAX_BITDEPTH EQUAL 10
          AND NOT LIBGAV1_MAX_BITDEPTH EQUAL 12)
diff --git a/examples/file_reader.cc b/examples/file_reader.cc
index b096722..a01b7ab 100644
--- a/examples/file_reader.cc
+++ b/examples/file_reader.cc
@@ -82,7 +82,14 @@ std::unique_ptr<FileReaderInterface> FileReader::Open(
     return nullptr;
   }
 
-  return file;
+  // With C++11, to return |file|, an explicit move is required as the return
+  // type differs from the local variable. Overload resolution isn't guaranteed
+  // in this case, though some compilers may adopt the C++14 behavior (C++
+  // Standard Core Language Issue #1579, Return by converting move
+  // constructor):
+  // https://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1579
+  // To keep things simple we opt for the following compatible form.
+  return std::unique_ptr<FileReaderInterface>(file.release());
 }
 
 // IVF Frame Header format, from https://wiki.multimedia.cx/index.php/IVF
diff --git a/examples/file_writer_test.cc b/examples/file_writer_test.cc
index 481808c..df5be17 100644
--- a/examples/file_writer_test.cc
+++ b/examples/file_writer_test.cc
@@ -18,6 +18,7 @@
 #include <cstdint>
 #include <cstring>
 #include <memory>
+#include <new>
 #include <ostream>
 #include <string>
 #include <utility>
@@ -64,7 +65,7 @@ const char* const
 
 // TODO(tomfinegan): Add a bitdepth arg, and test writing 10 bit frame buffers.
 std::unique_ptr<DecoderBuffer> GetFakeDecoderBuffer(ImageFormat image_format) {
-  auto buffer = absl::make_unique<DecoderBuffer>();
+  auto buffer = absl::WrapUnique(new (std::nothrow) DecoderBuffer);
   if (buffer == nullptr) return nullptr;
   buffer->chroma_sample_position = kChromaSamplePositionUnknown;
   buffer->image_format = image_format;
diff --git a/src/c_decoder_test.c b/src/c_decoder_test.c
index 9587262..7c6f8c8 100644
--- a/src/c_decoder_test.c
+++ b/src/c_decoder_test.c
@@ -234,6 +234,7 @@ static void DecoderTestAPIFlowForNonFrameParallelMode(void) {
   // Signal end of stream (method 1). This should ensure that all the references
   // are released.
   status = Libgav1DecoderSignalEOS(test.decoder);
+  ASSERT_EQ(status, kLibgav1StatusOk);
 
   // libgav1 should have released all the reference frames now.
   ASSERT_EQ(test.frames_in_use, 0);
@@ -382,6 +383,7 @@ static void DecoderTestNonFrameParallelModeInvalidFrameAfterEOS(void) {
 
   // Signal end of stream.
   status = Libgav1DecoderSignalEOS(test.decoder);
+  ASSERT_EQ(status, kLibgav1StatusOk);
 
   // libgav1 should have released all the reference frames now.
   ASSERT_EQ(test.frames_in_use, 0);
@@ -459,6 +461,7 @@ static void DecoderTestMetadataObu(void) {
   ASSERT_EQ(test.buffer_private_data, buffer->buffer_private_data);
 
   status = Libgav1DecoderSignalEOS(test.decoder);
+  ASSERT_EQ(status, kLibgav1StatusOk);
   ASSERT_EQ(test.frames_in_use, 0);
 
   Libgav1DecoderDestroy(test.decoder);
diff --git a/src/decoder_test.cc b/src/decoder_test.cc
index e274122..52ec5cc 100644
--- a/src/decoder_test.cc
+++ b/src/decoder_test.cc
@@ -172,6 +172,7 @@ TEST_F(DecoderTest, APIFlowForNonFrameParallelMode) {
   // Signal end of stream (method 1). This should ensure that all the references
   // are released.
   status = decoder_->SignalEOS();
+  EXPECT_EQ(status, kStatusOk);
 
   // libgav1 should have released all the reference frames now.
   EXPECT_EQ(frames_in_use_, 0);
@@ -302,6 +303,7 @@ TEST_F(DecoderTest, NonFrameParallelModeInvalidFrameAfterEOS) {
 
   // Signal end of stream.
   status = decoder_->SignalEOS();
+  EXPECT_EQ(status, kStatusOk);
 
   // libgav1 should have released all the reference frames now.
   EXPECT_EQ(frames_in_use_, 0);
@@ -372,6 +374,7 @@ TEST_F(DecoderTest, MetadataObu) {
   EXPECT_EQ(buffer_private_data_, buffer->buffer_private_data);
 
   status = decoder_->SignalEOS();
+  EXPECT_EQ(status, kStatusOk);
   EXPECT_EQ(frames_in_use_, 0);
 }
 
diff --git a/src/dsp/arm/convolve_10bit_neon.cc b/src/dsp/arm/convolve_10bit_neon.cc
index 389f029..1aa0cc7 100644
--- a/src/dsp/arm/convolve_10bit_neon.cc
+++ b/src/dsp/arm/convolve_10bit_neon.cc
@@ -412,30 +412,21 @@ void FilterHorizontal(const uint16_t* LIBGAV1_RESTRICT const src,
                       void* LIBGAV1_RESTRICT const dest,
                       const ptrdiff_t pred_stride, const int width,
                       const int height, const int16x4_t* const v_tap) {
-  assert(width < 8 || num_taps != 4);
-  // Don't simplify the redundant if conditions with the template parameters,
-  // which helps the compiler generate compact code.
-  if (width >= 8 && num_taps != 4) {
-    FilterHorizontalWidth8AndUp<num_taps, is_compound, is_2d>(
-        src, src_stride, dest, pred_stride, width, height, v_tap);
-    return;
-  }
-
   // Horizontal passes only needs to account for number of taps 2 and 4 when
   // |width| <= 4.
   assert(width <= 4);
   assert(num_taps == 2 || num_taps == 4);
   if (num_taps == 2 || num_taps == 4) {
-    if (width == 4) {
-      FilterHorizontalWidth4<num_taps, is_compound, is_2d>(
-          src, src_stride, dest, pred_stride, height, v_tap);
-      return;
-    }
-    assert(width == 2);
-    if (!is_compound) {
+    if (width == 2 && !is_compound) {
       FilterHorizontalWidth2<num_taps, is_2d>(src, src_stride, dest,
                                               pred_stride, height, v_tap);
+      return;
     }
+    assert(width == 4);
+    FilterHorizontalWidth4<num_taps, is_compound, is_2d>(
+        src, src_stride, dest, pred_stride, height, v_tap);
+  } else {
+    assert(false);
   }
 }
 
@@ -454,19 +445,32 @@ LIBGAV1_ALWAYS_INLINE void DoHorizontalPass(
     v_tap[k] = vdup_n_s16(kHalfSubPixelFilters[filter_index][filter_id][k]);
   }
 
-  if (filter_index == 2) {  // 8 tap.
-    FilterHorizontal<8, is_compound, is_2d>(src, src_stride, dst, dst_stride,
-                                            width, height, v_tap);
-  } else if (filter_index < 2) {  // 6 tap.
-    FilterHorizontal<6, is_compound, is_2d>(src + 1, src_stride, dst,
-                                            dst_stride, width, height, v_tap);
-  } else if ((filter_index & 0x4) != 0) {  // 4 tap.
-    // ((filter_index == 4) | (filter_index == 5))
-    FilterHorizontal<4, is_compound, is_2d>(src + 2, src_stride, dst,
-                                            dst_stride, width, height, v_tap);
-  } else {  // 2 tap.
-    FilterHorizontal<2, is_compound, is_2d>(src + 3, src_stride, dst,
-                                            dst_stride, width, height, v_tap);
+  // Horizontal filter.
+  // Filter types used for width <= 4 are different from those for width > 4.
+  // When width > 4, the valid filter index range is always [0, 3].
+  // When width <= 4, the valid filter index range is always [3, 5].
+  if (width >= 8) {
+    if (filter_index == 2) {  // 8 tap.
+      FilterHorizontalWidth8AndUp<8, is_compound, is_2d>(
+          src, src_stride, dst, dst_stride, width, height, v_tap);
+    } else if (filter_index < 2) {  // 6 tap.
+      FilterHorizontalWidth8AndUp<6, is_compound, is_2d>(
+          src + 1, src_stride, dst, dst_stride, width, height, v_tap);
+    } else {  // 2 tap.
+      assert(filter_index == 3);
+      FilterHorizontalWidth8AndUp<2, is_compound, is_2d>(
+          src + 3, src_stride, dst, dst_stride, width, height, v_tap);
+    }
+  } else {
+    if ((filter_index & 0x4) != 0) {  // 4 tap.
+      // ((filter_index == 4) | (filter_index == 5))
+      FilterHorizontal<4, is_compound, is_2d>(src + 2, src_stride, dst,
+                                              dst_stride, width, height, v_tap);
+    } else {  // 2 tap.
+      assert(filter_index == 3);
+      FilterHorizontal<2, is_compound, is_2d>(src + 3, src_stride, dst,
+                                              dst_stride, width, height, v_tap);
+    }
   }
 }
 
diff --git a/src/dsp/arm/convolve_neon.cc b/src/dsp/arm/convolve_neon.cc
index 5b80da2..97b3f26 100644
--- a/src/dsp/arm/convolve_neon.cc
+++ b/src/dsp/arm/convolve_neon.cc
@@ -371,16 +371,14 @@ void FilterHorizontal(const uint8_t* LIBGAV1_RESTRICT const src,
   assert(width <= 4);
   assert(filter_index >= 3 && filter_index <= 5);
   if (filter_index >= 3 && filter_index <= 5) {
-    if (width == 4) {
-      FilterHorizontalWidth4<filter_index, is_2d, is_compound>(
-          src, src_stride, dest, pred_stride, height, v_tap);
-      return;
-    }
-    assert(width == 2);
-    if (!is_compound) {
+    if (width == 2 && !is_compound) {
       FilterHorizontalWidth2<filter_index, is_2d>(src, src_stride, dest,
                                                   pred_stride, height, v_tap);
+      return;
     }
+    assert(width == 4);
+    FilterHorizontalWidth4<filter_index, is_2d, is_compound>(
+        src, src_stride, dest, pred_stride, height, v_tap);
   }
 }
 
diff --git a/src/dsp/arm/film_grain_neon.cc b/src/dsp/arm/film_grain_neon.cc
index 76e1151..cde887c 100644
--- a/src/dsp/arm/film_grain_neon.cc
+++ b/src/dsp/arm/film_grain_neon.cc
@@ -682,26 +682,14 @@ inline int16x8_t Clip3(const int16x8_t value, const int16x8_t low,
 
 template <int bitdepth, typename Pixel>
 inline int16x8_t GetScalingFactors(const int16_t scaling_lut[],
-                                   const Pixel* source) {
+                                   const Pixel* source,
+                                   const int valid_range = 8) {
   int16_t start_vals[8];
   static_assert(bitdepth <= kBitdepth10,
                 "NEON Film Grain is not yet implemented for 12bpp.");
 #if LIBGAV1_MSAN
-  memset(start_vals, 0, sizeof(start_vals));
+  if (valid_range < 8) memset(start_vals, 0, sizeof(start_vals));
 #endif
-  for (int i = 0; i < 8; ++i) {
-    assert(source[i] < (kScalingLookupTableSize << (bitdepth - kBitdepth8)));
-    start_vals[i] = scaling_lut[source[i]];
-  }
-  return vld1q_s16(start_vals);
-}
-
-template <int bitdepth, typename Pixel>
-inline int16x8_t GetScalingFactors(const int16_t scaling_lut[],
-                                   const Pixel* source, const int valid_range) {
-  int16_t start_vals[8];
-  static_assert(bitdepth <= kBitdepth10,
-                "NEON Film Grain is not yet implemented for 12bpp.");
   for (int i = 0; i < valid_range; ++i) {
     assert(source[i] < (kScalingLookupTableSize << (bitdepth - kBitdepth8)));
     start_vals[i] = scaling_lut[source[i]];
diff --git a/src/dsp/arm/intrapred_directional_neon.cc b/src/dsp/arm/intrapred_directional_neon.cc
index e9bdcf0..d36ef5f 100644
--- a/src/dsp/arm/intrapred_directional_neon.cc
+++ b/src/dsp/arm/intrapred_directional_neon.cc
@@ -1752,7 +1752,7 @@ inline void DirectionalZone2FromLeftCol_8x8(
   const int index_scale_bits = 6;
   // The values in |offset_y| are negative, except for the first element, which
   // is zero.
-  int16x8_t offset_y = left_y;
+  int16x8_t offset_y;
   int16x8_t shift_upsampled = left_y;
   // The shift argument must be a constant, otherwise use upsample_shift
   // directly.
diff --git a/src/dsp/arm/inverse_transform_neon.cc b/src/dsp/arm/inverse_transform_neon.cc
index 452f14a..cc4e4a4 100644
--- a/src/dsp/arm/inverse_transform_neon.cc
+++ b/src/dsp/arm/inverse_transform_neon.cc
@@ -345,11 +345,12 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a,
                                                          int16x8_t* b,
                                                          const int angle,
                                                          const bool flip) {
+  // Clang < 14 targeting armv8.1-a+ optimizes vqrdmulhq_n_s16 and vqsubq_s16
+  // (in HadamardRotation) into vqrdmlshq_s16 resulting in an "off by one"
+  // error. This behavior was fixed in 14.0.0:
+  // https://github.com/llvm/llvm-project/commit/82973edfb72a95b442fa6d2bb404e15a4031855e
 #if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \
-    defined(__clang__)  // ARM v8.1-A
-  // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into
-  // vqrdmlshq_s16 resulting in an "off by one" error. For now, do not use
-  // vqrdmulhq_n_s16().
+    defined(__clang__) && __clang_major__ < 14
   const int16_t cos128 = Cos128(angle);
   const int16_t sin128 = Sin128(angle);
   const int32x4_t x0 = vmull_n_s16(vget_low_s16(*b), -sin128);
diff --git a/src/dsp/arm/loop_filter_10bit_neon.cc b/src/dsp/arm/loop_filter_10bit_neon.cc
index a9dd98f..abdc074 100644
--- a/src/dsp/arm/loop_filter_10bit_neon.cc
+++ b/src/dsp/arm/loop_filter_10bit_neon.cc
@@ -444,7 +444,6 @@ void Horizontal6_NEON(void* const dest, const ptrdiff_t stride,
   const uint64x1_t need_filter6 = vreinterpret_u64_u16(is_flat3_mask);
   if (vget_lane_u64(need_filter6, 0) == 0) {
     // Filter6() does not apply, but Filter4() applies to one or more values.
-    p0q0_output = p0q0;
     p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
     p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
   } else {
@@ -526,7 +525,6 @@ void Vertical6_NEON(void* const dest, const ptrdiff_t stride, int outer_thresh,
   const uint64x1_t need_filter6 = vreinterpret_u64_u16(is_flat3_mask);
   if (vget_lane_u64(need_filter6, 0) == 0) {
     // Filter6() does not apply, but Filter4() applies to one or more values.
-    p0q0_output = p0q0;
     p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1);
     p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0);
   } else {
diff --git a/src/dsp/arm/loop_restoration_10bit_neon.cc b/src/dsp/arm/loop_restoration_10bit_neon.cc
index 410bc20..9191080 100644
--- a/src/dsp/arm/loop_restoration_10bit_neon.cc
+++ b/src/dsp/arm/loop_restoration_10bit_neon.cc
@@ -1130,7 +1130,13 @@ inline void LookupIntermediate(const uint16x8_t sum, const uint16x8_t index,
   const uint8x8_t idx = vqmovn_u16(index);
   uint8_t temp[8];
   vst1_u8(temp, idx);
-  *ma = vsetq_lane_u8(kSgrMaLookup[temp[0]], *ma, offset + 0);
+  // offset == 0 is assumed to be the first call to this function. The value is
+  // duplicated to avoid -Wuninitialized warnings under gcc.
+  if (offset == 0) {
+    *ma = vdupq_n_u8(kSgrMaLookup[temp[0]]);
+  } else {
+    *ma = vsetq_lane_u8(kSgrMaLookup[temp[0]], *ma, offset + 0);
+  }
   *ma = vsetq_lane_u8(kSgrMaLookup[temp[1]], *ma, offset + 1);
   *ma = vsetq_lane_u8(kSgrMaLookup[temp[2]], *ma, offset + 2);
   *ma = vsetq_lane_u8(kSgrMaLookup[temp[3]], *ma, offset + 3);
@@ -1712,8 +1718,6 @@ LIBGAV1_ALWAYS_INLINE void BoxSumFilterPreProcess3(
   s[0] = Load1QMsanU16(src + 0, overread_in_bytes + 0);
   s[1] = Load1QMsanU16(src + 8, overread_in_bytes + 16);
   Square(s[0], sq);
-  // Quiet "may be used uninitialized" warning.
-  mas[0] = mas[1] = vdupq_n_u8(0);
   BoxFilterPreProcess3Lo(s, scale, sum3, square_sum3, sq, &mas[0], bs);
 
   int x = 0;
@@ -2067,8 +2071,6 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterPass2(
   s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
   s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
   Square(s[0], sq);
-  // Quiet "may be used uninitialized" warning.
-  mas[0] = mas[1] = vdupq_n_u8(0);
   BoxFilterPreProcess3Lo(s, scale, sum3, square_sum3, sq, &mas[0], bs);
 
   int x = 0;
@@ -2255,8 +2257,6 @@ inline void BoxFilterLastRow(
   s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0);
   s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16);
   Square(s[0], sq);
-  // Quiet "may be used uninitialized" warning.
-  ma3[0] = ma3[1] = vdupq_n_u8(0);
   BoxFilterPreProcessLastRowLo(s, scales, sum3, sum5, square_sum3, square_sum5,
                                sq, &ma3[0], &ma5[0], b3, b5);
 
diff --git a/src/dsp/arm/loop_restoration_neon.cc b/src/dsp/arm/loop_restoration_neon.cc
index cd8552e..adb8f36 100644
--- a/src/dsp/arm/loop_restoration_neon.cc
+++ b/src/dsp/arm/loop_restoration_neon.cc
@@ -1125,7 +1125,11 @@ inline void CalculateIntermediate(const uint16x8_t sum,
   val = AdjustValue(val, idx, 101);  // 101 is the last index which value is 3.
   val = AdjustValue(val, idx, 169);  // 169 is the last index which value is 2.
   val = AdjustValue(val, idx, 254);  // 254 is the last index which value is 1.
-  *ma = (offset == 0) ? vcombine_u8(val, vget_high_u8(*ma))
+  // offset == 0 is assumed to be the first call to this function. Note
+  // vget_high_u8(*ma) is not used in this case to avoid a -Wuninitialized
+  // warning with some versions of gcc. vdup_n_u8(0) could work as well, but in
+  // most cases clang and gcc generated better code with this version.
+  *ma = (offset == 0) ? vcombine_u8(val, val)
                       : vcombine_u8(vget_low_u8(*ma), val);
 
   // b = ma * b * one_over_n
diff --git a/src/dsp/average_blend_test.cc b/src/dsp/average_blend_test.cc
index 6d1100a..67d592f 100644
--- a/src/dsp/average_blend_test.cc
+++ b/src/dsp/average_blend_test.cc
@@ -76,9 +76,8 @@ class AverageBlendTest : public testing::TestWithParam<BlockSize>,
     if (absl::StartsWith(test_case, "C/")) {
       base_func_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        AverageBlendInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      AverageBlendInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       AverageBlendInit_NEON();
     } else {
diff --git a/src/dsp/cdef_test.cc b/src/dsp/cdef_test.cc
index c25d7df..e2db17a 100644
--- a/src/dsp/cdef_test.cc
+++ b/src/dsp/cdef_test.cc
@@ -79,11 +79,11 @@ class CdefDirectionTest : public testing::TestWithParam<int> {
     const char* const test_case = test_info->test_suite_name();
     if (absl::StartsWith(test_case, "C/")) {
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       CdefInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "AVX2/")) {
-      if ((GetCpuInfo() & kAVX2) != 0) {
-        CdefInit_AVX2();
-      }
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+      CdefInit_AVX2();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       CdefInit_NEON();
     } else {
@@ -275,11 +275,11 @@ class CdefFilteringTest : public testing::TestWithParam<CdefTestParam> {
     } else if (absl::StartsWith(test_case, "NEON/")) {
       CdefInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       CdefInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "AVX2/")) {
-      if ((GetCpuInfo() & kAVX2) != 0) {
-        CdefInit_AVX2();
-      }
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+      CdefInit_AVX2();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
@@ -304,7 +304,7 @@ template <int bitdepth, typename Pixel>
 void CdefFilteringTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
   const int id = static_cast<int>(param_.rows4x4 < 4) * 3 +
                  (param_.subsampling_x + param_.subsampling_y) * 6;
-  absl::Duration elapsed_time;
+  absl::Duration elapsed_time[kMaxPlanes];
   for (int num_tests = 0; num_tests < num_runs; ++num_tests) {
     for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
       const int subsampling_x = (plane == kPlaneY) ? 0 : param_.subsampling_x;
@@ -355,7 +355,7 @@ void CdefFilteringTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
           source_ + offset, kSourceStride, block_height, primary_strength_,
           secondary_strength_, damping_, direction_, dest_[plane],
           kTestBufferStride * sizeof(dest_[0][0]));
-      elapsed_time += absl::Now() - start;
+      elapsed_time[plane] += absl::Now() - start;
     }
   }
 
@@ -379,7 +379,7 @@ void CdefFilteringTest<bitdepth, Pixel>::TestRandomValues(int num_runs) {
     ASSERT_NE(expected_digest, nullptr);
     test_utils::CheckMd5Digest(kCdef, kCdefFilterName, expected_digest,
                                reinterpret_cast<uint8_t*>(dest_[plane]),
-                               sizeof(dest_[plane]), elapsed_time);
+                               sizeof(dest_[plane]), elapsed_time[plane]);
   }
 }
 
diff --git a/src/dsp/common_dsp_test.cc b/src/dsp/common_dsp_test.cc
new file mode 100644
index 0000000..3342ce8
--- /dev/null
+++ b/src/dsp/common_dsp_test.cc
@@ -0,0 +1,58 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/match.h"
+#include "gtest/gtest.h"
+#include "src/dsp/x86/common_avx2_test.h"
+#include "src/dsp/x86/common_sse4_test.h"
+#include "src/utils/cpu.h"
+
+namespace libgav1 {
+namespace dsp {
+namespace {
+
+class CommonDspTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    const testing::TestInfo* const test_info =
+        testing::UnitTest::GetInstance()->current_test_info();
+    const char* const test_case = test_info->name();
+    if (absl::StartsWith(test_case, "SSE41")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+    } else if (absl::StartsWith(test_case, "AVX2")) {
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+    } else {
+      FAIL() << "Unrecognized architecture prefix in test case name: "
+             << test_case;
+    }
+  }
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CommonDspTest);
+
+#if LIBGAV1_ENABLE_AVX2
+TEST_F(CommonDspTest, AVX2RightShiftWithRoundingS16) {
+  AVX2RightShiftWithRoundingS16Test();
+}
+#endif  // LIBGAV1_ENABLE_AVX2
+
+#if LIBGAV1_ENABLE_SSE4_1
+TEST_F(CommonDspTest, SSE41RightShiftWithRoundingS16) {
+  SSE41RightShiftWithRoundingS16Test();
+}
+#endif  // LIBGAV1_ENABLE_SSE4_1
+
+}  // namespace
+}  // namespace dsp
+}  // namespace libgav1
diff --git a/src/dsp/convolve_test.cc b/src/dsp/convolve_test.cc
index 42cdeb7..b8c1f1d 100644
--- a/src/dsp/convolve_test.cc
+++ b/src/dsp/convolve_test.cc
@@ -624,13 +624,11 @@ class ConvolveTest : public testing::TestWithParam<
     if (absl::StartsWith(test_case, "C/")) {
       base_convolve_func_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        ConvolveInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      ConvolveInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "AVX2/")) {
-      if ((GetCpuInfo() & kAVX2) != 0) {
-        ConvolveInit_AVX2();
-      }
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+      ConvolveInit_AVX2();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       ConvolveInit_NEON();
 #if LIBGAV1_MAX_BITDEPTH >= 10
@@ -1084,13 +1082,11 @@ class ConvolveScaleTest
     if (absl::StartsWith(test_case, "C/")) {
       base_convolve_scale_func_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        ConvolveInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      ConvolveInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "AVX2/")) {
-      if ((GetCpuInfo() & kAVX2) != 0) {
-        ConvolveInit_AVX2();
-      }
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+      ConvolveInit_AVX2();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       ConvolveInit_NEON();
 #if LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/distance_weighted_blend_test.cc b/src/dsp/distance_weighted_blend_test.cc
index 88040b4..0d6e1cd 100644
--- a/src/dsp/distance_weighted_blend_test.cc
+++ b/src/dsp/distance_weighted_blend_test.cc
@@ -63,9 +63,8 @@ class DistanceWeightedBlendTest : public testing::TestWithParam<BlockSize>,
     if (absl::StartsWith(test_case, "C/")) {
       base_func_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        DistanceWeightedBlendInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      DistanceWeightedBlendInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       DistanceWeightedBlendInit_NEON();
     } else {
diff --git a/src/dsp/intra_edge_test.cc b/src/dsp/intra_edge_test.cc
index b287544..75c45be 100644
--- a/src/dsp/intra_edge_test.cc
+++ b/src/dsp/intra_edge_test.cc
@@ -97,9 +97,8 @@ class IntraEdgeFilterTest : public testing::TestWithParam<EdgeFilterParams> {
     if (absl::StartsWith(test_case, "C/")) {
       base_intra_edge_filter_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraEdgeInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraEdgeInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraEdgeInit_NEON();
     } else {
@@ -356,9 +355,8 @@ class IntraEdgeUpsamplerTest : public testing::TestWithParam<int> {
     if (absl::StartsWith(test_case, "C/")) {
       base_intra_edge_upsampler_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraEdgeInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraEdgeInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraEdgeInit_NEON();
     } else {
diff --git a/src/dsp/intrapred_cfl_test.cc b/src/dsp/intrapred_cfl_test.cc
index 8415d51..53f3075 100644
--- a/src/dsp/intrapred_cfl_test.cc
+++ b/src/dsp/intrapred_cfl_test.cc
@@ -156,9 +156,8 @@ class CflIntraPredTest : public IntraPredTestBase<bitdepth, Pixel> {
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraPredCflInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraPredCflInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraPredCflInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
@@ -304,9 +303,8 @@ class CflSubsamplerTest : public IntraPredTestBase<bitdepth, Pixel> {
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraPredCflInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraPredCflInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraPredCflInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
diff --git a/src/dsp/intrapred_directional_test.cc b/src/dsp/intrapred_directional_test.cc
index 8d4fa63..2c81b27 100644
--- a/src/dsp/intrapred_directional_test.cc
+++ b/src/dsp/intrapred_directional_test.cc
@@ -187,9 +187,8 @@ class DirectionalIntraPredTest : public IntraPredTestBase<bitdepth, Pixel> {
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraPredDirectionalInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraPredDirectionalInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraPredDirectionalInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
diff --git a/src/dsp/intrapred_filter_test.cc b/src/dsp/intrapred_filter_test.cc
index c8d60a0..d5694f6 100644
--- a/src/dsp/intrapred_filter_test.cc
+++ b/src/dsp/intrapred_filter_test.cc
@@ -158,9 +158,8 @@ class FilterIntraPredTest : public IntraPredTestBase<bitdepth, Pixel> {
       // No need to compare C with itself.
       base_filter_intra_pred_ = nullptr;
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraPredFilterInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraPredFilterInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraPredFilterInit_NEON();
     } else {
diff --git a/src/dsp/intrapred_test.cc b/src/dsp/intrapred_test.cc
index cca1c73..5753817 100644
--- a/src/dsp/intrapred_test.cc
+++ b/src/dsp/intrapred_test.cc
@@ -154,10 +154,9 @@ class IntraPredTest : public IntraPredTestBase<bitdepth, Pixel> {
     if (absl::StartsWith(test_case, "C/")) {
       memset(base_intrapreds_, 0, sizeof(base_intrapreds_));
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        IntraPredInit_SSE4_1();
-        IntraPredSmoothInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      IntraPredInit_SSE4_1();
+      IntraPredSmoothInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       IntraPredInit_NEON();
       IntraPredSmoothInit_NEON();
diff --git a/src/dsp/inverse_transform_test.cc b/src/dsp/inverse_transform_test.cc
index 081dcc1..d74a33a 100644
--- a/src/dsp/inverse_transform_test.cc
+++ b/src/dsp/inverse_transform_test.cc
@@ -181,9 +181,8 @@ class InverseTransformTest
     if (absl::StartsWith(test_case, "C/")) {
       memset(base_inverse_transforms_, 0, sizeof(base_inverse_transforms_));
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        InverseTransformInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      InverseTransformInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       InverseTransformInit_NEON();
       InverseTransformInit10bpp_NEON();
diff --git a/src/dsp/loop_filter_test.cc b/src/dsp/loop_filter_test.cc
index 63ed530..93a273a 100644
--- a/src/dsp/loop_filter_test.cc
+++ b/src/dsp/loop_filter_test.cc
@@ -128,9 +128,8 @@ class LoopFilterTest : public testing::TestWithParam<LoopFilterSize> {
     if (absl::StartsWith(test_case, "C/")) {
       memset(base_loop_filters_, 0, sizeof(base_loop_filters_));
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        LoopFilterInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      LoopFilterInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       LoopFilterInit_NEON();
 #if LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/loop_restoration_test.cc b/src/dsp/loop_restoration_test.cc
index 5c645b8..d6dcd9c 100644
--- a/src/dsp/loop_restoration_test.cc
+++ b/src/dsp/loop_restoration_test.cc
@@ -69,19 +69,17 @@ class SelfGuidedFilterTest : public testing::TestWithParam<int>,
     const char* const test_case = test_info->test_suite_name();
     if (absl::StartsWith(test_case, "C/")) {
     } else if (absl::StartsWith(test_case, "AVX2/")) {
-      if ((GetCpuInfo() & kAVX2) != 0) {
-        LoopRestorationInit_AVX2();
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+      LoopRestorationInit_AVX2();
 #if LIBGAV1_MAX_BITDEPTH >= 10
-        LoopRestorationInit10bpp_AVX2();
+      LoopRestorationInit10bpp_AVX2();
 #endif
-      }
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        LoopRestorationInit_SSE4_1();
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      LoopRestorationInit_SSE4_1();
 #if LIBGAV1_MAX_BITDEPTH >= 10
-        LoopRestorationInit10bpp_SSE4_1();
+      LoopRestorationInit10bpp_SSE4_1();
 #endif
-      }
     } else if (absl::StartsWith(test_case, "NEON/")) {
       LoopRestorationInit_NEON();
 #if LIBGAV1_MAX_BITDEPTH >= 10
@@ -381,19 +379,17 @@ class WienerFilterTest : public testing::TestWithParam<int>,
     const char* const test_case = test_info->test_suite_name();
     if (absl::StartsWith(test_case, "C/")) {
     } else if (absl::StartsWith(test_case, "AVX2/")) {
-      if ((GetCpuInfo() & kAVX2) != 0) {
-        LoopRestorationInit_AVX2();
+      if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!";
+      LoopRestorationInit_AVX2();
 #if LIBGAV1_MAX_BITDEPTH >= 10
-        LoopRestorationInit10bpp_AVX2();
+      LoopRestorationInit10bpp_AVX2();
 #endif
-      }
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        LoopRestorationInit_SSE4_1();
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      LoopRestorationInit_SSE4_1();
 #if LIBGAV1_MAX_BITDEPTH >= 10
-        LoopRestorationInit10bpp_SSE4_1();
+      LoopRestorationInit10bpp_SSE4_1();
 #endif
-      }
     } else if (absl::StartsWith(test_case, "NEON/")) {
       LoopRestorationInit_NEON();
 #if LIBGAV1_MAX_BITDEPTH >= 10
diff --git a/src/dsp/mask_blend_test.cc b/src/dsp/mask_blend_test.cc
index 29dd43b..06793e5 100644
--- a/src/dsp/mask_blend_test.cc
+++ b/src/dsp/mask_blend_test.cc
@@ -270,9 +270,8 @@ class MaskBlendTest : public testing::TestWithParam<MaskBlendTestParam>,
     } else if (absl::StartsWith(test_case, "NEON/")) {
       MaskBlendInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        MaskBlendInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      MaskBlendInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
@@ -427,6 +426,7 @@ void MaskBlendTest<bitdepth, Pixel>::Test(const char* const digest,
       if (bitdepth != 8) {
         ASSERT_EQ(func_8bpp_, nullptr);
       }
+      ASSERT_NE(func_, nullptr);
       func_(source1_, source2_, src_2_stride, mask_, mask_stride, width, height,
             dest_, kDestStride);
     }
diff --git a/src/dsp/motion_field_projection_test.cc b/src/dsp/motion_field_projection_test.cc
index 3a47cc7..8a57696 100644
--- a/src/dsp/motion_field_projection_test.cc
+++ b/src/dsp/motion_field_projection_test.cc
@@ -63,9 +63,8 @@ class MotionFieldProjectionTest : public testing::TestWithParam<int> {
     } else if (absl::StartsWith(test_case, "NEON/")) {
       MotionFieldProjectionInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        MotionFieldProjectionInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      MotionFieldProjectionInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
diff --git a/src/dsp/motion_vector_search_test.cc b/src/dsp/motion_vector_search_test.cc
index a7b2ec8..5c680d6 100644
--- a/src/dsp/motion_vector_search_test.cc
+++ b/src/dsp/motion_vector_search_test.cc
@@ -55,9 +55,8 @@ class MotionVectorSearchTest : public testing::TestWithParam<int>,
     } else if (absl::StartsWith(test_case, "NEON/")) {
       MotionVectorSearchInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        MotionVectorSearchInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      MotionVectorSearchInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
              << test_case;
diff --git a/src/dsp/obmc_test.cc b/src/dsp/obmc_test.cc
index a10feb2..289fd66 100644
--- a/src/dsp/obmc_test.cc
+++ b/src/dsp/obmc_test.cc
@@ -193,9 +193,8 @@ class ObmcBlendTest : public testing::TestWithParam<ObmcTestParam> {
     const absl::string_view test_case = test_info->test_suite_name();
     if (absl::StartsWith(test_case, "C/")) {
     } else if (absl::StartsWith(test_case, "SSE41/")) {
-      if ((GetCpuInfo() & kSSE4_1) != 0) {
-        ObmcInit_SSE4_1();
-      }
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+      ObmcInit_SSE4_1();
     } else if (absl::StartsWith(test_case, "NEON/")) {
       ObmcInit_NEON();
     } else {
diff --git a/src/dsp/super_res_test.cc b/src/dsp/super_res_test.cc
index 7b253ff..0c3537c 100644
--- a/src/dsp/super_res_test.cc
+++ b/src/dsp/super_res_test.cc
@@ -96,6 +96,7 @@ class SuperResTest : public testing::TestWithParam<SuperResTestParam>,
     } else if (absl::StartsWith(test_case, "NEON/")) {
       SuperResInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       SuperResInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
diff --git a/src/dsp/warp_test.cc b/src/dsp/warp_test.cc
index c64c8d6..f93ad8b 100644
--- a/src/dsp/warp_test.cc
+++ b/src/dsp/warp_test.cc
@@ -275,6 +275,7 @@ class WarpTest : public testing::TestWithParam<WarpTestParam> {
     } else if (absl::StartsWith(test_case, "NEON/")) {
       WarpInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       WarpInit_SSE4_1();
     } else {
       FAIL() << "Unrecognized architecture prefix in test case name: "
diff --git a/src/dsp/weight_mask_test.cc b/src/dsp/weight_mask_test.cc
index 74ec03c..a080ec4 100644
--- a/src/dsp/weight_mask_test.cc
+++ b/src/dsp/weight_mask_test.cc
@@ -223,6 +223,7 @@ class WeightMaskTest : public testing::TestWithParam<WeightMaskTestParam>,
     } else if (absl::StartsWith(test_case, "NEON/")) {
       WeightMaskInit_NEON();
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       WeightMaskInit_SSE4_1();
     }
     func_ = dsp->weight_mask[width_index][height_index][mask_is_inverse_];
diff --git a/src/dsp/x86/common_avx2_test.cc b/src/dsp/x86/common_avx2_test.cc
index 2062683..4b294b0 100644
--- a/src/dsp/x86/common_avx2_test.cc
+++ b/src/dsp/x86/common_avx2_test.cc
@@ -12,26 +12,27 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "src/dsp/x86/common_avx2.h"
+#include "src/dsp/x86/common_avx2_test.h"
 
 #include "gtest/gtest.h"
+#include "src/utils/cpu.h"
 
 #if LIBGAV1_TARGETING_AVX2
 
 #include <cstdint>
 
+#include "src/dsp/x86/common_avx2.h"
 #include "src/utils/common.h"
 
 namespace libgav1 {
 namespace dsp {
-namespace {
 
 // Show that RightShiftWithRounding_S16() is equal to
 // RightShiftWithRounding() only for values less than or equal to
 // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
 // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
 // negative values.
-TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
+void AVX2RightShiftWithRoundingS16Test() {
   for (int bits = 0; bits < 16; ++bits) {
     const int bias = (1 << bits) >> 1;
     for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
@@ -53,15 +54,20 @@ TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) {
   }
 }
 
-}  // namespace
 }  // namespace dsp
 }  // namespace libgav1
 
 #else  // !LIBGAV1_TARGETING_AVX2
 
-TEST(CommonDspTest, AVX2) {
+namespace libgav1 {
+namespace dsp {
+
+void AVX2RightShiftWithRoundingS16Test() {
   GTEST_SKIP() << "Build this module for x86(-64) with AVX2 enabled to enable "
                   "the tests.";
 }
 
+}  // namespace dsp
+}  // namespace libgav1
+
 #endif  // LIBGAV1_TARGETING_AVX2
diff --git a/src/dsp/x86/common_avx2_test.h b/src/dsp/x86/common_avx2_test.h
new file mode 100644
index 0000000..1124f7f
--- /dev/null
+++ b/src/dsp/x86/common_avx2_test.h
@@ -0,0 +1,26 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
+#define LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
+
+namespace libgav1 {
+namespace dsp {
+
+void AVX2RightShiftWithRoundingS16Test();
+
+}  // namespace dsp
+}  // namespace libgav1
+
+#endif  // LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_
diff --git a/src/dsp/x86/common_sse4_test.cc b/src/dsp/x86/common_sse4_test.cc
index 3288cfc..592630c 100644
--- a/src/dsp/x86/common_sse4_test.cc
+++ b/src/dsp/x86/common_sse4_test.cc
@@ -12,26 +12,27 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "src/dsp/x86/common_sse4.h"
+#include "src/dsp/x86/common_sse4_test.h"
 
 #include "gtest/gtest.h"
+#include "src/utils/cpu.h"
 
 #if LIBGAV1_TARGETING_SSE4_1
 
 #include <cstdint>
 
+#include "src/dsp/x86/common_sse4.h"
 #include "src/utils/common.h"
 
 namespace libgav1 {
 namespace dsp {
-namespace {
 
 // Show that RightShiftWithRounding_S16() is equal to
 // RightShiftWithRounding() only for values less than or equal to
 // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then
 // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for
 // negative values.
-TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) {
+void SSE41RightShiftWithRoundingS16Test() {
   for (int bits = 0; bits < 16; ++bits) {
     const int bias = (1 << bits) >> 1;
     for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) {
@@ -50,15 +51,20 @@ TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) {
   }
 }
 
-}  // namespace
 }  // namespace dsp
 }  // namespace libgav1
 
 #else  // !LIBGAV1_TARGETING_SSE4_1
 
-TEST(CommonDspTest, SSE41) {
+namespace libgav1 {
+namespace dsp {
+
+void SSE41RightShiftWithRoundingS16Test() {
   GTEST_SKIP() << "Build this module for x86(-64) with SSE4 enabled to enable "
                   "the tests.";
 }
 
+}  // namespace dsp
+}  // namespace libgav1
+
 #endif  // LIBGAV1_TARGETING_SSE4_1
diff --git a/src/dsp/x86/common_sse4_test.h b/src/dsp/x86/common_sse4_test.h
new file mode 100644
index 0000000..169439a
--- /dev/null
+++ b/src/dsp/x86/common_sse4_test.h
@@ -0,0 +1,26 @@
+// Copyright 2023 The libgav1 Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
+#define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
+
+namespace libgav1 {
+namespace dsp {
+
+void SSE41RightShiftWithRoundingS16Test();
+
+}  // namespace dsp
+}  // namespace libgav1
+
+#endif  // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_
diff --git a/src/dsp/x86/convolve_avx2.cc b/src/dsp/x86/convolve_avx2.cc
index 6e94347..ff51aee 100644
--- a/src/dsp/x86/convolve_avx2.cc
+++ b/src/dsp/x86/convolve_avx2.cc
@@ -27,6 +27,7 @@
 #include "src/dsp/dsp.h"
 #include "src/dsp/x86/common_avx2.h"
 #include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
 #include "src/utils/constants.h"
 
 namespace libgav1 {
@@ -607,6 +608,10 @@ void Convolve2D_AVX2(const void* LIBGAV1_RESTRICT const reference,
   alignas(32) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
   const int intermediate_height = height + vertical_taps - 1;
 
   const ptrdiff_t src_stride = reference_stride;
@@ -1374,6 +1379,10 @@ void ConvolveCompound2D_AVX2(
   alignas(32) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
   const int intermediate_height = height + vertical_taps - 1;
 
   const ptrdiff_t src_stride = reference_stride;
diff --git a/src/dsp/x86/convolve_sse4.cc b/src/dsp/x86/convolve_sse4.cc
index f427c4c..99b87d6 100644
--- a/src/dsp/x86/convolve_sse4.cc
+++ b/src/dsp/x86/convolve_sse4.cc
@@ -28,6 +28,7 @@
 #include "src/dsp/dsp.h"
 #include "src/dsp/x86/common_sse4.h"
 #include "src/utils/common.h"
+#include "src/utils/compiler_attributes.h"
 
 namespace libgav1 {
 namespace dsp {
@@ -254,6 +255,10 @@ void Convolve2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
   alignas(16) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
   const int intermediate_height = height + vertical_taps - 1;
 
   const ptrdiff_t src_stride = reference_stride;
@@ -617,6 +622,10 @@ void ConvolveCompound2D_SSE4_1(
   alignas(16) uint16_t
       intermediate_result[kMaxSuperBlockSizeInPixels *
                           (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x33, sizeof(intermediate_result));
+#endif
 
   // Horizontal filter.
   // Filter types used for width <= 4 are different from those for width > 4.
@@ -1157,6 +1166,10 @@ void ConvolveScale2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference,
   alignas(16) int16_t
       intermediate_result[kIntermediateAllocWidth *
                           (2 * kIntermediateAllocWidth + kSubPixelTaps)];
+#if LIBGAV1_MSAN
+  // Quiet msan warnings. Set with random non-zero value to aid in debugging.
+  memset(intermediate_result, 0x44, sizeof(intermediate_result));
+#endif
   const int num_vert_taps = dsp::GetNumTapsInFilter(vert_filter_index);
   const int intermediate_height =
       (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
diff --git a/src/dsp/x86/intrapred_directional_sse4.cc b/src/dsp/x86/intrapred_directional_sse4.cc
index bc61745..2e64d21 100644
--- a/src/dsp/x86/intrapred_directional_sse4.cc
+++ b/src/dsp/x86/intrapred_directional_sse4.cc
@@ -1023,6 +1023,10 @@ void DirectionalIntraPredictorZone2_SSE4_1(void* const dest, ptrdiff_t stride,
   uint8_t left_buffer[288];
   memcpy(top_buffer + 128, static_cast<const uint8_t*>(top_row) - 16, 160);
   memcpy(left_buffer + 128, static_cast<const uint8_t*>(left_column) - 16, 160);
+#if LIBGAV1_MSAN
+  memset(top_buffer, 0x33, 128);
+  memset(left_buffer, 0x44, 128);
+#endif
   const uint8_t* top_ptr = top_buffer + 144;
   const uint8_t* left_ptr = left_buffer + 144;
   if (width == 4 || height == 4) {
diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc
index 6625d51..029e168 100644
--- a/src/dsp/x86/loop_restoration_10bit_sse4.cc
+++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc
@@ -1079,7 +1079,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index,
   // general-purpose register to process. Faster than using _mm_extract_epi8().
   uint8_t temp[8];
   StoreLo8(temp, idx);
-  *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  // offset == 0 is assumed to be the first call to this function. The value is
+  // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least
+  // equivalent to, if not faster than, pinsrb.
+  if (offset == 0) {
+    *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]);
+  } else {
+    *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  }
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3);
diff --git a/src/dsp/x86/loop_restoration_sse4.cc b/src/dsp/x86/loop_restoration_sse4.cc
index b4df072..8c24c39 100644
--- a/src/dsp/x86/loop_restoration_sse4.cc
+++ b/src/dsp/x86/loop_restoration_sse4.cc
@@ -1222,7 +1222,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index,
   // general-purpose register to process. Faster than using _mm_extract_epi8().
   uint8_t temp[8];
   StoreLo8(temp, idx);
-  *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  // offset == 0 is assumed to be the first call to this function. The value is
+  // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least
+  // equivalent to, if not faster than, pinsrb.
+  if (offset == 0) {
+    *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]);
+  } else {
+    *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0);
+  }
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2);
   *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3);
diff --git a/src/film_grain_test.cc b/src/film_grain_test.cc
index d5854e0..fc1f1b1 100644
--- a/src/film_grain_test.cc
+++ b/src/film_grain_test.cc
@@ -2190,8 +2190,10 @@ class BlendNoiseTest : public testing::TestWithParam<std::tuple<int, int>> {
   static_assert(bitdepth >= kBitdepth8 && bitdepth <= LIBGAV1_MAX_BITDEPTH, "");
   using GrainType =
       typename std::conditional<bitdepth == 8, int8_t, int16_t>::type;
+  ~BlendNoiseTest() override = default;
 
-  BlendNoiseTest() {
+ protected:
+  void SetUp() override {
     test_utils::ResetDspTable(bitdepth);
     FilmGrainInit_C();
     const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
@@ -2204,6 +2206,7 @@ class BlendNoiseTest : public testing::TestWithParam<std::tuple<int, int>> {
       FilmGrainInit_NEON();
 #endif
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       FilmGrainInit_SSE4_1();
     }
     const BlendNoiseTestParam test_param(GetParam());
@@ -2236,9 +2239,7 @@ class BlendNoiseTest : public testing::TestWithParam<std::tuple<int, int>> {
     dest_plane_v_ =
         dest_plane_u_ + uv_stride_ * uv_height_ + kBorderPixelsFilmGrain;
   }
-  ~BlendNoiseTest() override = default;
 
- protected:
   void TestSpeed(int num_runs);
 
  private:
@@ -2298,15 +2299,15 @@ void BlendNoiseTest<bitdepth, Pixel>::ConvertScalingLut10bpp(
 template <int bitdepth, typename Pixel>
 void BlendNoiseTest<bitdepth, Pixel>::TestSpeed(const int num_runs) {
   if (blend_chroma_func_ == nullptr || blend_luma_func_ == nullptr) return;
-  ASSERT_TRUE(noise_image_[kPlaneY].Reset(height_,
-                                          width_ + kBorderPixelsFilmGrain,
-                                          /*zero_initialize=*/false));
-  ASSERT_TRUE(noise_image_[kPlaneU].Reset(uv_height_,
-                                          uv_width_ + kBorderPixelsFilmGrain,
-                                          /*zero_initialize=*/false));
-  ASSERT_TRUE(noise_image_[kPlaneV].Reset(uv_height_,
-                                          uv_width_ + kBorderPixelsFilmGrain,
-                                          /*zero_initialize=*/false));
+  // Allow optimized code to read into the border without generating MSan
+  // warnings. This matches the behavior in FilmGrain::AllocateNoiseImage().
+  constexpr bool zero_initialize = LIBGAV1_MSAN == 1;
+  ASSERT_TRUE(noise_image_[kPlaneY].Reset(height_, width_ + kNoiseImagePadding,
+                                          zero_initialize));
+  ASSERT_TRUE(noise_image_[kPlaneU].Reset(
+      uv_height_, uv_width_ + kNoiseImagePadding, zero_initialize));
+  ASSERT_TRUE(noise_image_[kPlaneV].Reset(
+      uv_height_, uv_width_ + kNoiseImagePadding, zero_initialize));
   libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
   // Allow any valid grain values.
   const int grain_max = GetGrainMax<bitdepth>();
@@ -2533,7 +2534,10 @@ template <int bitdepth, typename Pixel>
 class FilmGrainSpeedTest : public testing::TestWithParam<int> {
  public:
   static_assert(bitdepth >= kBitdepth8 && bitdepth <= LIBGAV1_MAX_BITDEPTH, "");
-  FilmGrainSpeedTest() {
+  ~FilmGrainSpeedTest() override = default;
+
+ protected:
+  void SetUp() override {
     test_utils::ResetDspTable(bitdepth);
     FilmGrainInit_C();
 
@@ -2545,6 +2549,7 @@ class FilmGrainSpeedTest : public testing::TestWithParam<int> {
       FilmGrainInit_NEON();
 #endif
     } else if (absl::StartsWith(test_case, "SSE41/")) {
+      if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
       FilmGrainInit_SSE4_1();
     }
     uv_width_ = (width_ + subsampling_x_) >> subsampling_x_;
@@ -2566,9 +2571,7 @@ class FilmGrainSpeedTest : public testing::TestWithParam<int> {
     const int num_threads = GetParam();
     thread_pool_ = ThreadPool::Create(num_threads);
   }
-  ~FilmGrainSpeedTest() override = default;
 
- protected:
   void TestSpeed(int num_runs);
 
  private:
diff --git a/src/gav1/decoder_buffer.h b/src/gav1/decoder_buffer.h
index 0a5586e..816eca4 100644
--- a/src/gav1/decoder_buffer.h
+++ b/src/gav1/decoder_buffer.h
@@ -115,20 +115,30 @@ typedef enum Libgav1ColorRange {
   kLibgav1ColorRangeFull     // YUV/RGB [0..255]
 } Libgav1ColorRange;
 
+// Section 6.7.3.
 typedef struct Libgav1ObuMetadataHdrCll {  // NOLINT
   uint16_t max_cll;                        // Maximum content light level.
   uint16_t max_fall;                       // Maximum frame-average light level.
 } Libgav1ObuMetadataHdrCll;
 
+// Section 6.7.4.
 typedef struct Libgav1ObuMetadataHdrMdcv {  // NOLINT
+  // 0.16 fixed-point X/Y chromaticity coordinate as defined by CIE 1931 in
+  // R/G/B order.
   uint16_t primary_chromaticity_x[3];
   uint16_t primary_chromaticity_y[3];
+  // 0.16 fixed-point X/Y chromaticity coordinate as defined by CIE 1931.
   uint16_t white_point_chromaticity_x;
   uint16_t white_point_chromaticity_y;
+  // 24.8 fixed-point maximum luminance, represented in candelas per square
+  // meter.
   uint32_t luminance_max;
+  // 18.14 fixed-point minimum luminance, represented in candelas per square
+  // meter.
   uint32_t luminance_min;
 } Libgav1ObuMetadataHdrMdcv;
 
+// Section 6.7.2.
 typedef struct Libgav1ObuMetadataItutT35 {  // NOLINT
   uint8_t country_code;
   uint8_t country_code_extension_byte;  // Valid if country_code is 0xFF.
diff --git a/src/gav1/version.h b/src/gav1/version.h
index b386acc..cca2383 100644
--- a/src/gav1/version.h
+++ b/src/gav1/version.h
@@ -23,7 +23,7 @@
 // (https://semver.org).
 
 #define LIBGAV1_MAJOR_VERSION 0
-#define LIBGAV1_MINOR_VERSION 18
+#define LIBGAV1_MINOR_VERSION 19
 #define LIBGAV1_PATCH_VERSION 0
 
 #define LIBGAV1_VERSION                                           \
diff --git a/src/post_filter/cdef.cc b/src/post_filter/cdef.cc
index 037fc17..ced4096 100644
--- a/src/post_filter/cdef.cc
+++ b/src/post_filter/cdef.cc
@@ -11,6 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include <cassert>
+
 #include "src/post_filter.h"
 #include "src/utils/blocking_counter.h"
 #include "src/utils/compiler_attributes.h"
@@ -72,10 +74,23 @@ void CopyRowForCdef(const Pixel* src, int block_width, int unit_width,
   }
 }
 
+// GCC 13.x will report a false positive from the call to
+// ApplyCdefForOneSuperBlockRowHelper() with a nullptr in
+// ApplyCdefForOneSuperBlockRow(). The call to CopyPixels() in
+// ApplyCdefForOneUnit() is only made when thread_pool_ != nullptr and
+// border_columns[][] is a valid pointer.
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
 // For |height| rows, copy |width| pixels of size |pixel_size| from |src| to
 // |dst|.
 void CopyPixels(const uint8_t* src, int src_stride, uint8_t* dst,
                 int dst_stride, int width, int height, size_t pixel_size) {
+  assert(src != nullptr);
+  assert(dst != nullptr);
+  assert(height > 0);
   int y = height;
   do {
     memcpy(dst, src, width * pixel_size);
@@ -83,6 +98,9 @@ void CopyPixels(const uint8_t* src, int src_stride, uint8_t* dst,
     dst += dst_stride;
   } while (--y != 0);
 }
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
 
 }  // namespace
 
@@ -327,6 +345,7 @@ void PostFilter::ApplyCdefForOneUnit(uint16_t* cdef_block, const int index,
         GetSourceBuffer(kPlaneY, row4x4_start,
                         column4x4_start + block_width4x4) -
         kCdefBorder * sizeof(Pixel);
+    assert(border_columns != nullptr);
     CopyPixels(src_line, frame_buffer_.stride(kPlaneY),
                border_columns[border_columns_dst_index][kPlaneY],
                kCdefBorder * sizeof(Pixel), kCdefBorder,
diff --git a/src/post_filter/loop_restoration.cc b/src/post_filter/loop_restoration.cc
index 2e6982c..b5e1432 100644
--- a/src/post_filter/loop_restoration.cc
+++ b/src/post_filter/loop_restoration.cc
@@ -79,7 +79,13 @@ void PostFilter::ApplyLoopRestorationForOneRow(
           bottom_border_stride = border_stride;
         }
       }
+#if LIBGAV1_MSAN
+      // The optimized loop restoration code may read past initialized values
+      // within the buffer.
+      RestorationBuffer restoration_buffer = {};
+#else
       RestorationBuffer restoration_buffer;
+#endif
       const LoopRestorationType type = restoration_info[unit_column].type;
       assert(type == kLoopRestorationTypeSgrProj ||
              type == kLoopRestorationTypeWiener);
diff --git a/src/post_filter/post_filter.cc b/src/post_filter/post_filter.cc
index bc71410..9745a01 100644
--- a/src/post_filter/post_filter.cc
+++ b/src/post_filter/post_filter.cc
@@ -372,17 +372,38 @@ void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
     uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane]
                                                  : frame_buffer_.data(plane)) +
                            row * stride;
-    const int left_border = for_loop_restoration
+#if LIBGAV1_MSAN
+    const int right_padding =
+        (frame_buffer_.stride(plane) >> static_cast<int>(bitdepth_ > 8)) -
+        ((frame_buffer_.left_border(plane) + frame_buffer_.width(plane) +
+          frame_buffer_.right_border(plane)));
+    const int padded_right_border_size =
+        frame_buffer_.right_border(plane) + right_padding;
+    // The optimized loop restoration code may read into the next row's left
+    // border depending on the start of the last superblock and the size of the
+    // right border. This is safe as the post filter is applied after
+    // reconstruction is complete and the threaded implementations do not read
+    // from the left border.
+    const int left_border_overread =
+        (for_loop_restoration && padded_right_border_size < 64)
+            ? 63 - padded_right_border_size
+            : 0;
+    assert(!for_loop_restoration || left_border_overread == 0 ||
+           (frame_buffer_.bottom_border(plane) > 0 &&
+            left_border_overread <= frame_buffer_.left_border(plane)));
+    const int left_border = (for_loop_restoration && left_border_overread == 0)
                                 ? kRestorationHorizontalBorder
                                 : frame_buffer_.left_border(plane);
-#if LIBGAV1_MSAN
     // The optimized loop restoration code will overread the visible frame
     // buffer into the right border. Extend the right boundary further to
     // prevent msan warnings.
     const int right_border = for_loop_restoration
-                                 ? kRestorationHorizontalBorder + 16
+                                 ? std::min(padded_right_border_size, 63)
                                  : frame_buffer_.right_border(plane);
 #else
+    const int left_border = for_loop_restoration
+                                ? kRestorationHorizontalBorder
+                                : frame_buffer_.left_border(plane);
     const int right_border = for_loop_restoration
                                  ? kRestorationHorizontalBorder
                                  : frame_buffer_.right_border(plane);
diff --git a/src/reconstruction_test.cc b/src/reconstruction_test.cc
index fd780b3..4d09ada 100644
--- a/src/reconstruction_test.cc
+++ b/src/reconstruction_test.cc
@@ -65,9 +65,8 @@ class ReconstructionTest : public testing::TestWithParam<int> {
       const char* const test_case = test_info->test_suite_name();
       if (absl::StartsWith(test_case, "C/")) {
       } else if (absl::StartsWith(test_case, "SSE41/")) {
-        if ((GetCpuInfo() & kSSE4_1) != 0) {
-          dsp::InverseTransformInit_SSE4_1();
-        }
+        if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
+        dsp::InverseTransformInit_SSE4_1();
       } else if (absl::StartsWith(test_case, "NEON/")) {
         dsp::InverseTransformInit_NEON();
       } else {
diff --git a/src/tile/bitstream/mode_info.cc b/src/tile/bitstream/mode_info.cc
index cb7b311..ffbbf64 100644
--- a/src/tile/bitstream/mode_info.cc
+++ b/src/tile/bitstream/mode_info.cc
@@ -890,6 +890,14 @@ uint16_t* Tile::GetReferenceCdf(
         block, kReferenceFrameBackward, kReferenceFrameBackward,
         kReferenceFrameAlternate2, kReferenceFrameAlternate2);
   }
+  // With GCC 12.x on some targets, the compiler reports a false positive
+  // -Warray-bounds on the |context| subscript when is_single=false,
+  // is_backward=false and index=0. GetReferenceContext() only returns 0..2.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+  assert(context >= 0 && context <= 2);
   if (is_single) {
     // The index parameter for single references is offset by one since the spec
     // uses 1-based index for these elements.
@@ -900,6 +908,9 @@ uint16_t* Tile::GetReferenceCdf(
         .compound_backward_reference_cdf[context][index];
   }
   return symbol_decoder_context_.compound_reference_cdf[type][context][index];
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 }
 
 void Tile::ReadReferenceFrames(const Block& block, bool skip_mode) {
diff --git a/src/tile/tile.cc b/src/tile/tile.cc
index 5070bb6..10ebbf2 100644
--- a/src/tile/tile.cc
+++ b/src/tile/tile.cc
@@ -2605,17 +2605,17 @@ void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
   ReferenceInfo* reference_info = current_frame_.reference_info();
   for (int i = 1; i >= 0; --i) {
     const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i];
+    if (reference_frame_to_store <= kReferenceFrameIntra) continue;
     // Must make a local copy so that StoreMotionFieldMvs() knows there is no
     // overlap between load and store.
     const MotionVector mv_to_store = bp.mv.mv[i];
     const int mv_row = std::abs(mv_to_store.mv[0]);
     const int mv_column = std::abs(mv_to_store.mv[1]);
-    if (reference_frame_to_store > kReferenceFrameIntra &&
-        // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two
-        // absolute values and then compare with kRefMvsLimit to save a branch.
-        // The next line is equivalent to:
-        // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
-        (mv_row | mv_column) <= kRefMvsLimit &&
+    // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two absolute
+    // values and then compare with kRefMvsLimit to save a branch.
+    // The next line is equivalent to:
+    // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
+    if ((mv_row | mv_column) <= kRefMvsLimit &&
         reference_info->relative_distance_from[reference_frame_to_store] < 0) {
       const int row_start8x8 = DivideBy2(row_start4x4);
       const int row_limit8x8 = DivideBy2(row_limit4x4);
diff --git a/src/utils/threadpool.cc b/src/utils/threadpool.cc
index a3099e1..6fa2e88 100644
--- a/src/utils/threadpool.cc
+++ b/src/utils/threadpool.cc
@@ -29,6 +29,7 @@
 #include <cinttypes>
 #include <cstddef>
 #include <cstdint>
+#include <cstdio>
 #include <cstring>
 #include <new>
 #include <utility>
@@ -216,7 +217,7 @@ void ThreadPool::WorkerThread::SetupName() {
     rv = pthread_setname_np(name);
     assert(rv == 0);
     static_cast<void>(rv);
-#elif defined(__ANDROID__) || defined(__GLIBC__)
+#elif defined(__ANDROID__) || (defined(__GLIBC__) && !defined(__GNU__))
     // If the |name| buffer is longer than 16 bytes, pthread_setname_np fails
     // with error 34 (ERANGE) on Android.
     char name[16];
diff --git a/src/utils/types.h b/src/utils/types.h
index 0dd6360..c2daf1f 100644
--- a/src/utils/types.h
+++ b/src/utils/types.h
@@ -390,12 +390,13 @@ struct FilmGrainParams {
   uint8_t point_v_value[10];
   uint8_t point_v_scaling[10];
 
-  uint8_t chroma_scaling;              // [8, 11].
-  uint8_t auto_regression_coeff_lag;   // [0, 3].
-  int8_t auto_regression_coeff_y[24];  // [-128, 127]
-  int8_t auto_regression_coeff_u[25];  // [-128, 127]
-  int8_t auto_regression_coeff_v[25];  // [-128, 127]
-  // Shift value: auto regression coeffs range
+  uint8_t chroma_scaling;             // grain_scaling_minus_8 + 8: [8, 11].
+  uint8_t auto_regression_coeff_lag;  // ar_coeff_lag: [0, 3].
+  // ar_coeffs_{y,u,v}_plus_128 - 128: [-128, 127].
+  int8_t auto_regression_coeff_y[24];
+  int8_t auto_regression_coeff_u[25];
+  int8_t auto_regression_coeff_v[25];
+  // Shift value: ar_coeff_shift_minus_6 + 6, auto regression coeffs range:
   // 6: [-2, 2)
   // 7: [-1, 1)
   // 8: [-0.5, 0.5)
@@ -405,16 +406,12 @@ struct FilmGrainParams {
   uint16_t grain_seed;
   int reference_index;
   int grain_scale_shift;
-  // These multipliers are encoded as nonnegative values by adding 128 first.
-  // The 128 is subtracted during parsing.
-  int8_t u_multiplier;       // [-128, 127]
-  int8_t u_luma_multiplier;  // [-128, 127]
-  // These offsets are encoded as nonnegative values by adding 256 first. The
-  // 256 is subtracted during parsing.
-  int16_t u_offset;          // [-256, 255]
-  int8_t v_multiplier;       // [-128, 127]
-  int8_t v_luma_multiplier;  // [-128, 127]
-  int16_t v_offset;          // [-256, 255]
+  int8_t u_multiplier;       // cb_mult - 128:      [-128, 127].
+  int8_t u_luma_multiplier;  // cb_luma_mult - 128: [-128, 127].
+  int16_t u_offset;          // cb_offset - 256:    [-256, 255].
+  int8_t v_multiplier;       // cr_mult - 128:      [-128, 127].
+  int8_t v_luma_multiplier;  // cr_luma_mult - 128: [-128, 127].
+  int16_t v_offset;          // cr_offset - 256:    [-256, 255].
 };
 
 struct ObuFrameHeader {
diff --git a/src/yuv_buffer.cc b/src/yuv_buffer.cc
index efb8016..85619c3 100644
--- a/src/yuv_buffer.cc
+++ b/src/yuv_buffer.cc
@@ -197,45 +197,58 @@ bool YuvBuffer::Realloc(int bitdepth, bool is_monochrome, int width, int height,
   assert(!is_monochrome || buffer_[kPlaneV] == nullptr);
 
 #if LIBGAV1_MSAN
-  const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
-  int width_in_bytes = width * pixel_size;
+  InitializeFrameBorders();
+#endif
+
+  return true;
+}
+
+#if LIBGAV1_MSAN
+void YuvBuffer::InitializeFrameBorders() {
+  const int pixel_size = (bitdepth_ == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
+  const int y_width_in_bytes = y_width_ * pixel_size;
   // The optimized loop restoration code will overread the visible frame buffer
   // into the right border. The optimized cfl subsambler uses the right border
   // as well. Initialize the right border and padding to prevent msan warnings.
-  int right_border_size_in_bytes = right_border * pixel_size;
+  const int y_right_border_size_in_bytes = right_border_[kPlaneY] * pixel_size;
   // Calculate the padding bytes for the buffer. Note: The stride of the buffer
   // is always a multiple of 16. (see yuv_buffer.h)
-  const int right_padding_in_bytes =
-      stride_[kPlaneY] - (pixel_size * (width + left_border + right_border));
-  const int padded_right_border_size =
-      right_border_size_in_bytes + right_padding_in_bytes;
-  constexpr uint8_t right_val = 0x55;
-  uint8_t* rb = buffer_[kPlaneY] + width_in_bytes;
-  for (int i = 0; i < height + bottom_border; ++i) {
-    memset(rb, right_val, padded_right_border_size);
+  const int y_right_padding_in_bytes =
+      stride_[kPlaneY] - (pixel_size * (y_width_ + left_border_[kPlaneY] +
+                                        right_border_[kPlaneY]));
+  const int y_padded_right_border_size =
+      y_right_border_size_in_bytes + y_right_padding_in_bytes;
+  constexpr uint8_t kRightValue = 0x55;
+  uint8_t* rb = buffer_[kPlaneY] + y_width_in_bytes;
+  for (int i = 0; i < y_height_ + bottom_border_[kPlaneY]; ++i) {
+    memset(rb, kRightValue, y_padded_right_border_size);
     rb += stride_[kPlaneY];
   }
-  if (!is_monochrome) {
-    int uv_width_in_bytes = uv_width * pixel_size;
-    int uv_right_border_size_in_bytes = uv_right_border * pixel_size;
+
+  if (!is_monochrome_) {
+    const int uv_width_in_bytes = uv_width_ * pixel_size;
+    const int uv_right_border_size_in_bytes =
+        right_border_[kPlaneU] * pixel_size;
+    assert(right_border_[kPlaneU] == right_border_[kPlaneV]);
     const int u_right_padding_in_bytes =
-        stride_[kPlaneU] -
-        (pixel_size * (uv_width + uv_left_border + uv_right_border));
+        stride_[kPlaneU] - (pixel_size * (uv_width_ + left_border_[kPlaneU] +
+                                          right_border_[kPlaneU]));
     const int u_padded_right_border_size =
         uv_right_border_size_in_bytes + u_right_padding_in_bytes;
     rb = buffer_[kPlaneU] + uv_width_in_bytes;
-    for (int i = 0; i < uv_height; ++i) {
-      memset(rb, right_val, u_padded_right_border_size);
+    for (int i = 0; i < uv_height_; ++i) {
+      memset(rb, kRightValue, u_padded_right_border_size);
       rb += stride_[kPlaneU];
     }
     const int v_right_padding_in_bytes =
         stride_[kPlaneV] -
-        ((uv_width + uv_left_border + uv_right_border) * pixel_size);
+        ((uv_width_ + left_border_[kPlaneV] + right_border_[kPlaneV]) *
+         pixel_size);
     const int v_padded_right_border_size =
         uv_right_border_size_in_bytes + v_right_padding_in_bytes;
     rb = buffer_[kPlaneV] + uv_width_in_bytes;
-    for (int i = 0; i < uv_height; ++i) {
-      memset(rb, right_val, v_padded_right_border_size);
+    for (int i = 0; i < uv_height_; ++i) {
+      memset(rb, kRightValue, v_padded_right_border_size);
       rb += stride_[kPlaneV];
     }
   }
@@ -244,13 +257,11 @@ bool YuvBuffer::Realloc(int bitdepth, bool is_monochrome, int width, int height,
   // block) into the uninitialized visible area. The cfl subsampler can overread
   // into the bottom border as well. Initialize the both to quiet msan warnings.
   uint8_t* y_visible = buffer_[kPlaneY];
-  for (int i = 0; i < height + bottom_border; ++i) {
-    memset(y_visible, right_val, width_in_bytes);
+  for (int i = 0; i < y_height_ + bottom_border_[kPlaneY]; ++i) {
+    memset(y_visible, kRightValue, y_width_in_bytes);
     y_visible += stride_[kPlaneY];
   }
-#endif
-
-  return true;
 }
+#endif  // LIBGAV1_MSAN
 
 }  // namespace libgav1
diff --git a/src/yuv_buffer.h b/src/yuv_buffer.h
index b9e8cd3..d7818bd 100644
--- a/src/yuv_buffer.h
+++ b/src/yuv_buffer.h
@@ -24,6 +24,7 @@
 #include <type_traits>
 
 #include "src/gav1/frame_buffer.h"
+#include "src/utils/compiler_attributes.h"
 #include "src/utils/constants.h"
 
 namespace libgav1 {
@@ -146,6 +147,11 @@ class YuvBuffer {
 
  private:
   static constexpr int kFrameBufferRowAlignment = 16;
+
+#if LIBGAV1_MSAN
+  void InitializeFrameBorders();
+#endif
+
   int bitdepth_ = 0;
   bool is_monochrome_ = false;
 
diff --git a/tests/fuzzer/fuzzer_temp_file.h b/tests/fuzzer/fuzzer_temp_file.h
index 5d12bbe..ed8f51c 100644
--- a/tests/fuzzer/fuzzer_temp_file.h
+++ b/tests/fuzzer/fuzzer_temp_file.h
@@ -25,12 +25,52 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#ifdef _WIN32
+#include <io.h>
+#include <windows.h>
+
+#define strdup _strdup
+#define unlink _unlink
+#else
 #include <unistd.h>
+#endif  // _WIN32
 
 // Pure-C interface for creating and cleaning up temporary files.
 
 static char* fuzzer_get_tmpfile_with_suffix(const uint8_t* data, size_t size,
                                             const char* suffix) {
+#ifdef _WIN32
+  // GetTempFileNameA generates '<path>\<pre><uuuu>.TMP'; |suffix| is unused.
+  (void)suffix;  // NOLINT (this could be a C compilation unit)
+  char temp_path[MAX_PATH];
+  const DWORD ret = GetTempPathA(MAX_PATH, temp_path);
+  if (ret == 0 || ret > MAX_PATH) {
+    fprintf(stderr, "Error getting temporary directory name: %lu\n",
+            GetLastError());
+    abort();
+  }
+  char* filename_buffer =
+      (char*)malloc(MAX_PATH);  // NOLINT (this could be a C compilation unit)
+  if (!filename_buffer) {
+    perror("Failed to allocate file name buffer.");
+    abort();
+  }
+  if (GetTempFileNameA(temp_path, "ftf", /*uUnique=*/0, filename_buffer) == 0) {
+    fprintf(stderr, "Error getting temporary file name: %lu\n", GetLastError());
+    abort();
+  }
+#if defined(_MSC_VER) || defined(MINGW_HAS_SECURE_API)
+  FILE* file;
+  const errno_t err = fopen_s(&file, filename_buffer, "wb");
+  if (err != 0) file = NULL;  // NOLINT (this could be a C compilation unit)
+#else
+  FILE* file = fopen(filename_buffer, "wb");
+#endif
+  if (!file) {
+    perror("Failed to open file.");
+    abort();
+  }
+#else  // !_WIN32
   if (suffix == NULL) {  // NOLINT (this could be a C compilation unit)
     suffix = "";
   }
@@ -55,7 +95,7 @@ static char* fuzzer_get_tmpfile_with_suffix(const uint8_t* data, size_t size,
   }
 
   if (snprintf(filename_buffer, buffer_sz, "%s%s", leading_temp_path, suffix) >=
-      buffer_sz) {
+      (int)buffer_sz) {  // NOLINT (this could be a C compilation unit)
     perror("File name buffer too short.");
     abort();
   }
@@ -71,9 +111,10 @@ static char* fuzzer_get_tmpfile_with_suffix(const uint8_t* data, size_t size,
     close(file_descriptor);
     abort();
   }
+#endif  // _WIN32
   const size_t bytes_written = fwrite(data, sizeof(uint8_t), size, file);
   if (bytes_written < size) {
-    close(file_descriptor);
+    fclose(file);
     fprintf(stderr, "Failed to write all bytes to file (%zu out of %zu)",
             bytes_written, size);
     abort();
diff --git a/tests/fuzzer/obu_parser_fuzzer.cc b/tests/fuzzer/obu_parser_fuzzer.cc
index 634a802..f71ca17 100644
--- a/tests/fuzzer/obu_parser_fuzzer.cc
+++ b/tests/fuzzer/obu_parser_fuzzer.cc
@@ -41,6 +41,11 @@ constexpr size_t kMaxDataSize = 200 * 1024;
 #endif
 
 inline void ParseObu(const uint8_t* const data, size_t size) {
+  size_t av1c_size;
+  const std::unique_ptr<uint8_t[]> av1c_box =
+      libgav1::ObuParser::GetAV1CodecConfigurationBox(data, size, &av1c_size);
+  static_cast<void>(av1c_box);
+
   libgav1::InternalFrameBufferList buffer_list;
   libgav1::BufferPool buffer_pool(libgav1::OnInternalFrameBufferSizeChanged,
                                   libgav1::GetInternalFrameBuffer,
diff --git a/tests/libgav1_tests.cmake b/tests/libgav1_tests.cmake
index c759d4f..95f6361 100644
--- a/tests/libgav1_tests.cmake
+++ b/tests/libgav1_tests.cmake
@@ -28,7 +28,7 @@ if(NOT LIBGAV1_ENABLE_TESTS OR NOT EXISTS "${libgav1_googletest}")
       "GoogleTest not found, setting LIBGAV1_ENABLE_TESTS to false.\n"
       "To enable tests download the GoogleTest repository to"
       " third_party/googletest:\n\n  git \\\n    -C ${libgav1_root} \\\n"
-      "    clone \\\n"
+      "    clone -b release-1.12.1 --depth 1 \\\n"
       "    https://github.com/google/googletest.git third_party/googletest\n")
     set(LIBGAV1_ENABLE_TESTS FALSE CACHE BOOL "Enables tests." FORCE)
   endif()
@@ -86,13 +86,17 @@ list(APPEND libgav1_common_avx2_test_sources
             "${libgav1_source}/dsp/x86/common_avx2.h"
             "${libgav1_source}/dsp/x86/common_avx2.inc"
             "${libgav1_source}/dsp/x86/common_avx2_test.cc"
+            "${libgav1_source}/dsp/x86/common_avx2_test.h"
             "${libgav1_source}/dsp/x86/common_sse4.inc")
+list(APPEND libgav1_common_dsp_test_sources
+            "${libgav1_source}/dsp/common_dsp_test.cc")
 list(APPEND libgav1_common_neon_test_sources
             "${libgav1_source}/dsp/arm/common_neon_test.cc")
 list(APPEND libgav1_common_sse4_test_sources
             "${libgav1_source}/dsp/x86/common_sse4.h"
             "${libgav1_source}/dsp/x86/common_sse4.inc"
-            "${libgav1_source}/dsp/x86/common_sse4_test.cc")
+            "${libgav1_source}/dsp/x86/common_sse4_test.cc"
+            "${libgav1_source}/dsp/x86/common_sse4_test.h")
 list(APPEND libgav1_convolve_test_sources
             "${libgav1_source}/dsp/convolve_test.cc")
 list(APPEND libgav1_cpu_test_sources "${libgav1_source}/utils/cpu_test.cc")
@@ -275,19 +279,29 @@ macro(libgav1_add_tests_targets)
                          libgav1_gtest_main)
 
   if(libgav1_have_avx2)
+    list(APPEND libgav1_common_dsp_test_sources
+                ${libgav1_common_avx2_test_sources})
+  endif()
+  if(libgav1_have_sse4)
+    list(APPEND libgav1_common_dsp_test_sources
+                ${libgav1_common_sse4_test_sources})
+  endif()
+  if(libgav1_have_avx2 OR libgav1_have_sse4)
     libgav1_add_executable(TEST
                            NAME
-                           common_avx2_test
+                           common_dsp_test
                            SOURCES
-                           ${libgav1_common_avx2_test_sources}
+                           ${libgav1_common_dsp_test_sources}
                            DEFINES
                            ${libgav1_defines}
                            INCLUDES
                            ${libgav1_test_include_paths}
+                           OBJLIB_DEPS
+                           libgav1_utils
                            LIB_DEPS
                            ${libgav1_common_test_absl_deps}
-                           libgav1_gtest
-                           libgav1_gtest_main)
+                           libgav1_gtest_main
+                           libgav1_gtest)
   endif()
 
   if(libgav1_have_neon)
@@ -302,22 +316,7 @@ macro(libgav1_add_tests_targets)
                            ${libgav1_test_include_paths}
                            OBJLIB_DEPS
                            libgav1_tests_block_utils
-                           LIB_DEPS
-                           ${libgav1_common_test_absl_deps}
-                           libgav1_gtest
-                           libgav1_gtest_main)
-  endif()
-
-  if(libgav1_have_sse4)
-    libgav1_add_executable(TEST
-                           NAME
-                           common_sse4_test
-                           SOURCES
-                           ${libgav1_common_sse4_test_sources}
-                           DEFINES
-                           ${libgav1_defines}
-                           INCLUDES
-                           ${libgav1_test_include_paths}
+                           libgav1_utils
                            LIB_DEPS
                            ${libgav1_common_test_absl_deps}
                            libgav1_gtest
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2023-12-06 00:16:52 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2023-12-06 00:16:52 +0000
commit	4ca6ab327e527d617af254a2fe98bc9b8a5f5eb9 (patch)
tree	756b70279c1f9558e435faa2e82fda941424ae04
parent	b1fe831e62beccc2e7a11a62ed00ad68058f216f (diff)
parent	3259758f9a1a85933bcf4c4136fe280b21198b7b (diff)
download	libgav1-android14-mainline-os-statsd-release.tar.gz