From 3259758f9a1a85933bcf4c4136fe280b21198b7b Mon Sep 17 00:00:00 2001 From: Harish Mahendrakar Date: Tue, 31 Oct 2023 22:16:08 +0000 Subject: Upgrade libgav1 to v0.19.0 This project was upgraded with external_updater. Usage: tools/external_updater/updater.sh update libgav1 For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md Bug: 308688630 Test: atest-dev CtsMediaV2TestCases -- --module-arg \ CtsMediaV2TestCases:instrumentation-arg:codec-prefix:=c2.android.av1 (cherry picked from https://android-review.googlesource.com/q/commit:743695921c54d75b6d9771f667d724afa58dab02) Merged-In: I43ae2e014a9c5ef332b25a255903ba1272b015ea Change-Id: I43ae2e014a9c5ef332b25a255903ba1272b015ea --- CMakeLists.txt | 2 +- METADATA | 14 ++++--- README.md | 10 +++-- README.version | 3 +- cmake/libgav1_build_definitions.cmake | 2 +- examples/file_reader.cc | 9 ++++- examples/file_writer_test.cc | 3 +- src/c_decoder_test.c | 3 ++ src/decoder_test.cc | 3 ++ src/dsp/arm/convolve_10bit_neon.cc | 62 +++++++++++++++-------------- src/dsp/arm/convolve_neon.cc | 12 +++--- src/dsp/arm/film_grain_neon.cc | 18 ++------- src/dsp/arm/intrapred_directional_neon.cc | 2 +- src/dsp/arm/inverse_transform_neon.cc | 9 +++-- src/dsp/arm/loop_filter_10bit_neon.cc | 2 - src/dsp/arm/loop_restoration_10bit_neon.cc | 14 +++---- src/dsp/arm/loop_restoration_neon.cc | 6 ++- src/dsp/average_blend_test.cc | 5 +-- src/dsp/cdef_test.cc | 18 ++++----- src/dsp/common_dsp_test.cc | 58 +++++++++++++++++++++++++++ src/dsp/convolve_test.cc | 20 ++++------ src/dsp/distance_weighted_blend_test.cc | 5 +-- src/dsp/intra_edge_test.cc | 10 ++--- src/dsp/intrapred_cfl_test.cc | 10 ++--- src/dsp/intrapred_directional_test.cc | 5 +-- src/dsp/intrapred_filter_test.cc | 5 +-- src/dsp/intrapred_test.cc | 7 ++-- src/dsp/inverse_transform_test.cc | 5 +-- src/dsp/loop_filter_test.cc | 5 +-- src/dsp/loop_restoration_test.cc | 28 ++++++------- src/dsp/mask_blend_test.cc | 6 +-- 
src/dsp/motion_field_projection_test.cc | 5 +-- src/dsp/motion_vector_search_test.cc | 5 +-- src/dsp/obmc_test.cc | 5 +-- src/dsp/super_res_test.cc | 1 + src/dsp/warp_test.cc | 1 + src/dsp/weight_mask_test.cc | 1 + src/dsp/x86/common_avx2_test.cc | 16 +++++--- src/dsp/x86/common_avx2_test.h | 26 ++++++++++++ src/dsp/x86/common_sse4_test.cc | 16 +++++--- src/dsp/x86/common_sse4_test.h | 26 ++++++++++++ src/dsp/x86/convolve_avx2.cc | 9 +++++ src/dsp/x86/convolve_sse4.cc | 13 ++++++ src/dsp/x86/intrapred_directional_sse4.cc | 4 ++ src/dsp/x86/loop_restoration_10bit_sse4.cc | 9 ++++- src/dsp/x86/loop_restoration_sse4.cc | 9 ++++- src/film_grain_test.cc | 33 +++++++++------- src/gav1/decoder_buffer.h | 10 +++++ src/gav1/version.h | 2 +- src/post_filter/cdef.cc | 19 +++++++++ src/post_filter/loop_restoration.cc | 6 +++ src/post_filter/post_filter.cc | 27 +++++++++++-- src/reconstruction_test.cc | 5 +-- src/tile/bitstream/mode_info.cc | 11 ++++++ src/tile/tile.cc | 12 +++--- src/utils/threadpool.cc | 3 +- src/utils/types.h | 29 ++++++-------- src/yuv_buffer.cc | 63 ++++++++++++++++++------------ src/yuv_buffer.h | 6 +++ tests/fuzzer/fuzzer_temp_file.h | 45 ++++++++++++++++++++- tests/fuzzer/obu_parser_fuzzer.cc | 5 +++ tests/libgav1_tests.cmake | 43 ++++++++++---------- 62 files changed, 554 insertions(+), 272 deletions(-) create mode 100644 src/dsp/common_dsp_test.cc create mode 100644 src/dsp/x86/common_avx2_test.h create mode 100644 src/dsp/x86/common_sse4_test.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 52b1b32..73f27a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,7 @@ else() " examples & tests and libgav1 when LIBGAV1_THREADPOOL_USE_STD_MUTEX is" " not defined. 
To continue, download the Abseil repository to" " third_party/abseil-cpp:\n git \\\n -C ${libgav1_root} \\\n" - " clone \\\n" + " clone -b 20220623.0 --depth 1 \\\n" " https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp") endif() endif() diff --git a/METADATA b/METADATA index 3710207..38ae79a 100644 --- a/METADATA +++ b/METADATA @@ -1,7 +1,9 @@ -name: "libgav1" -description: - "Google's decoder implementation of the AV1 video codec." +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update libgav1 +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md +name: "libgav1" +description: "Google\'s decoder implementation of the AV1 video codec." third_party { url { type: HOMEPAGE @@ -11,11 +13,11 @@ third_party { type: GIT value: "https://chromium.googlesource.com/codecs/libgav1" } - version: "dc2ae123784cf1a9504d6b4eba112170574e31e0" + version: "v0.19.0" license_type: NOTICE last_upgrade_date { - year: 2022 + year: 2023 month: 10 - day: 4 + day: 31 } } diff --git a/README.md b/README.md index 04c6a94..bdf598c 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,8 @@ compliant AV1 decoder. More information on the AV1 video format can be found at From within the libgav1 directory: ```shell - $ git clone https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp + $ git clone -b 20220623.0 --depth 1 \ + https://github.com/abseil/abseil-cpp.git third_party/abseil-cpp ``` Note: Abseil is required by the examples and tests. libgav1 will depend on @@ -31,7 +32,8 @@ compliant AV1 decoder. More information on the AV1 video format can be found at From within the libgav1 directory: ```shell - $ git clone https://github.com/google/googletest.git third_party/googletest + $ git clone -b release-1.12.1 --depth 1 \ + https://github.com/google/googletest.git third_party/googletest ``` ### Compile @@ -44,8 +46,8 @@ compliant AV1 decoder. 
More information on the AV1 video format can be found at Configuration options: -* `LIBGAV1_MAX_BITDEPTH`: defines the maximum supported bitdepth (8, 10; - default: 10). +* `LIBGAV1_MAX_BITDEPTH`: defines the maximum supported bitdepth (8, 10, 12; + default: 12). * `LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS`: define to a non-zero value to disable [symbol reduction](#symbol-reduction) in an optimized build to keep all versions of dsp functions available. Automatically defined in diff --git a/README.version b/README.version index 860af1a..4712a93 100644 --- a/README.version +++ b/README.version @@ -1,5 +1,4 @@ URL: https://chromium.googlesource.com/codecs/libgav1 -Version: v0.18.0 +Version: v0.19.0 BugComponent: 324837 Local Modifications: -* Backport av1c generation - cl/463412386 diff --git a/cmake/libgav1_build_definitions.cmake b/cmake/libgav1_build_definitions.cmake index 95c17be..1465679 100644 --- a/cmake/libgav1_build_definitions.cmake +++ b/cmake/libgav1_build_definitions.cmake @@ -141,7 +141,7 @@ macro(libgav1_set_build_definitions) endif() if(NOT LIBGAV1_MAX_BITDEPTH) - set(LIBGAV1_MAX_BITDEPTH 10) + set(LIBGAV1_MAX_BITDEPTH 12) elseif(NOT LIBGAV1_MAX_BITDEPTH EQUAL 8 AND NOT LIBGAV1_MAX_BITDEPTH EQUAL 10 AND NOT LIBGAV1_MAX_BITDEPTH EQUAL 12) diff --git a/examples/file_reader.cc b/examples/file_reader.cc index b096722..a01b7ab 100644 --- a/examples/file_reader.cc +++ b/examples/file_reader.cc @@ -82,7 +82,14 @@ std::unique_ptr FileReader::Open( return nullptr; } - return file; + // With C++11, to return |file|, an explicit move is required as the return + // type differs from the local variable. Overload resolution isn't guaranteed + // in this case, though some compilers may adopt the C++14 behavior (C++ + // Standard Core Language Issue #1579, Return by converting move + // constructor): + // https://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1579 + // To keep things simple we opt for the following compatible form. 
+ return std::unique_ptr(file.release()); } // IVF Frame Header format, from https://wiki.multimedia.cx/index.php/IVF diff --git a/examples/file_writer_test.cc b/examples/file_writer_test.cc index 481808c..df5be17 100644 --- a/examples/file_writer_test.cc +++ b/examples/file_writer_test.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,7 @@ const char* const // TODO(tomfinegan): Add a bitdepth arg, and test writing 10 bit frame buffers. std::unique_ptr GetFakeDecoderBuffer(ImageFormat image_format) { - auto buffer = absl::make_unique(); + auto buffer = absl::WrapUnique(new (std::nothrow) DecoderBuffer); if (buffer == nullptr) return nullptr; buffer->chroma_sample_position = kChromaSamplePositionUnknown; buffer->image_format = image_format; diff --git a/src/c_decoder_test.c b/src/c_decoder_test.c index 9587262..7c6f8c8 100644 --- a/src/c_decoder_test.c +++ b/src/c_decoder_test.c @@ -234,6 +234,7 @@ static void DecoderTestAPIFlowForNonFrameParallelMode(void) { // Signal end of stream (method 1). This should ensure that all the references // are released. status = Libgav1DecoderSignalEOS(test.decoder); + ASSERT_EQ(status, kLibgav1StatusOk); // libgav1 should have released all the reference frames now. ASSERT_EQ(test.frames_in_use, 0); @@ -382,6 +383,7 @@ static void DecoderTestNonFrameParallelModeInvalidFrameAfterEOS(void) { // Signal end of stream. status = Libgav1DecoderSignalEOS(test.decoder); + ASSERT_EQ(status, kLibgav1StatusOk); // libgav1 should have released all the reference frames now. 
ASSERT_EQ(test.frames_in_use, 0); @@ -459,6 +461,7 @@ static void DecoderTestMetadataObu(void) { ASSERT_EQ(test.buffer_private_data, buffer->buffer_private_data); status = Libgav1DecoderSignalEOS(test.decoder); + ASSERT_EQ(status, kLibgav1StatusOk); ASSERT_EQ(test.frames_in_use, 0); Libgav1DecoderDestroy(test.decoder); diff --git a/src/decoder_test.cc b/src/decoder_test.cc index e274122..52ec5cc 100644 --- a/src/decoder_test.cc +++ b/src/decoder_test.cc @@ -172,6 +172,7 @@ TEST_F(DecoderTest, APIFlowForNonFrameParallelMode) { // Signal end of stream (method 1). This should ensure that all the references // are released. status = decoder_->SignalEOS(); + EXPECT_EQ(status, kStatusOk); // libgav1 should have released all the reference frames now. EXPECT_EQ(frames_in_use_, 0); @@ -302,6 +303,7 @@ TEST_F(DecoderTest, NonFrameParallelModeInvalidFrameAfterEOS) { // Signal end of stream. status = decoder_->SignalEOS(); + EXPECT_EQ(status, kStatusOk); // libgav1 should have released all the reference frames now. EXPECT_EQ(frames_in_use_, 0); @@ -372,6 +374,7 @@ TEST_F(DecoderTest, MetadataObu) { EXPECT_EQ(buffer_private_data_, buffer->buffer_private_data); status = decoder_->SignalEOS(); + EXPECT_EQ(status, kStatusOk); EXPECT_EQ(frames_in_use_, 0); } diff --git a/src/dsp/arm/convolve_10bit_neon.cc b/src/dsp/arm/convolve_10bit_neon.cc index 389f029..1aa0cc7 100644 --- a/src/dsp/arm/convolve_10bit_neon.cc +++ b/src/dsp/arm/convolve_10bit_neon.cc @@ -412,30 +412,21 @@ void FilterHorizontal(const uint16_t* LIBGAV1_RESTRICT const src, void* LIBGAV1_RESTRICT const dest, const ptrdiff_t pred_stride, const int width, const int height, const int16x4_t* const v_tap) { - assert(width < 8 || num_taps != 4); - // Don't simplify the redundant if conditions with the template parameters, - // which helps the compiler generate compact code. 
- if (width >= 8 && num_taps != 4) { - FilterHorizontalWidth8AndUp( - src, src_stride, dest, pred_stride, width, height, v_tap); - return; - } - // Horizontal passes only needs to account for number of taps 2 and 4 when // |width| <= 4. assert(width <= 4); assert(num_taps == 2 || num_taps == 4); if (num_taps == 2 || num_taps == 4) { - if (width == 4) { - FilterHorizontalWidth4( - src, src_stride, dest, pred_stride, height, v_tap); - return; - } - assert(width == 2); - if (!is_compound) { + if (width == 2 && !is_compound) { FilterHorizontalWidth2(src, src_stride, dest, pred_stride, height, v_tap); + return; } + assert(width == 4); + FilterHorizontalWidth4( + src, src_stride, dest, pred_stride, height, v_tap); + } else { + assert(false); } } @@ -454,19 +445,32 @@ LIBGAV1_ALWAYS_INLINE void DoHorizontalPass( v_tap[k] = vdup_n_s16(kHalfSubPixelFilters[filter_index][filter_id][k]); } - if (filter_index == 2) { // 8 tap. - FilterHorizontal<8, is_compound, is_2d>(src, src_stride, dst, dst_stride, - width, height, v_tap); - } else if (filter_index < 2) { // 6 tap. - FilterHorizontal<6, is_compound, is_2d>(src + 1, src_stride, dst, - dst_stride, width, height, v_tap); - } else if ((filter_index & 0x4) != 0) { // 4 tap. - // ((filter_index == 4) | (filter_index == 5)) - FilterHorizontal<4, is_compound, is_2d>(src + 2, src_stride, dst, - dst_stride, width, height, v_tap); - } else { // 2 tap. - FilterHorizontal<2, is_compound, is_2d>(src + 3, src_stride, dst, - dst_stride, width, height, v_tap); + // Horizontal filter. + // Filter types used for width <= 4 are different from those for width > 4. + // When width > 4, the valid filter index range is always [0, 3]. + // When width <= 4, the valid filter index range is always [4, 5]. + if (width >= 8) { + if (filter_index == 2) { // 8 tap. + FilterHorizontalWidth8AndUp<8, is_compound, is_2d>( + src, src_stride, dst, dst_stride, width, height, v_tap); + } else if (filter_index < 2) { // 6 tap. 
+ FilterHorizontalWidth8AndUp<6, is_compound, is_2d>( + src + 1, src_stride, dst, dst_stride, width, height, v_tap); + } else { // 2 tap. + assert(filter_index == 3); + FilterHorizontalWidth8AndUp<2, is_compound, is_2d>( + src + 3, src_stride, dst, dst_stride, width, height, v_tap); + } + } else { + if ((filter_index & 0x4) != 0) { // 4 tap. + // ((filter_index == 4) | (filter_index == 5)) + FilterHorizontal<4, is_compound, is_2d>(src + 2, src_stride, dst, + dst_stride, width, height, v_tap); + } else { // 2 tap. + assert(filter_index == 3); + FilterHorizontal<2, is_compound, is_2d>(src + 3, src_stride, dst, + dst_stride, width, height, v_tap); + } } } diff --git a/src/dsp/arm/convolve_neon.cc b/src/dsp/arm/convolve_neon.cc index 5b80da2..97b3f26 100644 --- a/src/dsp/arm/convolve_neon.cc +++ b/src/dsp/arm/convolve_neon.cc @@ -371,16 +371,14 @@ void FilterHorizontal(const uint8_t* LIBGAV1_RESTRICT const src, assert(width <= 4); assert(filter_index >= 3 && filter_index <= 5); if (filter_index >= 3 && filter_index <= 5) { - if (width == 4) { - FilterHorizontalWidth4( - src, src_stride, dest, pred_stride, height, v_tap); - return; - } - assert(width == 2); - if (!is_compound) { + if (width == 2 && !is_compound) { FilterHorizontalWidth2(src, src_stride, dest, pred_stride, height, v_tap); + return; } + assert(width == 4); + FilterHorizontalWidth4( + src, src_stride, dest, pred_stride, height, v_tap); } } diff --git a/src/dsp/arm/film_grain_neon.cc b/src/dsp/arm/film_grain_neon.cc index 76e1151..cde887c 100644 --- a/src/dsp/arm/film_grain_neon.cc +++ b/src/dsp/arm/film_grain_neon.cc @@ -682,26 +682,14 @@ inline int16x8_t Clip3(const int16x8_t value, const int16x8_t low, template inline int16x8_t GetScalingFactors(const int16_t scaling_lut[], - const Pixel* source) { + const Pixel* source, + const int valid_range = 8) { int16_t start_vals[8]; static_assert(bitdepth <= kBitdepth10, "NEON Film Grain is not yet implemented for 12bpp."); #if LIBGAV1_MSAN - memset(start_vals, 
0, sizeof(start_vals)); + if (valid_range < 8) memset(start_vals, 0, sizeof(start_vals)); #endif - for (int i = 0; i < 8; ++i) { - assert(source[i] < (kScalingLookupTableSize << (bitdepth - kBitdepth8))); - start_vals[i] = scaling_lut[source[i]]; - } - return vld1q_s16(start_vals); -} - -template -inline int16x8_t GetScalingFactors(const int16_t scaling_lut[], - const Pixel* source, const int valid_range) { - int16_t start_vals[8]; - static_assert(bitdepth <= kBitdepth10, - "NEON Film Grain is not yet implemented for 12bpp."); for (int i = 0; i < valid_range; ++i) { assert(source[i] < (kScalingLookupTableSize << (bitdepth - kBitdepth8))); start_vals[i] = scaling_lut[source[i]]; diff --git a/src/dsp/arm/intrapred_directional_neon.cc b/src/dsp/arm/intrapred_directional_neon.cc index e9bdcf0..d36ef5f 100644 --- a/src/dsp/arm/intrapred_directional_neon.cc +++ b/src/dsp/arm/intrapred_directional_neon.cc @@ -1752,7 +1752,7 @@ inline void DirectionalZone2FromLeftCol_8x8( const int index_scale_bits = 6; // The values in |offset_y| are negative, except for the first element, which // is zero. - int16x8_t offset_y = left_y; + int16x8_t offset_y; int16x8_t shift_upsampled = left_y; // The shift argument must be a constant, otherwise use upsample_shift // directly. diff --git a/src/dsp/arm/inverse_transform_neon.cc b/src/dsp/arm/inverse_transform_neon.cc index 452f14a..cc4e4a4 100644 --- a/src/dsp/arm/inverse_transform_neon.cc +++ b/src/dsp/arm/inverse_transform_neon.cc @@ -345,11 +345,12 @@ LIBGAV1_ALWAYS_INLINE void ButterflyRotation_FirstIsZero(int16x8_t* a, int16x8_t* b, const int angle, const bool flip) { + // Clang < 14 targeting armv8.1-a+ optimizes vqrdmulhq_n_s16 and vqsubq_s16 + // (in HadamardRotation) into vqrdmlshq_s16 resulting in an "off by one" + // error. 
This behavior was fixed in 14.0.0: + // https://github.com/llvm/llvm-project/commit/82973edfb72a95b442fa6d2bb404e15a4031855e #if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) && \ - defined(__clang__) // ARM v8.1-A - // Clang optimizes vqrdmulhq_n_s16 and vqsubq_s16 (in HadamardRotation) into - // vqrdmlshq_s16 resulting in an "off by one" error. For now, do not use - // vqrdmulhq_n_s16(). + defined(__clang__) && __clang_major__ < 14 const int16_t cos128 = Cos128(angle); const int16_t sin128 = Sin128(angle); const int32x4_t x0 = vmull_n_s16(vget_low_s16(*b), -sin128); diff --git a/src/dsp/arm/loop_filter_10bit_neon.cc b/src/dsp/arm/loop_filter_10bit_neon.cc index a9dd98f..abdc074 100644 --- a/src/dsp/arm/loop_filter_10bit_neon.cc +++ b/src/dsp/arm/loop_filter_10bit_neon.cc @@ -444,7 +444,6 @@ void Horizontal6_NEON(void* const dest, const ptrdiff_t stride, const uint64x1_t need_filter6 = vreinterpret_u64_u16(is_flat3_mask); if (vget_lane_u64(need_filter6, 0) == 0) { // Filter6() does not apply, but Filter4() applies to one or more values. - p0q0_output = p0q0; p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1); p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0); } else { @@ -526,7 +525,6 @@ void Vertical6_NEON(void* const dest, const ptrdiff_t stride, int outer_thresh, const uint64x1_t need_filter6 = vreinterpret_u64_u16(is_flat3_mask); if (vget_lane_u64(need_filter6, 0) == 0) { // Filter6() does not apply, but Filter4() applies to one or more values. 
- p0q0_output = p0q0; p1q1_output = vbslq_u16(needs_filter_mask_8, f4_p1q1, p1q1); p0q0_output = vbslq_u16(needs_filter_mask_8, f4_p0q0, p0q0); } else { diff --git a/src/dsp/arm/loop_restoration_10bit_neon.cc b/src/dsp/arm/loop_restoration_10bit_neon.cc index 410bc20..9191080 100644 --- a/src/dsp/arm/loop_restoration_10bit_neon.cc +++ b/src/dsp/arm/loop_restoration_10bit_neon.cc @@ -1130,7 +1130,13 @@ inline void LookupIntermediate(const uint16x8_t sum, const uint16x8_t index, const uint8x8_t idx = vqmovn_u16(index); uint8_t temp[8]; vst1_u8(temp, idx); - *ma = vsetq_lane_u8(kSgrMaLookup[temp[0]], *ma, offset + 0); + // offset == 0 is assumed to be the first call to this function. The value is + // duplicated to avoid -Wuninitialized warnings under gcc. + if (offset == 0) { + *ma = vdupq_n_u8(kSgrMaLookup[temp[0]]); + } else { + *ma = vsetq_lane_u8(kSgrMaLookup[temp[0]], *ma, offset + 0); + } *ma = vsetq_lane_u8(kSgrMaLookup[temp[1]], *ma, offset + 1); *ma = vsetq_lane_u8(kSgrMaLookup[temp[2]], *ma, offset + 2); *ma = vsetq_lane_u8(kSgrMaLookup[temp[3]], *ma, offset + 3); @@ -1712,8 +1718,6 @@ LIBGAV1_ALWAYS_INLINE void BoxSumFilterPreProcess3( s[0] = Load1QMsanU16(src + 0, overread_in_bytes + 0); s[1] = Load1QMsanU16(src + 8, overread_in_bytes + 16); Square(s[0], sq); - // Quiet "may be used uninitialized" warning. - mas[0] = mas[1] = vdupq_n_u8(0); BoxFilterPreProcess3Lo(s, scale, sum3, square_sum3, sq, &mas[0], bs); int x = 0; @@ -2067,8 +2071,6 @@ LIBGAV1_ALWAYS_INLINE void BoxFilterPass2( s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0); s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16); Square(s[0], sq); - // Quiet "may be used uninitialized" warning. 
- mas[0] = mas[1] = vdupq_n_u8(0); BoxFilterPreProcess3Lo(s, scale, sum3, square_sum3, sq, &mas[0], bs); int x = 0; @@ -2255,8 +2257,6 @@ inline void BoxFilterLastRow( s[0] = Load1QMsanU16(src0 + 0, overread_in_bytes + 0); s[1] = Load1QMsanU16(src0 + 8, overread_in_bytes + 16); Square(s[0], sq); - // Quiet "may be used uninitialized" warning. - ma3[0] = ma3[1] = vdupq_n_u8(0); BoxFilterPreProcessLastRowLo(s, scales, sum3, sum5, square_sum3, square_sum5, sq, &ma3[0], &ma5[0], b3, b5); diff --git a/src/dsp/arm/loop_restoration_neon.cc b/src/dsp/arm/loop_restoration_neon.cc index cd8552e..adb8f36 100644 --- a/src/dsp/arm/loop_restoration_neon.cc +++ b/src/dsp/arm/loop_restoration_neon.cc @@ -1125,7 +1125,11 @@ inline void CalculateIntermediate(const uint16x8_t sum, val = AdjustValue(val, idx, 101); // 101 is the last index which value is 3. val = AdjustValue(val, idx, 169); // 169 is the last index which value is 2. val = AdjustValue(val, idx, 254); // 254 is the last index which value is 1. - *ma = (offset == 0) ? vcombine_u8(val, vget_high_u8(*ma)) + // offset == 0 is assumed to be the first call to this function. Note + // vget_high_u8(*ma) is not used in this case to avoid a -Wuninitialized + // warning with some versions of gcc. vdup_n_u8(0) could work as well, but in + // most cases clang and gcc generated better code with this version. + *ma = (offset == 0) ? 
vcombine_u8(val, val) : vcombine_u8(vget_low_u8(*ma), val); // b = ma * b * one_over_n diff --git a/src/dsp/average_blend_test.cc b/src/dsp/average_blend_test.cc index 6d1100a..67d592f 100644 --- a/src/dsp/average_blend_test.cc +++ b/src/dsp/average_blend_test.cc @@ -76,9 +76,8 @@ class AverageBlendTest : public testing::TestWithParam, if (absl::StartsWith(test_case, "C/")) { base_func_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - AverageBlendInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + AverageBlendInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { AverageBlendInit_NEON(); } else { diff --git a/src/dsp/cdef_test.cc b/src/dsp/cdef_test.cc index c25d7df..e2db17a 100644 --- a/src/dsp/cdef_test.cc +++ b/src/dsp/cdef_test.cc @@ -79,11 +79,11 @@ class CdefDirectionTest : public testing::TestWithParam { const char* const test_case = test_info->test_suite_name(); if (absl::StartsWith(test_case, "C/")) { } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; CdefInit_SSE4_1(); } else if (absl::StartsWith(test_case, "AVX2/")) { - if ((GetCpuInfo() & kAVX2) != 0) { - CdefInit_AVX2(); - } + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + CdefInit_AVX2(); } else if (absl::StartsWith(test_case, "NEON/")) { CdefInit_NEON(); } else { @@ -275,11 +275,11 @@ class CdefFilteringTest : public testing::TestWithParam { } else if (absl::StartsWith(test_case, "NEON/")) { CdefInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; CdefInit_SSE4_1(); } else if (absl::StartsWith(test_case, "AVX2/")) { - if ((GetCpuInfo() & kAVX2) != 0) { - CdefInit_AVX2(); - } + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + CdefInit_AVX2(); } else { FAIL() << "Unrecognized architecture 
prefix in test case name: " << test_case; @@ -304,7 +304,7 @@ template void CdefFilteringTest::TestRandomValues(int num_runs) { const int id = static_cast(param_.rows4x4 < 4) * 3 + (param_.subsampling_x + param_.subsampling_y) * 6; - absl::Duration elapsed_time; + absl::Duration elapsed_time[kMaxPlanes]; for (int num_tests = 0; num_tests < num_runs; ++num_tests) { for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) { const int subsampling_x = (plane == kPlaneY) ? 0 : param_.subsampling_x; @@ -355,7 +355,7 @@ void CdefFilteringTest::TestRandomValues(int num_runs) { source_ + offset, kSourceStride, block_height, primary_strength_, secondary_strength_, damping_, direction_, dest_[plane], kTestBufferStride * sizeof(dest_[0][0])); - elapsed_time += absl::Now() - start; + elapsed_time[plane] += absl::Now() - start; } } @@ -379,7 +379,7 @@ void CdefFilteringTest::TestRandomValues(int num_runs) { ASSERT_NE(expected_digest, nullptr); test_utils::CheckMd5Digest(kCdef, kCdefFilterName, expected_digest, reinterpret_cast(dest_[plane]), - sizeof(dest_[plane]), elapsed_time); + sizeof(dest_[plane]), elapsed_time[plane]); } } diff --git a/src/dsp/common_dsp_test.cc b/src/dsp/common_dsp_test.cc new file mode 100644 index 0000000..3342ce8 --- /dev/null +++ b/src/dsp/common_dsp_test.cc @@ -0,0 +1,58 @@ +// Copyright 2023 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/match.h" +#include "gtest/gtest.h" +#include "src/dsp/x86/common_avx2_test.h" +#include "src/dsp/x86/common_sse4_test.h" +#include "src/utils/cpu.h" + +namespace libgav1 { +namespace dsp { +namespace { + +class CommonDspTest : public ::testing::Test { + protected: + void SetUp() override { + const testing::TestInfo* const test_info = + testing::UnitTest::GetInstance()->current_test_info(); + const char* const test_case = test_info->name(); + if (absl::StartsWith(test_case, "SSE41")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + } else if (absl::StartsWith(test_case, "AVX2")) { + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + } else { + FAIL() << "Unrecognized architecture prefix in test case name: " + << test_case; + } + } +}; + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CommonDspTest); + +#if LIBGAV1_ENABLE_AVX2 +TEST_F(CommonDspTest, AVX2RightShiftWithRoundingS16) { + AVX2RightShiftWithRoundingS16Test(); +} +#endif // LIBGAV1_ENABLE_AVX2 + +#if LIBGAV1_ENABLE_SSE4_1 +TEST_F(CommonDspTest, SSE41RightShiftWithRoundingS16) { + SSE41RightShiftWithRoundingS16Test(); +} +#endif // LIBGAV1_ENABLE_SSE41 + +} // namespace +} // namespace dsp +} // namespace libgav1 diff --git a/src/dsp/convolve_test.cc b/src/dsp/convolve_test.cc index 42cdeb7..b8c1f1d 100644 --- a/src/dsp/convolve_test.cc +++ b/src/dsp/convolve_test.cc @@ -624,13 +624,11 @@ class ConvolveTest : public testing::TestWithParam< if (absl::StartsWith(test_case, "C/")) { base_convolve_func_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - ConvolveInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + ConvolveInit_SSE4_1(); } else if (absl::StartsWith(test_case, "AVX2/")) { - if ((GetCpuInfo() & kAVX2) != 0) { - ConvolveInit_AVX2(); - } + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + ConvolveInit_AVX2(); } 
else if (absl::StartsWith(test_case, "NEON/")) { ConvolveInit_NEON(); #if LIBGAV1_MAX_BITDEPTH >= 10 @@ -1084,13 +1082,11 @@ class ConvolveScaleTest if (absl::StartsWith(test_case, "C/")) { base_convolve_scale_func_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - ConvolveInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + ConvolveInit_SSE4_1(); } else if (absl::StartsWith(test_case, "AVX2/")) { - if ((GetCpuInfo() & kAVX2) != 0) { - ConvolveInit_AVX2(); - } + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + ConvolveInit_AVX2(); } else if (absl::StartsWith(test_case, "NEON/")) { ConvolveInit_NEON(); #if LIBGAV1_MAX_BITDEPTH >= 10 diff --git a/src/dsp/distance_weighted_blend_test.cc b/src/dsp/distance_weighted_blend_test.cc index 88040b4..0d6e1cd 100644 --- a/src/dsp/distance_weighted_blend_test.cc +++ b/src/dsp/distance_weighted_blend_test.cc @@ -63,9 +63,8 @@ class DistanceWeightedBlendTest : public testing::TestWithParam, if (absl::StartsWith(test_case, "C/")) { base_func_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - DistanceWeightedBlendInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + DistanceWeightedBlendInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { DistanceWeightedBlendInit_NEON(); } else { diff --git a/src/dsp/intra_edge_test.cc b/src/dsp/intra_edge_test.cc index b287544..75c45be 100644 --- a/src/dsp/intra_edge_test.cc +++ b/src/dsp/intra_edge_test.cc @@ -97,9 +97,8 @@ class IntraEdgeFilterTest : public testing::TestWithParam { if (absl::StartsWith(test_case, "C/")) { base_intra_edge_filter_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraEdgeInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + 
IntraEdgeInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { IntraEdgeInit_NEON(); } else { @@ -356,9 +355,8 @@ class IntraEdgeUpsamplerTest : public testing::TestWithParam { if (absl::StartsWith(test_case, "C/")) { base_intra_edge_upsampler_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraEdgeInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + IntraEdgeInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { IntraEdgeInit_NEON(); } else { diff --git a/src/dsp/intrapred_cfl_test.cc b/src/dsp/intrapred_cfl_test.cc index 8415d51..53f3075 100644 --- a/src/dsp/intrapred_cfl_test.cc +++ b/src/dsp/intrapred_cfl_test.cc @@ -156,9 +156,8 @@ class CflIntraPredTest : public IntraPredTestBase { } else if (absl::StartsWith(test_case, "NEON/")) { IntraPredCflInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraPredCflInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + IntraPredCflInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " << test_case; @@ -304,9 +303,8 @@ class CflSubsamplerTest : public IntraPredTestBase { } else if (absl::StartsWith(test_case, "NEON/")) { IntraPredCflInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraPredCflInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + IntraPredCflInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " << test_case; diff --git a/src/dsp/intrapred_directional_test.cc b/src/dsp/intrapred_directional_test.cc index 8d4fa63..2c81b27 100644 --- a/src/dsp/intrapred_directional_test.cc +++ b/src/dsp/intrapred_directional_test.cc @@ -187,9 +187,8 @@ class DirectionalIntraPredTest : public IntraPredTestBase { } else if 
(absl::StartsWith(test_case, "NEON/")) { IntraPredDirectionalInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraPredDirectionalInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + IntraPredDirectionalInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " << test_case; diff --git a/src/dsp/intrapred_filter_test.cc b/src/dsp/intrapred_filter_test.cc index c8d60a0..d5694f6 100644 --- a/src/dsp/intrapred_filter_test.cc +++ b/src/dsp/intrapred_filter_test.cc @@ -158,9 +158,8 @@ class FilterIntraPredTest : public IntraPredTestBase { // No need to compare C with itself. base_filter_intra_pred_ = nullptr; } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraPredFilterInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + IntraPredFilterInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { IntraPredFilterInit_NEON(); } else { diff --git a/src/dsp/intrapred_test.cc b/src/dsp/intrapred_test.cc index cca1c73..5753817 100644 --- a/src/dsp/intrapred_test.cc +++ b/src/dsp/intrapred_test.cc @@ -154,10 +154,9 @@ class IntraPredTest : public IntraPredTestBase { if (absl::StartsWith(test_case, "C/")) { memset(base_intrapreds_, 0, sizeof(base_intrapreds_)); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - IntraPredInit_SSE4_1(); - IntraPredSmoothInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + IntraPredInit_SSE4_1(); + IntraPredSmoothInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { IntraPredInit_NEON(); IntraPredSmoothInit_NEON(); diff --git a/src/dsp/inverse_transform_test.cc b/src/dsp/inverse_transform_test.cc index 081dcc1..d74a33a 100644 --- a/src/dsp/inverse_transform_test.cc +++ b/src/dsp/inverse_transform_test.cc @@ -181,9 +181,8 @@ class 
InverseTransformTest if (absl::StartsWith(test_case, "C/")) { memset(base_inverse_transforms_, 0, sizeof(base_inverse_transforms_)); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - InverseTransformInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + InverseTransformInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { InverseTransformInit_NEON(); InverseTransformInit10bpp_NEON(); diff --git a/src/dsp/loop_filter_test.cc b/src/dsp/loop_filter_test.cc index 63ed530..93a273a 100644 --- a/src/dsp/loop_filter_test.cc +++ b/src/dsp/loop_filter_test.cc @@ -128,9 +128,8 @@ class LoopFilterTest : public testing::TestWithParam { if (absl::StartsWith(test_case, "C/")) { memset(base_loop_filters_, 0, sizeof(base_loop_filters_)); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - LoopFilterInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + LoopFilterInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { LoopFilterInit_NEON(); #if LIBGAV1_MAX_BITDEPTH >= 10 diff --git a/src/dsp/loop_restoration_test.cc b/src/dsp/loop_restoration_test.cc index 5c645b8..d6dcd9c 100644 --- a/src/dsp/loop_restoration_test.cc +++ b/src/dsp/loop_restoration_test.cc @@ -69,19 +69,17 @@ class SelfGuidedFilterTest : public testing::TestWithParam, const char* const test_case = test_info->test_suite_name(); if (absl::StartsWith(test_case, "C/")) { } else if (absl::StartsWith(test_case, "AVX2/")) { - if ((GetCpuInfo() & kAVX2) != 0) { - LoopRestorationInit_AVX2(); + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + LoopRestorationInit_AVX2(); #if LIBGAV1_MAX_BITDEPTH >= 10 - LoopRestorationInit10bpp_AVX2(); + LoopRestorationInit10bpp_AVX2(); #endif - } } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - LoopRestorationInit_SSE4_1(); + if ((GetCpuInfo() 
& kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + LoopRestorationInit_SSE4_1(); #if LIBGAV1_MAX_BITDEPTH >= 10 - LoopRestorationInit10bpp_SSE4_1(); + LoopRestorationInit10bpp_SSE4_1(); #endif - } } else if (absl::StartsWith(test_case, "NEON/")) { LoopRestorationInit_NEON(); #if LIBGAV1_MAX_BITDEPTH >= 10 @@ -381,19 +379,17 @@ class WienerFilterTest : public testing::TestWithParam, const char* const test_case = test_info->test_suite_name(); if (absl::StartsWith(test_case, "C/")) { } else if (absl::StartsWith(test_case, "AVX2/")) { - if ((GetCpuInfo() & kAVX2) != 0) { - LoopRestorationInit_AVX2(); + if ((GetCpuInfo() & kAVX2) == 0) GTEST_SKIP() << "No AVX2 support!"; + LoopRestorationInit_AVX2(); #if LIBGAV1_MAX_BITDEPTH >= 10 - LoopRestorationInit10bpp_AVX2(); + LoopRestorationInit10bpp_AVX2(); #endif - } } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - LoopRestorationInit_SSE4_1(); + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + LoopRestorationInit_SSE4_1(); #if LIBGAV1_MAX_BITDEPTH >= 10 - LoopRestorationInit10bpp_SSE4_1(); + LoopRestorationInit10bpp_SSE4_1(); #endif - } } else if (absl::StartsWith(test_case, "NEON/")) { LoopRestorationInit_NEON(); #if LIBGAV1_MAX_BITDEPTH >= 10 diff --git a/src/dsp/mask_blend_test.cc b/src/dsp/mask_blend_test.cc index 29dd43b..06793e5 100644 --- a/src/dsp/mask_blend_test.cc +++ b/src/dsp/mask_blend_test.cc @@ -270,9 +270,8 @@ class MaskBlendTest : public testing::TestWithParam, } else if (absl::StartsWith(test_case, "NEON/")) { MaskBlendInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - MaskBlendInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + MaskBlendInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " << test_case; @@ -427,6 +426,7 @@ void MaskBlendTest::Test(const char* const digest, if (bitdepth != 8) { 
ASSERT_EQ(func_8bpp_, nullptr); } + ASSERT_NE(func_, nullptr); func_(source1_, source2_, src_2_stride, mask_, mask_stride, width, height, dest_, kDestStride); } diff --git a/src/dsp/motion_field_projection_test.cc b/src/dsp/motion_field_projection_test.cc index 3a47cc7..8a57696 100644 --- a/src/dsp/motion_field_projection_test.cc +++ b/src/dsp/motion_field_projection_test.cc @@ -63,9 +63,8 @@ class MotionFieldProjectionTest : public testing::TestWithParam { } else if (absl::StartsWith(test_case, "NEON/")) { MotionFieldProjectionInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - MotionFieldProjectionInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + MotionFieldProjectionInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " << test_case; diff --git a/src/dsp/motion_vector_search_test.cc b/src/dsp/motion_vector_search_test.cc index a7b2ec8..5c680d6 100644 --- a/src/dsp/motion_vector_search_test.cc +++ b/src/dsp/motion_vector_search_test.cc @@ -55,9 +55,8 @@ class MotionVectorSearchTest : public testing::TestWithParam, } else if (absl::StartsWith(test_case, "NEON/")) { MotionVectorSearchInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - MotionVectorSearchInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + MotionVectorSearchInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " << test_case; diff --git a/src/dsp/obmc_test.cc b/src/dsp/obmc_test.cc index a10feb2..289fd66 100644 --- a/src/dsp/obmc_test.cc +++ b/src/dsp/obmc_test.cc @@ -193,9 +193,8 @@ class ObmcBlendTest : public testing::TestWithParam { const absl::string_view test_case = test_info->test_suite_name(); if (absl::StartsWith(test_case, "C/")) { } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - 
ObmcInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + ObmcInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { ObmcInit_NEON(); } else { diff --git a/src/dsp/super_res_test.cc b/src/dsp/super_res_test.cc index 7b253ff..0c3537c 100644 --- a/src/dsp/super_res_test.cc +++ b/src/dsp/super_res_test.cc @@ -96,6 +96,7 @@ class SuperResTest : public testing::TestWithParam, } else if (absl::StartsWith(test_case, "NEON/")) { SuperResInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; SuperResInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " diff --git a/src/dsp/warp_test.cc b/src/dsp/warp_test.cc index c64c8d6..f93ad8b 100644 --- a/src/dsp/warp_test.cc +++ b/src/dsp/warp_test.cc @@ -275,6 +275,7 @@ class WarpTest : public testing::TestWithParam { } else if (absl::StartsWith(test_case, "NEON/")) { WarpInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; WarpInit_SSE4_1(); } else { FAIL() << "Unrecognized architecture prefix in test case name: " diff --git a/src/dsp/weight_mask_test.cc b/src/dsp/weight_mask_test.cc index 74ec03c..a080ec4 100644 --- a/src/dsp/weight_mask_test.cc +++ b/src/dsp/weight_mask_test.cc @@ -223,6 +223,7 @@ class WeightMaskTest : public testing::TestWithParam, } else if (absl::StartsWith(test_case, "NEON/")) { WeightMaskInit_NEON(); } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; WeightMaskInit_SSE4_1(); } func_ = dsp->weight_mask[width_index][height_index][mask_is_inverse_]; diff --git a/src/dsp/x86/common_avx2_test.cc b/src/dsp/x86/common_avx2_test.cc index 2062683..4b294b0 100644 --- a/src/dsp/x86/common_avx2_test.cc +++ b/src/dsp/x86/common_avx2_test.cc @@ -12,26 +12,27 @@ // See the License for the 
specific language governing permissions and // limitations under the License. -#include "src/dsp/x86/common_avx2.h" +#include "src/dsp/x86/common_avx2_test.h" #include "gtest/gtest.h" +#include "src/utils/cpu.h" #if LIBGAV1_TARGETING_AVX2 #include +#include "src/dsp/x86/common_avx2.h" #include "src/utils/common.h" namespace libgav1 { namespace dsp { -namespace { // Show that RightShiftWithRounding_S16() is equal to // RightShiftWithRounding() only for values less than or equal to // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for // negative values. -TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) { +void AVX2RightShiftWithRoundingS16Test() { for (int bits = 0; bits < 16; ++bits) { const int bias = (1 << bits) >> 1; for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) { @@ -53,15 +54,20 @@ TEST(CommonDspTest, AVX2RightShiftWithRoundingS16) { } } -} // namespace } // namespace dsp } // namespace libgav1 #else // !LIBGAV1_TARGETING_AVX2 -TEST(CommonDspTest, AVX2) { +namespace libgav1 { +namespace dsp { + +void AVX2RightShiftWithRoundingS16Test() { GTEST_SKIP() << "Build this module for x86(-64) with AVX2 enabled to enable " "the tests."; } +} // namespace dsp +} // namespace libgav1 + #endif // LIBGAV1_TARGETING_AVX2 diff --git a/src/dsp/x86/common_avx2_test.h b/src/dsp/x86/common_avx2_test.h new file mode 100644 index 0000000..1124f7f --- /dev/null +++ b/src/dsp/x86/common_avx2_test.h @@ -0,0 +1,26 @@ +// Copyright 2023 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_ +#define LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_ + +namespace libgav1 { +namespace dsp { + +void AVX2RightShiftWithRoundingS16Test(); + +} // namespace dsp +} // namespace libgav1 + +#endif // LIBGAV1_SRC_DSP_X86_COMMON_AVX2_TEST_H_ diff --git a/src/dsp/x86/common_sse4_test.cc b/src/dsp/x86/common_sse4_test.cc index 3288cfc..592630c 100644 --- a/src/dsp/x86/common_sse4_test.cc +++ b/src/dsp/x86/common_sse4_test.cc @@ -12,26 +12,27 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "src/dsp/x86/common_sse4.h" +#include "src/dsp/x86/common_sse4_test.h" #include "gtest/gtest.h" +#include "src/utils/cpu.h" #if LIBGAV1_TARGETING_SSE4_1 #include +#include "src/dsp/x86/common_sse4.h" #include "src/utils/common.h" namespace libgav1 { namespace dsp { -namespace { // Show that RightShiftWithRounding_S16() is equal to // RightShiftWithRounding() only for values less than or equal to // INT16_MAX - ((1 << bits) >> 1). In particular, if bits == 16, then // RightShiftWithRounding_S16() is equal to RightShiftWithRounding() only for // negative values. 
-TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) { +void SSE41RightShiftWithRoundingS16Test() { for (int bits = 0; bits < 16; ++bits) { const int bias = (1 << bits) >> 1; for (int32_t value = INT16_MIN; value <= INT16_MAX; ++value) { @@ -50,15 +51,20 @@ TEST(CommonDspTest, SSE41RightShiftWithRoundingS16) { } } -} // namespace } // namespace dsp } // namespace libgav1 #else // !LIBGAV1_TARGETING_SSE4_1 -TEST(CommonDspTest, SSE41) { +namespace libgav1 { +namespace dsp { + +void SSE41RightShiftWithRoundingS16Test() { GTEST_SKIP() << "Build this module for x86(-64) with SSE4 enabled to enable " "the tests."; } +} // namespace dsp +} // namespace libgav1 + #endif // LIBGAV1_TARGETING_SSE4_1 diff --git a/src/dsp/x86/common_sse4_test.h b/src/dsp/x86/common_sse4_test.h new file mode 100644 index 0000000..169439a --- /dev/null +++ b/src/dsp/x86/common_sse4_test.h @@ -0,0 +1,26 @@ +// Copyright 2023 The libgav1 Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_ +#define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_ + +namespace libgav1 { +namespace dsp { + +void SSE41RightShiftWithRoundingS16Test(); + +} // namespace dsp +} // namespace libgav1 + +#endif // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_TEST_H_ diff --git a/src/dsp/x86/convolve_avx2.cc b/src/dsp/x86/convolve_avx2.cc index 6e94347..ff51aee 100644 --- a/src/dsp/x86/convolve_avx2.cc +++ b/src/dsp/x86/convolve_avx2.cc @@ -27,6 +27,7 @@ #include "src/dsp/dsp.h" #include "src/dsp/x86/common_avx2.h" #include "src/utils/common.h" +#include "src/utils/compiler_attributes.h" #include "src/utils/constants.h" namespace libgav1 { @@ -607,6 +608,10 @@ void Convolve2D_AVX2(const void* LIBGAV1_RESTRICT const reference, alignas(32) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. + memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif const int intermediate_height = height + vertical_taps - 1; const ptrdiff_t src_stride = reference_stride; @@ -1374,6 +1379,10 @@ void ConvolveCompound2D_AVX2( alignas(32) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. 
+ memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif const int intermediate_height = height + vertical_taps - 1; const ptrdiff_t src_stride = reference_stride; diff --git a/src/dsp/x86/convolve_sse4.cc b/src/dsp/x86/convolve_sse4.cc index f427c4c..99b87d6 100644 --- a/src/dsp/x86/convolve_sse4.cc +++ b/src/dsp/x86/convolve_sse4.cc @@ -28,6 +28,7 @@ #include "src/dsp/dsp.h" #include "src/dsp/x86/common_sse4.h" #include "src/utils/common.h" +#include "src/utils/compiler_attributes.h" namespace libgav1 { namespace dsp { @@ -254,6 +255,10 @@ void Convolve2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference, alignas(16) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. + memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif const int intermediate_height = height + vertical_taps - 1; const ptrdiff_t src_stride = reference_stride; @@ -617,6 +622,10 @@ void ConvolveCompound2D_SSE4_1( alignas(16) uint16_t intermediate_result[kMaxSuperBlockSizeInPixels * (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. + memset(intermediate_result, 0x33, sizeof(intermediate_result)); +#endif // Horizontal filter. // Filter types used for width <= 4 are different from those for width > 4. @@ -1157,6 +1166,10 @@ void ConvolveScale2D_SSE4_1(const void* LIBGAV1_RESTRICT const reference, alignas(16) int16_t intermediate_result[kIntermediateAllocWidth * (2 * kIntermediateAllocWidth + kSubPixelTaps)]; +#if LIBGAV1_MSAN + // Quiet msan warnings. Set with random non-zero value to aid in debugging. 
+ memset(intermediate_result, 0x44, sizeof(intermediate_result)); +#endif const int num_vert_taps = dsp::GetNumTapsInFilter(vert_filter_index); const int intermediate_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >> diff --git a/src/dsp/x86/intrapred_directional_sse4.cc b/src/dsp/x86/intrapred_directional_sse4.cc index bc61745..2e64d21 100644 --- a/src/dsp/x86/intrapred_directional_sse4.cc +++ b/src/dsp/x86/intrapred_directional_sse4.cc @@ -1023,6 +1023,10 @@ void DirectionalIntraPredictorZone2_SSE4_1(void* const dest, ptrdiff_t stride, uint8_t left_buffer[288]; memcpy(top_buffer + 128, static_cast(top_row) - 16, 160); memcpy(left_buffer + 128, static_cast(left_column) - 16, 160); +#if LIBGAV1_MSAN + memset(top_buffer, 0x33, 128); + memset(left_buffer, 0x44, 128); +#endif const uint8_t* top_ptr = top_buffer + 144; const uint8_t* left_ptr = left_buffer + 144; if (width == 4 || height == 4) { diff --git a/src/dsp/x86/loop_restoration_10bit_sse4.cc b/src/dsp/x86/loop_restoration_10bit_sse4.cc index 6625d51..029e168 100644 --- a/src/dsp/x86/loop_restoration_10bit_sse4.cc +++ b/src/dsp/x86/loop_restoration_10bit_sse4.cc @@ -1079,7 +1079,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index, // general-purpose register to process. Faster than using _mm_extract_epi8(). uint8_t temp[8]; StoreLo8(temp, idx); - *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + // offset == 0 is assumed to be the first call to this function. The value is + // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least + // equivalent if not faster than pinsrb.
+ if (offset == 0) { + *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]); + } else { + *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + } *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3); diff --git a/src/dsp/x86/loop_restoration_sse4.cc b/src/dsp/x86/loop_restoration_sse4.cc index b4df072..8c24c39 100644 --- a/src/dsp/x86/loop_restoration_sse4.cc +++ b/src/dsp/x86/loop_restoration_sse4.cc @@ -1222,7 +1222,14 @@ inline void LookupIntermediate(const __m128i sum, const __m128i index, // general-purpose register to process. Faster than using _mm_extract_epi8(). uint8_t temp[8]; StoreLo8(temp, idx); - *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + // offset == 0 is assumed to be the first call to this function. The value is + // mov'd to avoid -Wuninitialized warnings under gcc. mov should be at least + // equivalent if not faster than pinsrb.
+ if (offset == 0) { + *ma = _mm_cvtsi32_si128(kSgrMaLookup[temp[0]]); + } else { + *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[0]], offset + 0); + } *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[1]], offset + 1); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[2]], offset + 2); *ma = _mm_insert_epi8(*ma, kSgrMaLookup[temp[3]], offset + 3); diff --git a/src/film_grain_test.cc b/src/film_grain_test.cc index d5854e0..fc1f1b1 100644 --- a/src/film_grain_test.cc +++ b/src/film_grain_test.cc @@ -2190,8 +2190,10 @@ class BlendNoiseTest : public testing::TestWithParam> { static_assert(bitdepth >= kBitdepth8 && bitdepth <= LIBGAV1_MAX_BITDEPTH, ""); using GrainType = typename std::conditional::type; + ~BlendNoiseTest() override = default; - BlendNoiseTest() { + protected: + void SetUp() override { test_utils::ResetDspTable(bitdepth); FilmGrainInit_C(); const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth); @@ -2204,6 +2206,7 @@ class BlendNoiseTest : public testing::TestWithParam> { FilmGrainInit_NEON(); #endif } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; FilmGrainInit_SSE4_1(); } const BlendNoiseTestParam test_param(GetParam()); @@ -2236,9 +2239,7 @@ class BlendNoiseTest : public testing::TestWithParam> { dest_plane_v_ = dest_plane_u_ + uv_stride_ * uv_height_ + kBorderPixelsFilmGrain; } - ~BlendNoiseTest() override = default; - protected: void TestSpeed(int num_runs); private: @@ -2298,15 +2299,15 @@ void BlendNoiseTest::ConvertScalingLut10bpp( template void BlendNoiseTest::TestSpeed(const int num_runs) { if (blend_chroma_func_ == nullptr || blend_luma_func_ == nullptr) return; - ASSERT_TRUE(noise_image_[kPlaneY].Reset(height_, - width_ + kBorderPixelsFilmGrain, - /*zero_initialize=*/false)); - ASSERT_TRUE(noise_image_[kPlaneU].Reset(uv_height_, - uv_width_ + kBorderPixelsFilmGrain, - /*zero_initialize=*/false)); - ASSERT_TRUE(noise_image_[kPlaneV].Reset(uv_height_, - uv_width_ + 
kBorderPixelsFilmGrain, - /*zero_initialize=*/false)); + // Allow optimized code to read into the border without generating MSan + // warnings. This matches the behavior in FilmGrain::AllocateNoiseImage(). + constexpr bool zero_initialize = LIBGAV1_MSAN == 1; + ASSERT_TRUE(noise_image_[kPlaneY].Reset(height_, width_ + kNoiseImagePadding, + zero_initialize)); + ASSERT_TRUE(noise_image_[kPlaneU].Reset( + uv_height_, uv_width_ + kNoiseImagePadding, zero_initialize)); + ASSERT_TRUE(noise_image_[kPlaneV].Reset( + uv_height_, uv_width_ + kNoiseImagePadding, zero_initialize)); libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed()); // Allow any valid grain values. const int grain_max = GetGrainMax(); @@ -2533,7 +2534,10 @@ template class FilmGrainSpeedTest : public testing::TestWithParam { public: static_assert(bitdepth >= kBitdepth8 && bitdepth <= LIBGAV1_MAX_BITDEPTH, ""); - FilmGrainSpeedTest() { + ~FilmGrainSpeedTest() override = default; + + protected: + void SetUp() override { test_utils::ResetDspTable(bitdepth); FilmGrainInit_C(); @@ -2545,6 +2549,7 @@ class FilmGrainSpeedTest : public testing::TestWithParam { FilmGrainInit_NEON(); #endif } else if (absl::StartsWith(test_case, "SSE41/")) { + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; FilmGrainInit_SSE4_1(); } uv_width_ = (width_ + subsampling_x_) >> subsampling_x_; @@ -2566,9 +2571,7 @@ class FilmGrainSpeedTest : public testing::TestWithParam { const int num_threads = GetParam(); thread_pool_ = ThreadPool::Create(num_threads); } - ~FilmGrainSpeedTest() override = default; - protected: void TestSpeed(int num_runs); private: diff --git a/src/gav1/decoder_buffer.h b/src/gav1/decoder_buffer.h index 0a5586e..816eca4 100644 --- a/src/gav1/decoder_buffer.h +++ b/src/gav1/decoder_buffer.h @@ -115,20 +115,30 @@ typedef enum Libgav1ColorRange { kLibgav1ColorRangeFull // YUV/RGB [0..255] } Libgav1ColorRange; +// Section 6.7.3. 
typedef struct Libgav1ObuMetadataHdrCll { // NOLINT uint16_t max_cll; // Maximum content light level. uint16_t max_fall; // Maximum frame-average light level. } Libgav1ObuMetadataHdrCll; +// Section 6.7.4. typedef struct Libgav1ObuMetadataHdrMdcv { // NOLINT + // 0.16 fixed-point X/Y chromaticity coordinate as defined by CIE 1931 in + // R/G/B order. uint16_t primary_chromaticity_x[3]; uint16_t primary_chromaticity_y[3]; + // 0.16 fixed-point X/Y chromaticity coordinate as defined by CIE 1931. uint16_t white_point_chromaticity_x; uint16_t white_point_chromaticity_y; + // 24.8 fixed-point maximum luminance, represented in candelas per square + // meter. uint32_t luminance_max; + // 18.14 fixed-point minimum luminance, represented in candelas per square + // meter. uint32_t luminance_min; } Libgav1ObuMetadataHdrMdcv; +// Section 6.7.2. typedef struct Libgav1ObuMetadataItutT35 { // NOLINT uint8_t country_code; uint8_t country_code_extension_byte; // Valid if country_code is 0xFF. diff --git a/src/gav1/version.h b/src/gav1/version.h index b386acc..cca2383 100644 --- a/src/gav1/version.h +++ b/src/gav1/version.h @@ -23,7 +23,7 @@ // (https://semver.org). #define LIBGAV1_MAJOR_VERSION 0 -#define LIBGAV1_MINOR_VERSION 18 +#define LIBGAV1_MINOR_VERSION 19 #define LIBGAV1_PATCH_VERSION 0 #define LIBGAV1_VERSION \ diff --git a/src/post_filter/cdef.cc b/src/post_filter/cdef.cc index 037fc17..ced4096 100644 --- a/src/post_filter/cdef.cc +++ b/src/post_filter/cdef.cc @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+#include + #include "src/post_filter.h" #include "src/utils/blocking_counter.h" #include "src/utils/compiler_attributes.h" @@ -72,10 +74,23 @@ void CopyRowForCdef(const Pixel* src, int block_width, int unit_width, } } +// GCC 13.x will report a false positive from the call to +// ApplyCdefForOneSuperBlockRowHelper() with a nullptr in +// ApplyCdefForOneSuperBlockRow(). The call to CopyPixels() in +// ApplyCdefForOneUnit() is only made when thread_pool_ != nullptr and +// border_columns[][] is a valid pointer. +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif // For |height| rows, copy |width| pixels of size |pixel_size| from |src| to // |dst|. void CopyPixels(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height, size_t pixel_size) { + assert(src != nullptr); + assert(dst != nullptr); + assert(height > 0); int y = height; do { memcpy(dst, src, width * pixel_size); @@ -83,6 +98,9 @@ void CopyPixels(const uint8_t* src, int src_stride, uint8_t* dst, dst += dst_stride; } while (--y != 0); } +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif } // namespace @@ -327,6 +345,7 @@ void PostFilter::ApplyCdefForOneUnit(uint16_t* cdef_block, const int index, GetSourceBuffer(kPlaneY, row4x4_start, column4x4_start + block_width4x4) - kCdefBorder * sizeof(Pixel); + assert(border_columns != nullptr); CopyPixels(src_line, frame_buffer_.stride(kPlaneY), border_columns[border_columns_dst_index][kPlaneY], kCdefBorder * sizeof(Pixel), kCdefBorder, diff --git a/src/post_filter/loop_restoration.cc b/src/post_filter/loop_restoration.cc index 2e6982c..b5e1432 100644 --- a/src/post_filter/loop_restoration.cc +++ b/src/post_filter/loop_restoration.cc @@ -79,7 +79,13 @@ void PostFilter::ApplyLoopRestorationForOneRow( bottom_border_stride = border_stride; } } +#if LIBGAV1_MSAN + // The optimized 
loop filter may read past initialized values within the + // buffer. + RestorationBuffer restoration_buffer = {}; +#else RestorationBuffer restoration_buffer; +#endif const LoopRestorationType type = restoration_info[unit_column].type; assert(type == kLoopRestorationTypeSgrProj || type == kLoopRestorationTypeWiener); diff --git a/src/post_filter/post_filter.cc b/src/post_filter/post_filter.cc index bc71410..9745a01 100644 --- a/src/post_filter/post_filter.cc +++ b/src/post_filter/post_filter.cc @@ -372,17 +372,38 @@ void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4, uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane] : frame_buffer_.data(plane)) + row * stride; - const int left_border = for_loop_restoration +#if LIBGAV1_MSAN + const int right_padding = + (frame_buffer_.stride(plane) >> static_cast(bitdepth_ > 8)) - + ((frame_buffer_.left_border(plane) + frame_buffer_.width(plane) + + frame_buffer_.right_border(plane))); + const int padded_right_border_size = + frame_buffer_.right_border(plane) + right_padding; + // The optimized loop restoration code may read into the next row's left + // border depending on the start of the last superblock and the size of the + // right border. This is safe as the post filter is applied after + // reconstruction is complete and the threaded implementations do not read + // from the left border. + const int left_border_overread = + (for_loop_restoration && padded_right_border_size < 64) + ? 63 - padded_right_border_size + : 0; + assert(!for_loop_restoration || left_border_overread == 0 || + (frame_buffer_.bottom_border(plane) > 0 && + left_border_overread <= frame_buffer_.left_border(plane))); + const int left_border = (for_loop_restoration && left_border_overread == 0) ? kRestorationHorizontalBorder : frame_buffer_.left_border(plane); -#if LIBGAV1_MSAN // The optimized loop restoration code will overread the visible frame // buffer into the right border. 
Extend the right boundary further to // prevent msan warnings. const int right_border = for_loop_restoration - ? kRestorationHorizontalBorder + 16 + ? std::min(padded_right_border_size, 63) : frame_buffer_.right_border(plane); #else + const int left_border = for_loop_restoration + ? kRestorationHorizontalBorder + : frame_buffer_.left_border(plane); const int right_border = for_loop_restoration ? kRestorationHorizontalBorder : frame_buffer_.right_border(plane); diff --git a/src/reconstruction_test.cc b/src/reconstruction_test.cc index fd780b3..4d09ada 100644 --- a/src/reconstruction_test.cc +++ b/src/reconstruction_test.cc @@ -65,9 +65,8 @@ class ReconstructionTest : public testing::TestWithParam { const char* const test_case = test_info->test_suite_name(); if (absl::StartsWith(test_case, "C/")) { } else if (absl::StartsWith(test_case, "SSE41/")) { - if ((GetCpuInfo() & kSSE4_1) != 0) { - dsp::InverseTransformInit_SSE4_1(); - } + if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!"; + dsp::InverseTransformInit_SSE4_1(); } else if (absl::StartsWith(test_case, "NEON/")) { dsp::InverseTransformInit_NEON(); } else { diff --git a/src/tile/bitstream/mode_info.cc b/src/tile/bitstream/mode_info.cc index cb7b311..ffbbf64 100644 --- a/src/tile/bitstream/mode_info.cc +++ b/src/tile/bitstream/mode_info.cc @@ -890,6 +890,14 @@ uint16_t* Tile::GetReferenceCdf( block, kReferenceFrameBackward, kReferenceFrameBackward, kReferenceFrameAlternate2, kReferenceFrameAlternate2); } + // When using GCC 12.x for some targets the compiler reports a false positive + // with the context subscript when is_single=false, is_backward=false and + // index=0. GetReferenceContext() can only return values between 0 and 2. 
+#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + assert(context >= 0 && context <= 2); if (is_single) { // The index parameter for single references is offset by one since the spec // uses 1-based index for these elements. @@ -900,6 +908,9 @@ uint16_t* Tile::GetReferenceCdf( .compound_backward_reference_cdf[context][index]; } return symbol_decoder_context_.compound_reference_cdf[type][context][index]; +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif } void Tile::ReadReferenceFrames(const Block& block, bool skip_mode) { diff --git a/src/tile/tile.cc b/src/tile/tile.cc index 5070bb6..10ebbf2 100644 --- a/src/tile/tile.cc +++ b/src/tile/tile.cc @@ -2605,17 +2605,17 @@ void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) { ReferenceInfo* reference_info = current_frame_.reference_info(); for (int i = 1; i >= 0; --i) { const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i]; + if (reference_frame_to_store <= kReferenceFrameIntra) continue; // Must make a local copy so that StoreMotionFieldMvs() knows there is no // overlap between load and store. const MotionVector mv_to_store = bp.mv.mv[i]; const int mv_row = std::abs(mv_to_store.mv[0]); const int mv_column = std::abs(mv_to_store.mv[1]); - if (reference_frame_to_store > kReferenceFrameIntra && - // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two - // absolute values and then compare with kRefMvsLimit to save a branch. - // The next line is equivalent to: - // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit - (mv_row | mv_column) <= kRefMvsLimit && + // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two absolute + // values and then compare with kRefMvsLimit to save a branch. 
+ // The next line is equivalent to: + // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit + if ((mv_row | mv_column) <= kRefMvsLimit && reference_info->relative_distance_from[reference_frame_to_store] < 0) { const int row_start8x8 = DivideBy2(row_start4x4); const int row_limit8x8 = DivideBy2(row_limit4x4); diff --git a/src/utils/threadpool.cc b/src/utils/threadpool.cc index a3099e1..6fa2e88 100644 --- a/src/utils/threadpool.cc +++ b/src/utils/threadpool.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -216,7 +217,7 @@ void ThreadPool::WorkerThread::SetupName() { rv = pthread_setname_np(name); assert(rv == 0); static_cast(rv); -#elif defined(__ANDROID__) || defined(__GLIBC__) +#elif defined(__ANDROID__) || (defined(__GLIBC__) && !defined(__GNU__)) // If the |name| buffer is longer than 16 bytes, pthread_setname_np fails // with error 34 (ERANGE) on Android. char name[16]; diff --git a/src/utils/types.h b/src/utils/types.h index 0dd6360..c2daf1f 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -390,12 +390,13 @@ struct FilmGrainParams { uint8_t point_v_value[10]; uint8_t point_v_scaling[10]; - uint8_t chroma_scaling; // [8, 11]. - uint8_t auto_regression_coeff_lag; // [0, 3]. - int8_t auto_regression_coeff_y[24]; // [-128, 127] - int8_t auto_regression_coeff_u[25]; // [-128, 127] - int8_t auto_regression_coeff_v[25]; // [-128, 127] - // Shift value: auto regression coeffs range + uint8_t chroma_scaling; // grain_scaling_minus_8 + 8: [8, 11]. + uint8_t auto_regression_coeff_lag; // ar_coeff_lag: [0, 3]. + // ar_coeffs_{y,u,v}_plus_128 - 128: [-128, 127]. 
+ int8_t auto_regression_coeff_y[24]; + int8_t auto_regression_coeff_u[25]; + int8_t auto_regression_coeff_v[25]; + // Shift value: ar_coeff_shift_minus_6 + 6, auto regression coeffs range: // 6: [-2, 2) // 7: [-1, 1) // 8: [-0.5, 0.5) @@ -405,16 +406,12 @@ struct FilmGrainParams { uint16_t grain_seed; int reference_index; int grain_scale_shift; - // These multipliers are encoded as nonnegative values by adding 128 first. - // The 128 is subtracted during parsing. - int8_t u_multiplier; // [-128, 127] - int8_t u_luma_multiplier; // [-128, 127] - // These offsets are encoded as nonnegative values by adding 256 first. The - // 256 is subtracted during parsing. - int16_t u_offset; // [-256, 255] - int8_t v_multiplier; // [-128, 127] - int8_t v_luma_multiplier; // [-128, 127] - int16_t v_offset; // [-256, 255] + int8_t u_multiplier; // cb_mult - 128: [-128, 127]. + int8_t u_luma_multiplier; // cb_luma_mult - 128: [-128, 127]. + int16_t u_offset; // cb_offset - 256: [-256, 255]. + int8_t v_multiplier; // cr_mult - 128: [-128, 127]. + int8_t v_luma_multiplier; // cr_luma_mult - 128: [-128, 127]. + int16_t v_offset; // cr_offset - 256: [-256, 255]. }; struct ObuFrameHeader { diff --git a/src/yuv_buffer.cc b/src/yuv_buffer.cc index efb8016..85619c3 100644 --- a/src/yuv_buffer.cc +++ b/src/yuv_buffer.cc @@ -197,45 +197,58 @@ bool YuvBuffer::Realloc(int bitdepth, bool is_monochrome, int width, int height, assert(!is_monochrome || buffer_[kPlaneV] == nullptr); #if LIBGAV1_MSAN - const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t); - int width_in_bytes = width * pixel_size; + InitializeFrameBorders(); +#endif + + return true; +} + +#if LIBGAV1_MSAN +void YuvBuffer::InitializeFrameBorders() { + const int pixel_size = (bitdepth_ == 8) ? sizeof(uint8_t) : sizeof(uint16_t); + const int y_width_in_bytes = y_width_ * pixel_size; // The optimized loop restoration code will overread the visible frame buffer // into the right border. 
The optimized cfl subsambler uses the right border // as well. Initialize the right border and padding to prevent msan warnings. - int right_border_size_in_bytes = right_border * pixel_size; + const int y_right_border_size_in_bytes = right_border_[kPlaneY] * pixel_size; // Calculate the padding bytes for the buffer. Note: The stride of the buffer // is always a multiple of 16. (see yuv_buffer.h) - const int right_padding_in_bytes = - stride_[kPlaneY] - (pixel_size * (width + left_border + right_border)); - const int padded_right_border_size = - right_border_size_in_bytes + right_padding_in_bytes; - constexpr uint8_t right_val = 0x55; - uint8_t* rb = buffer_[kPlaneY] + width_in_bytes; - for (int i = 0; i < height + bottom_border; ++i) { - memset(rb, right_val, padded_right_border_size); + const int y_right_padding_in_bytes = + stride_[kPlaneY] - (pixel_size * (y_width_ + left_border_[kPlaneY] + + right_border_[kPlaneY])); + const int y_padded_right_border_size = + y_right_border_size_in_bytes + y_right_padding_in_bytes; + constexpr uint8_t kRightValue = 0x55; + uint8_t* rb = buffer_[kPlaneY] + y_width_in_bytes; + for (int i = 0; i < y_height_ + bottom_border_[kPlaneY]; ++i) { + memset(rb, kRightValue, y_padded_right_border_size); rb += stride_[kPlaneY]; } - if (!is_monochrome) { - int uv_width_in_bytes = uv_width * pixel_size; - int uv_right_border_size_in_bytes = uv_right_border * pixel_size; + + if (!is_monochrome_) { + const int uv_width_in_bytes = uv_width_ * pixel_size; + const int uv_right_border_size_in_bytes = + right_border_[kPlaneU] * pixel_size; + assert(right_border_[kPlaneU] == right_border_[kPlaneV]); const int u_right_padding_in_bytes = - stride_[kPlaneU] - - (pixel_size * (uv_width + uv_left_border + uv_right_border)); + stride_[kPlaneU] - (pixel_size * (uv_width_ + left_border_[kPlaneU] + + right_border_[kPlaneU])); const int u_padded_right_border_size = uv_right_border_size_in_bytes + u_right_padding_in_bytes; rb = buffer_[kPlaneU] + 
uv_width_in_bytes; - for (int i = 0; i < uv_height; ++i) { - memset(rb, right_val, u_padded_right_border_size); + for (int i = 0; i < uv_height_; ++i) { + memset(rb, kRightValue, u_padded_right_border_size); rb += stride_[kPlaneU]; } const int v_right_padding_in_bytes = stride_[kPlaneV] - - ((uv_width + uv_left_border + uv_right_border) * pixel_size); + ((uv_width_ + left_border_[kPlaneV] + right_border_[kPlaneV]) * + pixel_size); const int v_padded_right_border_size = uv_right_border_size_in_bytes + v_right_padding_in_bytes; rb = buffer_[kPlaneV] + uv_width_in_bytes; - for (int i = 0; i < uv_height; ++i) { - memset(rb, right_val, v_padded_right_border_size); + for (int i = 0; i < uv_height_; ++i) { + memset(rb, kRightValue, v_padded_right_border_size); rb += stride_[kPlaneV]; } } @@ -244,13 +257,11 @@ bool YuvBuffer::Realloc(int bitdepth, bool is_monochrome, int width, int height, // block) into the uninitialized visible area. The cfl subsampler can overread // into the bottom border as well. Initialize the both to quiet msan warnings. 
uint8_t* y_visible = buffer_[kPlaneY]; - for (int i = 0; i < height + bottom_border; ++i) { - memset(y_visible, right_val, width_in_bytes); + for (int i = 0; i < y_height_ + bottom_border_[kPlaneY]; ++i) { + memset(y_visible, kRightValue, y_width_in_bytes); y_visible += stride_[kPlaneY]; } -#endif - - return true; } +#endif // LIBGAV1_MSAN } // namespace libgav1 diff --git a/src/yuv_buffer.h b/src/yuv_buffer.h index b9e8cd3..d7818bd 100644 --- a/src/yuv_buffer.h +++ b/src/yuv_buffer.h @@ -24,6 +24,7 @@ #include #include "src/gav1/frame_buffer.h" +#include "src/utils/compiler_attributes.h" #include "src/utils/constants.h" namespace libgav1 { @@ -146,6 +147,11 @@ class YuvBuffer { private: static constexpr int kFrameBufferRowAlignment = 16; + +#if LIBGAV1_MSAN + void InitializeFrameBorders(); +#endif + int bitdepth_ = 0; bool is_monochrome_ = false; diff --git a/tests/fuzzer/fuzzer_temp_file.h b/tests/fuzzer/fuzzer_temp_file.h index 5d12bbe..ed8f51c 100644 --- a/tests/fuzzer/fuzzer_temp_file.h +++ b/tests/fuzzer/fuzzer_temp_file.h @@ -25,12 +25,52 @@ #include #include #include +#ifdef _WIN32 +#include +#include + +#define strdup _strdup +#define unlink _unlink +#else #include +#endif // _WIN32 // Pure-C interface for creating and cleaning up temporary files. static char* fuzzer_get_tmpfile_with_suffix(const uint8_t* data, size_t size, const char* suffix) { +#ifdef _WIN32 + // GetTempPathA generates '\
.TMP'.
+  (void)suffix;  // NOLINT (this could be a C compilation unit)
+  char temp_path[MAX_PATH];
+  const DWORD ret = GetTempPathA(MAX_PATH, temp_path);
+  if (ret == 0 || ret > MAX_PATH) {
+    fprintf(stderr, "Error getting temporary directory name: %lu\n",
+            GetLastError());
+    abort();
+  }
+  char* filename_buffer =
+      (char*)malloc(MAX_PATH);  // NOLINT (this could be a C compilation unit)
+  if (!filename_buffer) {
+    perror("Failed to allocate file name buffer.");
+    abort();
+  }
+  if (GetTempFileNameA(temp_path, "ftf", /*uUnique=*/0, filename_buffer) == 0) {
+    fprintf(stderr, "Error getting temporary file name: %lu\n", GetLastError());
+    abort();
+  }
+#if defined(_MSC_VER) || defined(MINGW_HAS_SECURE_API)
+  FILE* file;
+  const errno_t err = fopen_s(&file, filename_buffer, "wb");
+  if (err != 0) file = NULL;  // NOLINT (this could be a C compilation unit)
+#else
+  FILE* file = fopen(filename_buffer, "wb");
+#endif
+  if (!file) {
+    perror("Failed to open file.");
+    abort();
+  }
+#else  // !_WIN32
   if (suffix == NULL) {  // NOLINT (this could be a C compilation unit)
     suffix = "";
   }
@@ -55,7 +95,7 @@ static char* fuzzer_get_tmpfile_with_suffix(const uint8_t* data, size_t size,
   }
 
   if (snprintf(filename_buffer, buffer_sz, "%s%s", leading_temp_path, suffix) >=
-      buffer_sz) {
+      (int)buffer_sz) {  // NOLINT (this could be a C compilation unit)
     perror("File name buffer too short.");
     abort();
   }
@@ -71,9 +111,10 @@ static char* fuzzer_get_tmpfile_with_suffix(const uint8_t* data, size_t size,
     close(file_descriptor);
     abort();
   }
+#endif  // _WIN32
   const size_t bytes_written = fwrite(data, sizeof(uint8_t), size, file);
   if (bytes_written < size) {
-    close(file_descriptor);
+    fclose(file);
     fprintf(stderr, "Failed to write all bytes to file (%zu out of %zu)",
             bytes_written, size);
     abort();
diff --git a/tests/fuzzer/obu_parser_fuzzer.cc b/tests/fuzzer/obu_parser_fuzzer.cc
index 634a802..f71ca17 100644
--- a/tests/fuzzer/obu_parser_fuzzer.cc
+++ b/tests/fuzzer/obu_parser_fuzzer.cc
@@ -41,6 +41,11 @@ constexpr size_t kMaxDataSize = 200 * 1024;
 #endif
 
 inline void ParseObu(const uint8_t* const data, size_t size) {
+  size_t av1c_size;
+  const std::unique_ptr<uint8_t[]> av1c_box =
+      libgav1::ObuParser::GetAV1CodecConfigurationBox(data, size, &av1c_size);
+  static_cast<void>(av1c_box);
+
   libgav1::InternalFrameBufferList buffer_list;
   libgav1::BufferPool buffer_pool(libgav1::OnInternalFrameBufferSizeChanged,
                                   libgav1::GetInternalFrameBuffer,
diff --git a/tests/libgav1_tests.cmake b/tests/libgav1_tests.cmake
index c759d4f..95f6361 100644
--- a/tests/libgav1_tests.cmake
+++ b/tests/libgav1_tests.cmake
@@ -28,7 +28,7 @@ if(NOT LIBGAV1_ENABLE_TESTS OR NOT EXISTS "${libgav1_googletest}")
       "GoogleTest not found, setting LIBGAV1_ENABLE_TESTS to false.\n"
       "To enable tests download the GoogleTest repository to"
       " third_party/googletest:\n\n  git \\\n    -C ${libgav1_root} \\\n"
-      "    clone \\\n"
+      "    clone -b release-1.12.1 --depth 1 \\\n"
       "    https://github.com/google/googletest.git third_party/googletest\n")
     set(LIBGAV1_ENABLE_TESTS FALSE CACHE BOOL "Enables tests." FORCE)
   endif()
@@ -86,13 +86,17 @@ list(APPEND libgav1_common_avx2_test_sources
             "${libgav1_source}/dsp/x86/common_avx2.h"
             "${libgav1_source}/dsp/x86/common_avx2.inc"
             "${libgav1_source}/dsp/x86/common_avx2_test.cc"
+            "${libgav1_source}/dsp/x86/common_avx2_test.h"
             "${libgav1_source}/dsp/x86/common_sse4.inc")
+list(APPEND libgav1_common_dsp_test_sources
+            "${libgav1_source}/dsp/common_dsp_test.cc")
 list(APPEND libgav1_common_neon_test_sources
             "${libgav1_source}/dsp/arm/common_neon_test.cc")
 list(APPEND libgav1_common_sse4_test_sources
             "${libgav1_source}/dsp/x86/common_sse4.h"
             "${libgav1_source}/dsp/x86/common_sse4.inc"
-            "${libgav1_source}/dsp/x86/common_sse4_test.cc")
+            "${libgav1_source}/dsp/x86/common_sse4_test.cc"
+            "${libgav1_source}/dsp/x86/common_sse4_test.h")
 list(APPEND libgav1_convolve_test_sources
             "${libgav1_source}/dsp/convolve_test.cc")
 list(APPEND libgav1_cpu_test_sources "${libgav1_source}/utils/cpu_test.cc")
@@ -275,19 +279,29 @@ macro(libgav1_add_tests_targets)
                          libgav1_gtest_main)
 
   if(libgav1_have_avx2)
+    list(APPEND libgav1_common_dsp_test_sources
+                ${libgav1_common_avx2_test_sources})
+  endif()
+  if(libgav1_have_sse4)
+    list(APPEND libgav1_common_dsp_test_sources
+                ${libgav1_common_sse4_test_sources})
+  endif()
+  if(libgav1_have_avx2 OR libgav1_have_sse4)
     libgav1_add_executable(TEST
                            NAME
-                           common_avx2_test
+                           common_dsp_test
                            SOURCES
-                           ${libgav1_common_avx2_test_sources}
+                           ${libgav1_common_dsp_test_sources}
                            DEFINES
                            ${libgav1_defines}
                            INCLUDES
                            ${libgav1_test_include_paths}
+                           OBJLIB_DEPS
+                           libgav1_utils
                            LIB_DEPS
                            ${libgav1_common_test_absl_deps}
-                           libgav1_gtest
-                           libgav1_gtest_main)
+                           libgav1_gtest_main
+                           libgav1_gtest)
   endif()
 
   if(libgav1_have_neon)
@@ -302,22 +316,7 @@ macro(libgav1_add_tests_targets)
                            ${libgav1_test_include_paths}
                            OBJLIB_DEPS
                            libgav1_tests_block_utils
-                           LIB_DEPS
-                           ${libgav1_common_test_absl_deps}
-                           libgav1_gtest
-                           libgav1_gtest_main)
-  endif()
-
-  if(libgav1_have_sse4)
-    libgav1_add_executable(TEST
-                           NAME
-                           common_sse4_test
-                           SOURCES
-                           ${libgav1_common_sse4_test_sources}
-                           DEFINES
-                           ${libgav1_defines}
-                           INCLUDES
-                           ${libgav1_test_include_paths}
+                           libgav1_utils
                            LIB_DEPS
                            ${libgav1_common_test_absl_deps}
                            libgav1_gtest
-- 
cgit v1.2.3