diff options
Diffstat (limited to 'src/libFLAC/fixed_intrin_sse42.c')
-rw-r--r-- | src/libFLAC/fixed_intrin_sse42.c | 22 |
1 file changed, 11 insertions, 11 deletions
diff --git a/src/libFLAC/fixed_intrin_sse42.c b/src/libFLAC/fixed_intrin_sse42.c index 6be407e2..0556eaad 100644 --- a/src/libFLAC/fixed_intrin_sse42.c +++ b/src/libFLAC/fixed_intrin_sse42.c @@ -1,6 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library * Copyright (C) 2000-2009 Josh Coalson - * Copyright (C) 2011-2022 Xiph.Org Foundation + * Copyright (C) 2011-2023 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -130,7 +130,7 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_intrin_sse42(const FL tempB = _mm_xor_si128(tempA, bitmask); tempB = _mm_sub_epi64(tempB, bitmask); total_err0 = _mm_add_epi64(total_err0,tempB); - shadow_err0 = _mm_xor_si128(shadow_err0,tempB); + shadow_err0 = _mm_or_si128(shadow_err0,tempB); tempB = _mm_sub_epi64(tempA,prev_err0); prev_err0 = tempA; /* Next three intrinsics calculate tempA as abs of tempB */ @@ -138,7 +138,7 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_intrin_sse42(const FL tempA = _mm_xor_si128(tempB, bitmask); tempA = _mm_sub_epi64(tempA, bitmask); total_err1 = _mm_add_epi64(total_err1,tempA); - shadow_err1 = _mm_xor_si128(shadow_err1,tempA); + shadow_err1 = _mm_or_si128(shadow_err1,tempA); tempA = _mm_sub_epi64(tempB,prev_err1); prev_err1 = tempB; /* Next three intrinsics calculate tempB as abs of tempA */ @@ -146,7 +146,7 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_intrin_sse42(const FL tempB = _mm_xor_si128(tempA, bitmask); tempB = _mm_sub_epi64(tempB, bitmask); total_err2 = _mm_add_epi64(total_err2,tempB); - shadow_err2 = _mm_xor_si128(shadow_err2,tempB); + shadow_err2 = _mm_or_si128(shadow_err2,tempB); tempB = _mm_sub_epi64(tempA,prev_err2); prev_err2 = tempA; /* Next three intrinsics calculate tempA as abs of tempB */ @@ -154,7 +154,7 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_intrin_sse42(const FL tempA = _mm_xor_si128(tempB, bitmask); 
tempA = _mm_sub_epi64(tempA, bitmask); total_err3 = _mm_add_epi64(total_err3,tempA); - shadow_err3 = _mm_xor_si128(shadow_err3,tempA); + shadow_err3 = _mm_or_si128(shadow_err3,tempA); tempA = _mm_sub_epi64(tempB,prev_err3); prev_err3 = tempB; /* Next three intrinsics calculate tempB as abs of tempA */ @@ -162,7 +162,7 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_intrin_sse42(const FL tempB = _mm_xor_si128(tempA, bitmask); tempB = _mm_sub_epi64(tempB, bitmask); total_err4 = _mm_add_epi64(total_err4,tempB); - shadow_err4 = _mm_xor_si128(shadow_err4,tempB); + shadow_err4 = _mm_or_si128(shadow_err4,tempB); } _mm_storeu_si128((__m128i*)data_scalar,total_err0); total_error_0 += data_scalar[0] + data_scalar[1]; @@ -175,15 +175,15 @@ uint32_t FLAC__fixed_compute_best_predictor_limit_residual_intrin_sse42(const FL _mm_storeu_si128((__m128i*)data_scalar,total_err4); total_error_4 += data_scalar[0] + data_scalar[1]; _mm_storeu_si128((__m128i*)data_scalar,shadow_err0); - shadow_error_0 += data_scalar[0] | data_scalar[1]; + shadow_error_0 |= data_scalar[0] | data_scalar[1]; _mm_storeu_si128((__m128i*)data_scalar,shadow_err1); - shadow_error_1 += data_scalar[0] | data_scalar[1]; + shadow_error_1 |= data_scalar[0] | data_scalar[1]; _mm_storeu_si128((__m128i*)data_scalar,shadow_err2); - shadow_error_2 += data_scalar[0] | data_scalar[1]; + shadow_error_2 |= data_scalar[0] | data_scalar[1]; _mm_storeu_si128((__m128i*)data_scalar,shadow_err3); - shadow_error_3 += data_scalar[0] | data_scalar[1]; + shadow_error_3 |= data_scalar[0] | data_scalar[1]; _mm_storeu_si128((__m128i*)data_scalar,shadow_err4); - shadow_error_4 += data_scalar[0] | data_scalar[1]; + shadow_error_4 |= data_scalar[0] | data_scalar[1]; /* Take care of remaining sample */ if(data_len_int % 2 > 0) { |