diff options
author | Chih-hung Hsieh <chh@google.com> | 2016-01-20 17:50:13 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2016-01-20 17:50:13 +0000 |
commit | b3cb8ab4ede8bb77f0bdef2715efc2c1e6267072 (patch) | |
tree | 28c4cf735dd5bd9cc8f1ccd06fff8a173b20d1cb /webrtc/modules/video_processing/util/denoiser_filter_neon.cc | |
parent | a4acd9d6bc9b3b033d7d274316e75ee067df8d20 (diff) | |
parent | 9a337512d97e37afc142dee4fd50a41b741a87d2 (diff) | |
download | webrtc-nougat-mr2-pixel-release.tar.gz |
Merge "Merge upstream SHA 04cb763"android-cts_7.1_r1android-cts-7.1_r9android-cts-7.1_r8android-cts-7.1_r7android-cts-7.1_r6android-cts-7.1_r5android-cts-7.1_r4android-cts-7.1_r3android-cts-7.1_r29android-cts-7.1_r28android-cts-7.1_r27android-cts-7.1_r26android-cts-7.1_r25android-cts-7.1_r24android-cts-7.1_r23android-cts-7.1_r22android-cts-7.1_r21android-cts-7.1_r20android-cts-7.1_r2android-cts-7.1_r19android-cts-7.1_r18android-cts-7.1_r17android-cts-7.1_r16android-cts-7.1_r15android-cts-7.1_r14android-cts-7.1_r13android-cts-7.1_r12android-cts-7.1_r11android-cts-7.1_r10android-cts-7.1_r1android-cts-7.0_r9android-cts-7.0_r8android-cts-7.0_r7android-cts-7.0_r6android-cts-7.0_r5android-cts-7.0_r4android-cts-7.0_r33android-cts-7.0_r32android-cts-7.0_r31android-cts-7.0_r30android-cts-7.0_r3android-cts-7.0_r29android-cts-7.0_r28android-cts-7.0_r27android-cts-7.0_r26android-cts-7.0_r25android-cts-7.0_r24android-cts-7.0_r23android-cts-7.0_r22android-cts-7.0_r21android-cts-7.0_r20android-cts-7.0_r2android-cts-7.0_r19android-cts-7.0_r18android-cts-7.0_r17android-cts-7.0_r16android-cts-7.0_r15android-cts-7.0_r14android-cts-7.0_r13android-cts-7.0_r12android-cts-7.0_r11android-cts-7.0_r10android-cts-7.0_r1android-7.1.2_r9android-7.1.2_r8android-7.1.2_r6android-7.1.2_r5android-7.1.2_r4android-7.1.2_r39android-7.1.2_r38android-7.1.2_r37android-7.1.2_r36android-7.1.2_r33android-7.1.2_r32android-7.1.2_r30android-7.1.2_r3android-7.1.2_r29android-7.1.2_r28android-7.1.2_r27android-7.1.2_r25android-7.1.2_r24android-7.1.2_r23android-7.1.2_r2android-7.1.2_r19android-7.1.2_r18android-7.1.2_r17android-7.1.2_r16android-7.1.2_r15android-7.1.2_r14android-7.1.2_r13android-7.1.2_r12android-7.1.2_r11android-7.1.2_r10android-7.1.2_r1android-7.1.1_r9android-7.1.1_r8android-7.1.1_r7android-7.1.1_r61android-7.1.1_r60android-7.1.1_r6android-7.1.1_r59android-7.1.1_r58android-7.1.1_r57android-7.1.1_r56android-7.1.1_r55android-7.1.1_r54android-7.1.1_r53android-7.1.1_r52android-7.1.1_r51android-7.1.1_r50android-7.1.1_r49android-7.1.1_r48android-7.1.1_r47android-7.1.1_r46android-7.1.1_r45android-7.1.1_r44android-7.1.1_r43android-7.1.1_r42android-7.1.1_r41android-7.1.1_r40android-7.1.1_r4android-7.1.1_r39android-7.1.1_r38android-7.1.1_r35android-7.1.1_r33android-7.1.1_r32android-7.1.1_r31android-7.1.1_r3android-7.1.1_r28android-7.1.1_r27android-7.1.1_r26android-7.1.1_r25android-7.1.1_r24android-7.1.1_r23android-7.1.1_r22android-7.1.1_r21android-7.1.1_r20android-7.1.1_r2android-7.1.1_r17android-7.1.1_r16android-7.1.1_r15android-7.1.1_r14android-7.1.1_r13android-7.1.1_r12android-7.1.1_r11android-7.1.1_r10android-7.1.1_r1android-7.1.0_r7android-7.1.0_r6android-7.1.0_r5android-7.1.0_r4android-7.1.0_r3android-7.1.0_r2android-7.1.0_r1android-7.0.0_r9android-7.0.0_r8android-7.0.0_r7android-7.0.0_r6android-7.0.0_r5android-7.0.0_r4android-7.0.0_r36android-7.0.0_r35android-7.0.0_r34android-7.0.0_r33android-7.0.0_r32android-7.0.0_r31android-7.0.0_r30android-7.0.0_r3android-7.0.0_r29android-7.0.0_r28android-7.0.0_r27android-7.0.0_r24android-7.0.0_r21android-7.0.0_r19android-7.0.0_r17android-7.0.0_r15android-7.0.0_r14android-7.0.0_r13android-7.0.0_r12android-7.0.0_r11android-7.0.0_r10android-7.0.0_r1nougat-releasenougat-mr2.3-releasenougat-mr2.2-releasenougat-mr2.1-releasenougat-mr2-security-releasenougat-mr2-releasenougat-mr2-pixel-releasenougat-mr2-devnougat-mr1.8-releasenougat-mr1.7-releasenougat-mr1.6-releasenougat-mr1.5-releasenougat-mr1.4-releasenougat-mr1.3-releasenougat-mr1.2-releasenougat-mr1.1-releasenougat-mr1-volantis-releasenougat-mr1-security-releasenougat-mr1-releasenougat-mr1-flounder-releasenougat-mr1-devnougat-mr1-cts-releasenougat-mr0.5-releasenougat-dr1-releasenougat-devnougat-cts-releasenougat-bugfix-release
am: 9a337512d9
* commit '9a337512d97e37afc142dee4fd50a41b741a87d2': (797 commits)
Add tests for verifying transport feedback for audio and video.
Eliminate defines in talk/
Revert of Update with new default boringssl no-aes cipher suites. Re-enable tests. (patchset #3 id:40001 of https://codereview.webrtc.org/1550773002/ )
Remove assert which was incorrectly added to TcpPort::OnSentPacket.
Reland Connect TurnPort and TCPPort to AsyncPacketSocket::SignalSentPacket.
Update with new default boringssl no-aes cipher suites. Re-enable tests.
Revert of Connect TurnPort and TCPPort to AsyncPacketSocket::SignalSentPacket. (patchset #3 id:40001 of https://codereview.webrtc.org/1577873003/ )
Re-land: "Use an explicit identifier in Config"
Connect TurnPort and TCPPort to AsyncPacketSocket::SignalSentPacket.
Revert of Delete remnants of non-square pixel support from cricket::VideoFrame. (patchset #1 id:1 of https://codereview.webrtc.org/1586613002/ )
Remove libfuzzer trybot from default trybot set.
Add ramp-up tests for transport sequence number with and w/o audio.
Delete remnants of non-square pixel support from cricket::VideoFrame.
Fix IPAddress::ToSensitiveString() to avoid dependency on inet_ntop().
Revert of Storing raw audio sink for default audio track. (patchset #7 id:120001 of https://codereview.chromium.org/1551813002/ )
Re-enable tests that failed under Linux_Msan.
Revert of Use an explicit identifier in Config (patchset #4 id:60001 of https://codereview.webrtc.org/1538643004/ )
Roll chromium_revision 346fea9..099be58 (369082:369139)
Disable WebRtcVideoChannel2BaseTest.SendManyResizeOnce for TSan
Add build_protobuf variable.
...
Diffstat (limited to 'webrtc/modules/video_processing/util/denoiser_filter_neon.cc')
-rw-r--r-- | webrtc/modules/video_processing/util/denoiser_filter_neon.cc | 283 |
1 files changed, 283 insertions, 0 deletions
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc new file mode 100644 index 0000000000..b522bf002b --- /dev/null +++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> + +#include "webrtc/modules/video_processing/util/denoiser_filter_neon.h" + +namespace webrtc { + +static int HorizontalAddS16x8(const int16x8_t v_16x8) { + const int32x4_t a = vpaddlq_s16(v_16x8); + const int64x2_t b = vpaddlq_s32(a); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +static int HorizontalAddS32x4(const int32x4_t v_32x4) { + const int64x2_t b = vpaddlq_s32(v_32x4); + const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)), + vreinterpret_s32_s64(vget_high_s64(b))); + return vget_lane_s32(c, 0); +} + +static void VarianceNeonW8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + int w, + int h, + uint32_t* sse, + int64_t* sum) { + int16x8_t v_sum = vdupq_n_s16(0); + int32x4_t v_sse_lo = vdupq_n_s32(0); + int32x4_t v_sse_hi = vdupq_n_s32(0); + + for (int i = 0; i < h; ++i) { + for (int j = 0; j < w; j += 8) { + const uint8x8_t v_a = vld1_u8(&a[j]); + const uint8x8_t v_b = vld1_u8(&b[j]); + const uint16x8_t v_diff = vsubl_u8(v_a, v_b); + const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff); + v_sum = vaddq_s16(v_sum, sv_diff); + v_sse_lo = + vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff)); + v_sse_hi = + vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff)); + } + a += a_stride; + b += b_stride; + } + + *sum = HorizontalAddS16x8(v_sum); + *sse = + static_cast<uint32_t>(HorizontalAddS32x4(vaddq_s32(v_sse_lo, v_sse_hi))); +} + +void DenoiserFilterNEON::CopyMem16x16(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + uint8x16_t qtmp; + for (int r = 0; r < 16; r++) { + qtmp = vld1q_u8(src); + vst1q_u8(dst, qtmp); + src += src_stride; + dst += dst_stride; + } +} + +void DenoiserFilterNEON::CopyMem8x8(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride) { + uint8x8_t vtmp; + + for (int r = 0; r < 8; r++) { + vtmp = vld1_u8(src); + vst1_u8(dst, vtmp); + src += src_stride; + dst += dst_stride; + } +} + +uint32_t DenoiserFilterNEON::Variance16x8(const uint8_t* a, + int a_stride, + const uint8_t* b, + int b_stride, + uint32_t* sse) { + int64_t sum = 0; + VarianceNeonW8(a, a_stride << 1, b, b_stride << 1, 16, 8, sse, &sum); + return *sse - ((sum * sum) >> 7); +} + +DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y, + int mc_running_avg_y_stride, + uint8_t* running_avg_y, + int running_avg_y_stride, + const uint8_t* sig, + int sig_stride, + uint8_t motion_magnitude, + int increase_denoising) { + // If motion_magnitude is small, making the denoiser more aggressive by + // increasing the adjustment for each level, level1 adjustment is + // increased, the deltas stay the same. + int shift_inc = + (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) ? 1 + : 0; + const uint8x16_t v_level1_adjustment = vmovq_n_u8( + (motion_magnitude <= kMotionMagnitudeThreshold) ? 4 + shift_inc : 3); + const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); + const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); + const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); + const uint8x16_t v_level2_threshold = vdupq_n_u8(8); + const uint8x16_t v_level3_threshold = vdupq_n_u8(16); + int64x2_t v_sum_diff_total = vdupq_n_s64(0); + + // Go over lines. + for (int r = 0; r < 16; ++r) { + // Load inputs. + const uint8x16_t v_sig = vld1q_u8(sig); + const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); + + // Calculate absolute difference and sign masks. + const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); + + // Figure out which level that put us in. + const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, v_abs_diff); + const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, v_abs_diff); + const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, v_abs_diff); + + // Calculate absolute adjustments for level 1, 2 and 3. + const uint8x16_t v_level2_adjustment = + vandq_u8(v_level2_mask, v_delta_level_1_and_2); + const uint8x16_t v_level3_adjustment = + vandq_u8(v_level3_mask, v_delta_level_2_and_3); + const uint8x16_t v_level1and2_adjustment = + vaddq_u8(v_level1_adjustment, v_level2_adjustment); + const uint8x16_t v_level1and2and3_adjustment = + vaddq_u8(v_level1and2_adjustment, v_level3_adjustment); + + // Figure adjustment absolute value by selecting between the absolute + // difference if in level0 or the value for level 1, 2 and 3. + const uint8x16_t v_abs_adjustment = + vbslq_u8(v_level1_mask, v_level1and2and3_adjustment, v_abs_diff); + + // Calculate positive and negative adjustments. Apply them to the signal + // and accumulate them. Adjustments are less than eight and the maximum + // sum of them (7 * 16) can fit in a signed char. + const uint8x16_t v_pos_adjustment = + vandq_u8(v_diff_pos_mask, v_abs_adjustment); + const uint8x16_t v_neg_adjustment = + vandq_u8(v_diff_neg_mask, v_abs_adjustment); + + uint8x16_t v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment); + v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment); + + // Store results. + vst1q_u8(running_avg_y, v_running_avg_y); + + // Sum all the accumulators to have the sum of all pixel differences + // for this macroblock. + { + const int8x16_t v_sum_diff = + vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), + vreinterpretq_s8_u8(v_neg_adjustment)); + const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); + const int32x4_t fedc_ba98_7654_3210 = + vpaddlq_s16(fe_dc_ba_98_76_54_32_10); + const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); + + v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); + } + + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_running_avg_y_stride; + running_avg_y += running_avg_y_stride; + } + + // Too much adjustments => copy block. + { + int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), + vget_low_s64(v_sum_diff_total)); + int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); + int sum_diff_thresh = kSumDiffThreshold; + + if (increase_denoising) + sum_diff_thresh = kSumDiffThresholdHigh; + if (sum_diff > sum_diff_thresh) { + // Before returning to copy the block (i.e., apply no denoising), + // checK if we can still apply some (weaker) temporal filtering to + // this block, that would otherwise not be denoised at all. Simplest + // is to apply an additional adjustment to running_avg_y to bring it + // closer to sig. The adjustment is capped by a maximum delta, and + // chosen such that in most cases the resulting sum_diff will be + // within the accceptable range given by sum_diff_thresh. + + // The delta is set by the excess of absolute pixel diff over the + // threshold. + int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; + // Only apply the adjustment for max delta up to 3. + if (delta < 4) { + const uint8x16_t k_delta = vmovq_n_u8(delta); + sig -= sig_stride * 16; + mc_running_avg_y -= mc_running_avg_y_stride * 16; + running_avg_y -= running_avg_y_stride * 16; + for (int r = 0; r < 16; ++r) { + uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); + const uint8x16_t v_sig = vld1q_u8(sig); + const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); + + // Calculate absolute difference and sign masks. + const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_pos_mask = + vcltq_u8(v_sig, v_mc_running_avg_y); + const uint8x16_t v_diff_neg_mask = + vcgtq_u8(v_sig, v_mc_running_avg_y); + // Clamp absolute difference to delta to get the adjustment. + const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); + + const uint8x16_t v_pos_adjustment = + vandq_u8(v_diff_pos_mask, v_abs_adjustment); + const uint8x16_t v_neg_adjustment = + vandq_u8(v_diff_neg_mask, v_abs_adjustment); + + v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); + v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); + + // Store results. + vst1q_u8(running_avg_y, v_running_avg_y); + + { + const int8x16_t v_sum_diff = + vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), + vreinterpretq_s8_u8(v_pos_adjustment)); + + const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); + const int32x4_t fedc_ba98_7654_3210 = + vpaddlq_s16(fe_dc_ba_98_76_54_32_10); + const int64x2_t fedcba98_76543210 = + vpaddlq_s32(fedc_ba98_7654_3210); + + v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); + } + // Update pointers for next iteration. + sig += sig_stride; + mc_running_avg_y += mc_running_avg_y_stride; + running_avg_y += running_avg_y_stride; + } + { + // Update the sum of all pixel differences of this MB. + x = vqadd_s64(vget_high_s64(v_sum_diff_total), + vget_low_s64(v_sum_diff_total)); + sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); + + if (sum_diff > sum_diff_thresh) { + return COPY_BLOCK; + } + } + } else { + return COPY_BLOCK; + } + } + } + + // Tell above level that block was filtered. + running_avg_y -= running_avg_y_stride * 16; + sig -= sig_stride * 16; + + return FILTER_BLOCK; +} + +} // namespace webrtc |