Diffstat (limited to 'webrtc/modules/video_processing/content_analysis.cc')
-rw-r--r--  webrtc/modules/video_processing/content_analysis.cc  281
1 file changed, 281 insertions, 0 deletions
diff --git a/webrtc/modules/video_processing/content_analysis.cc b/webrtc/modules/video_processing/content_analysis.cc
new file mode 100644
index 0000000000..54c04da466
--- /dev/null
+++ b/webrtc/modules/video_processing/content_analysis.cc
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "webrtc/modules/video_processing/content_analysis.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>  // memcpy
+
+#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
+#include "webrtc/system_wrappers/include/tick_util.h"
+
+namespace webrtc {
+
+VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
+ : orig_frame_(NULL),
+ prev_frame_(NULL),
+ width_(0),
+ height_(0),
+ skip_num_(1),
+ border_(8),
+ motion_magnitude_(0.0f),
+ spatial_pred_err_(0.0f),
+ spatial_pred_err_h_(0.0f),
+ spatial_pred_err_v_(0.0f),
+ first_frame_(true),
+ ca_Init_(false),
+ content_metrics_(NULL) {
+ ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
+ TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;
+
+ if (runtime_cpu_detection) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+ if (WebRtc_GetCPUInfo(kSSE2)) {
+ ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
+ TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
+ }
+#endif
+ }
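+  // Release() resets the analysis state; Initialize() runs when the first
+  // frame is passed to ComputeContentMetrics().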
+ Release();
+}
+
+VPMContentAnalysis::~VPMContentAnalysis() {
+ Release();
+}
+
+VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
+ const VideoFrame& inputFrame) {
+ if (inputFrame.IsZeroSize())
+ return NULL;
+
+ // Init if needed (native dimension change).
+ if (width_ != inputFrame.width() || height_ != inputFrame.height()) {
+ if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height()))
+ return NULL;
+ }
+ // Only interested in the Y plane.
+ orig_frame_ = inputFrame.buffer(kYPlane);
+
+ // Compute spatial metrics: 3 spatial prediction errors.
+ (this->*ComputeSpatialMetrics)();
+
+  // Compute motion metrics; skipped for the first frame (no previous frame).
+ if (first_frame_ == false)
+ ComputeMotionMetrics();
+
+  // Save the current frame as the previous one: Y only.
+ memcpy(prev_frame_, orig_frame_, width_ * height_);
+
+ first_frame_ = false;
+ ca_Init_ = true;
+
+ return ContentMetrics();
+}
+
+int32_t VPMContentAnalysis::Release() {
+ if (content_metrics_ != NULL) {
+ delete content_metrics_;
+ content_metrics_ = NULL;
+ }
+
+ if (prev_frame_ != NULL) {
+ delete[] prev_frame_;
+ prev_frame_ = NULL;
+ }
+
+ width_ = 0;
+ height_ = 0;
+ first_frame_ = true;
+
+ return VPM_OK;
+}
+
+int32_t VPMContentAnalysis::Initialize(int width, int height) {
+ width_ = width;
+ height_ = height;
+ first_frame_ = true;
+
+  // Skip parameter: number of rows to skip, for complexity reduction.
+  // The temporal metric currently also uses it for column reduction.
+ skip_num_ = 1;
+
+  // Use skip_num_ = 2 for 4CIF and WHD.
+ if ((height_ >= 576) && (width_ >= 704)) {
+ skip_num_ = 2;
+ }
+  // Use skip_num_ = 4 for full-HD images.
+ if ((height_ >= 1080) && (width_ >= 1920)) {
+ skip_num_ = 4;
+ }
+
+ if (content_metrics_ != NULL) {
+ delete content_metrics_;
+ }
+
+ if (prev_frame_ != NULL) {
+ delete[] prev_frame_;
+ }
+
+  // The spatial metrics skip a border of 8 pixels and need a minimum
+  // processing block of 16 pixels, so the frame must be larger than 32x32.
+ if (width_ <= 32 || height_ <= 32) {
+ ca_Init_ = false;
+ return VPM_PARAMETER_ERROR;
+ }
+
+ content_metrics_ = new VideoContentMetrics();
+ if (content_metrics_ == NULL) {
+ return VPM_MEMORY;
+ }
+
+ prev_frame_ = new uint8_t[width_ * height_]; // Y only.
+ if (prev_frame_ == NULL)
+ return VPM_MEMORY;
+
+ return VPM_OK;
+}
+
+// Compute motion metrics. Only one metric is currently derived: the
+// normalized (MAD) temporal difference.
+int32_t VPMContentAnalysis::ComputeMotionMetrics() {
+ (this->*TemporalDiffMetric)();
+ return VPM_OK;
+}
+
+// Normalized temporal difference (MAD): used as a motion level metric.
+// The MAD is normalized by spatial contrast: images with more contrast
+// (pixel variance) are likely to have a larger temporal difference.
+// To reduce complexity, the metric is computed on a reduced set of points.
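+// In effect, over the sampled pixels of the current frame Y and previous
+// frame Y_prev:
+//   motion_magnitude_ = mean(|Y - Y_prev|) / sqrt(mean(Y^2) - mean(Y)^2)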
+int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
+ // size of original frame
+ int sizei = height_;
+ int sizej = width_;
+ uint32_t tempDiffSum = 0;
+ uint32_t pixelSum = 0;
+ uint64_t pixelSqSum = 0;
+
+ uint32_t num_pixels = 0; // Counter for # of pixels.
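+  // Restrict the processed width to a multiple of 16 inside the border.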
+ const int width_end = ((width_ - 2 * border_) & -16) + border_;
+
+ for (int i = border_; i < sizei - border_; i += skip_num_) {
+ for (int j = border_; j < width_end; j++) {
+ num_pixels += 1;
+ int ssn = i * sizej + j;
+
+ uint8_t currPixel = orig_frame_[ssn];
+ uint8_t prevPixel = prev_frame_[ssn];
+
+ tempDiffSum +=
+ static_cast<uint32_t>(abs((int16_t)(currPixel - prevPixel)));
+ pixelSum += static_cast<uint32_t>(currPixel);
+ pixelSqSum += static_cast<uint64_t>(currPixel * currPixel);
+ }
+ }
+
+ // Default.
+ motion_magnitude_ = 0.0f;
+
+ if (tempDiffSum == 0)
+ return VPM_OK;
+
+ // Normalize over all pixels.
+ float const tempDiffAvg =
+ static_cast<float>(tempDiffSum) / static_cast<float>(num_pixels);
+ float const pixelSumAvg =
+ static_cast<float>(pixelSum) / static_cast<float>(num_pixels);
+ float const pixelSqSumAvg =
+ static_cast<float>(pixelSqSum) / static_cast<float>(num_pixels);
+ float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg);
+
+ if (contrast > 0.0) {
+ contrast = sqrt(contrast);
+ motion_magnitude_ = tempDiffAvg / contrast;
+ }
+ return VPM_OK;
+}
+
+// Compute spatial metrics:
+// To reduce complexity, the metrics are computed on a reduced set of points.
+// The spatial metrics are rough estimates of the prediction error cost for
+// each QM spatial mode: 2x2, 1x2, 2x1.
+// Each metric is a simple estimate of the up-sampling prediction error,
+// assuming sub-sampling for decimation (no filtering) and up-sampling back
+// with simple bilinear interpolation.
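+// Per sampled pixel, with center C and top/bottom/left/right neighbors
+// T, B, L, R, the accumulated errors are:
+//   2x2: |4C - (T + B + L + R)|
+//   1x2: |2C - (L + R)|
+//   2x1: |2C - (T + B)|
+// Each sum is scaled down (>> 2 or >> 1) and normalized by the accumulated
+// center-pixel values (pixelMSA).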
+int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
+ const int sizei = height_;
+ const int sizej = width_;
+
+ // Pixel mean square average: used to normalize the spatial metrics.
+ uint32_t pixelMSA = 0;
+
+ uint32_t spatialErrSum = 0;
+ uint32_t spatialErrVSum = 0;
+ uint32_t spatialErrHSum = 0;
+
+  // Make sure the processed width is a multiple of 16 inside the border.
+ const int width_end = ((sizej - 2 * border_) & -16) + border_;
+
+ for (int i = border_; i < sizei - border_; i += skip_num_) {
+ for (int j = border_; j < width_end; j++) {
+ int ssn1 = i * sizej + j;
+ int ssn2 = (i + 1) * sizej + j; // bottom
+ int ssn3 = (i - 1) * sizej + j; // top
+ int ssn4 = i * sizej + j + 1; // right
+ int ssn5 = i * sizej + j - 1; // left
+
+      uint16_t refPixel1 = orig_frame_[ssn1] << 1;  // 2 * center pixel.
+      uint16_t refPixel2 = orig_frame_[ssn1] << 2;  // 4 * center pixel.
+
+ uint8_t bottPixel = orig_frame_[ssn2];
+ uint8_t topPixel = orig_frame_[ssn3];
+ uint8_t rightPixel = orig_frame_[ssn4];
+ uint8_t leftPixel = orig_frame_[ssn5];
+
+ spatialErrSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
+ refPixel2 - static_cast<uint16_t>(bottPixel + topPixel + leftPixel +
+ rightPixel))));
+ spatialErrVSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
+ refPixel1 - static_cast<uint16_t>(bottPixel + topPixel))));
+ spatialErrHSum += static_cast<uint32_t>(abs(static_cast<int16_t>(
+ refPixel1 - static_cast<uint16_t>(leftPixel + rightPixel))));
+ pixelMSA += orig_frame_[ssn1];
+ }
+ }
+
+ // Normalize over all pixels.
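+  // The shifts scale each error sum back to per-pixel units (4 neighbor
+  // terms for the 2x2 mode, 2 for the 1x2/2x1 modes) before dividing by
+  // pixelMSA.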
+ const float spatialErr = static_cast<float>(spatialErrSum >> 2);
+ const float spatialErrH = static_cast<float>(spatialErrHSum >> 1);
+ const float spatialErrV = static_cast<float>(spatialErrVSum >> 1);
+ const float norm = static_cast<float>(pixelMSA);
+
+ // 2X2:
+ spatial_pred_err_ = spatialErr / norm;
+ // 1X2:
+ spatial_pred_err_h_ = spatialErrH / norm;
+ // 2X1:
+ spatial_pred_err_v_ = spatialErrV / norm;
+ return VPM_OK;
+}
+
+VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
+ if (ca_Init_ == false)
+ return NULL;
+
+ content_metrics_->spatial_pred_err = spatial_pred_err_;
+ content_metrics_->spatial_pred_err_h = spatial_pred_err_h_;
+ content_metrics_->spatial_pred_err_v = spatial_pred_err_v_;
+ // Motion metric: normalized temporal difference (MAD).
+ content_metrics_->motion_magnitude = motion_magnitude_;
+
+ return content_metrics_;
+}
+
+} // namespace webrtc