1 files changed, 281 insertions, 0 deletions
diff --git a/webrtc/modules/video_processing/content_analysis.cc b/webrtc/modules/video_processing/content_analysis.cc
new file mode 100644
index 0000000000..54c04da466
--- /dev/null
+++ b/webrtc/modules/video_processing/content_analysis.cc
@@ -0,0 +1,281 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "webrtc/modules/video_processing/content_analysis.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
+#include "webrtc/system_wrappers/include/tick_util.h"
+
+namespace webrtc {
+
+// Builds an analyzer with no buffers allocated; ComputeContentMetrics() calls
+// Initialize() once it sees the first frame's dimensions.
+// |runtime_cpu_detection|: when true (and this is an x86 build) the CPU is
+// probed and the SSE2 kernels replace the portable C kernels if SSE2 is
+// reported.
+VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
+    : orig_frame_(NULL),
+      prev_frame_(NULL),
+      width_(0),
+      height_(0),
+      skip_num_(1),
+      border_(8),
+      motion_magnitude_(0.0f),
+      spatial_pred_err_(0.0f),
+      spatial_pred_err_h_(0.0f),
+      spatial_pred_err_v_(0.0f),
+      first_frame_(true),
+      ca_Init_(false),
+      content_metrics_(NULL) {
+  // Nothing has been allocated yet, so this only (re)sets width_, height_ and
+  // first_frame_ to their released values.
+  Release();
+
+  // The portable C kernels are the default.
+  TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;
+  ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
+
+  if (runtime_cpu_detection) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    // Switch to the SSE2 kernels only when the running CPU reports SSE2.
+    const bool has_sse2 = WebRtc_GetCPUInfo(kSSE2) != 0;
+    if (has_sse2) {
+      TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
+      ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
+    }
+#endif
+  }
+}
+
+// Frees the metrics object and the previous-frame buffer via Release().
+VPMContentAnalysis::~VPMContentAnalysis() {
+  Release();
+}
+
+// Analyzes the luma (Y) plane of |inputFrame| and returns the updated metrics.
+// Returns NULL for a zero-size frame or when (re)initialization for the
+// frame's dimensions fails. The returned pointer is the content_metrics_
+// member, which stays owned by this object (Release() deletes it).
+// Spatial metrics are recomputed on every call; the motion metric is only
+// computed once a previous frame has been stored.
+VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics(
+    const VideoFrame& inputFrame) {
+  if (inputFrame.IsZeroSize()) {
+    return NULL;
+  }
+
+  // A change of native dimensions requires fresh buffers and parameters.
+  const bool size_changed =
+      width_ != inputFrame.width() || height_ != inputFrame.height();
+  if (size_changed &&
+      Initialize(inputFrame.width(), inputFrame.height()) != VPM_OK) {
+    return NULL;
+  }
+
+  // All metrics are derived from the Y plane only.
+  orig_frame_ = inputFrame.buffer(kYPlane);
+
+  // Three spatial prediction errors (2x2, 1x2, 2x1).
+  (this->*ComputeSpatialMetrics)();
+
+  // The temporal metric needs a stored previous frame to compare against.
+  if (!first_frame_) {
+    ComputeMotionMetrics();
+  }
+
+  // Keep a copy of the current Y plane as the reference for the next call.
+  // NOTE(review): this copies width_ * height_ bytes, i.e. it assumes the
+  // plane is tightly packed (stride == width) - confirm against VideoFrame.
+  memcpy(prev_frame_, orig_frame_, width_ * height_);
+
+  ca_Init_ = true;
+  first_frame_ = false;
+
+  return ContentMetrics();
+}
+
+// Frees the metrics object and the previous-frame buffer and puts the
+// analyzer back into its released state: zero dimensions and the next frame
+// treated as the first one. Both pointers are NULLed, so calling this again
+// is harmless. Always returns VPM_OK.
+int32_t VPMContentAnalysis::Release() {
+  // delete / delete[] do nothing for a NULL pointer, so no checks are needed.
+  delete content_metrics_;
+  content_metrics_ = NULL;
+
+  delete[] prev_frame_;
+  prev_frame_ = NULL;
+
+  first_frame_ = true;
+  height_ = 0;
+  width_ = 0;
+
+  return VPM_OK;
+}
+
+// Prepares the analyzer for frames of |width| x |height| pixels: releases the
+// buffers of any previous resolution, picks the row sub-sampling step and
+// allocates the metrics object and the previous-frame (Y only) buffer.
+//
+// Returns VPM_OK on success, VPM_PARAMETER_ERROR if either dimension is <= 32,
+// or VPM_MEMORY if an allocation yields NULL. On every failure the analyzer is
+// left released (NULL buffers, zero width_/height_, ca_Init_ false), so the
+// next frame triggers a new Initialize() instead of reusing freed memory, and
+// Release()/the destructor cannot free the old buffers a second time.
+int32_t VPMContentAnalysis::Initialize(int width, int height) {
+  // Drop the old buffers first. Release() also NULLs the pointers and resets
+  // width_, height_ and first_frame_, so no failure path below can leave
+  // dangling pointers or stale dimensions behind.
+  Release();
+
+  // Row sub-sampling step used to reduce complexity: every skip_num_-th row
+  // is visited by the spatial and temporal kernels.
+  skip_num_ = 1;
+  // Use a step of 2 for 4CIF / WHD and larger.
+  if ((height >= 576) && (width >= 704)) {
+    skip_num_ = 2;
+  }
+  // Use a step of 4 for full HD and larger.
+  if ((height >= 1080) && (width >= 1920)) {
+    skip_num_ = 4;
+  }
+
+  // The spatial metrics skip a border of 8 pixels and the minimum processing
+  // block size is 16 pixels, so both dimensions must exceed 32.
+  if (width <= 32 || height <= 32) {
+    ca_Init_ = false;
+    return VPM_PARAMETER_ERROR;
+  }
+
+  content_metrics_ = new VideoContentMetrics();
+  prev_frame_ = new uint8_t[width * height];  // Y only.
+  // Plain new normally reports failure by throwing; these checks only matter
+  // for builds whose allocator returns NULL instead.
+  if (content_metrics_ == NULL || prev_frame_ == NULL) {
+    Release();
+    ca_Init_ = false;
+    return VPM_MEMORY;
+  }
+
+  // Store the dimensions only once everything that depends on them exists;
+  // ComputeContentMetrics() uses them to decide whether to re-initialize.
+  width_ = width;
+  height_ = height;
+
+  return VPM_OK;
+}
+
+// Computes the motion metric for the current frame by invoking the selected
+// temporal-difference kernel (the C kernel stores its result in
+// motion_magnitude_). Only this normalized MAD metric is derived here.
+// The kernel's return value is ignored; this function always returns VPM_OK.
+int32_t VPMContentAnalysis::ComputeMotionMetrics() {
+  // Motion metrics: only one is derived from normalized
+  //  (MAD) temporal difference
+  (this->*TemporalDiffMetric)();
+  return VPM_OK;
+}
+
+// Motion-level metric: the mean absolute difference (MAD) between the current
+// and the previous Y plane, divided by the spatial contrast (standard
+// deviation of the current pixels), because images with more contrast likely
+// have a larger temporal difference.
+// To reduce complexity only a subset of points is visited: a margin of
+// border_ pixels is skipped on every side, rows advance by skip_num_ and the
+// processed width is rounded down to a multiple of 16.
+// Writes motion_magnitude_, which stays 0 when the sampled pixels did not
+// change or the contrast is not positive. Always returns VPM_OK.
+int32_t VPMContentAnalysis::TemporalDiffMetric_C() {
+  // Width minus both borders, rounded down to a multiple of 16.
+  const int col_end = ((width_ - 2 * border_) & -16) + border_;
+
+  uint32_t sample_count = 0;  // Number of pixels visited.
+  uint32_t abs_diff_total = 0;
+  uint32_t luma_total = 0;
+  uint64_t luma_sq_total = 0;
+
+  for (int row = border_; row < height_ - border_; row += skip_num_) {
+    const int row_offset = row * width_;
+    for (int col = border_; col < col_end; ++col) {
+      const int cur = orig_frame_[row_offset + col];
+      const int prev = prev_frame_[row_offset + col];
+
+      ++sample_count;
+      abs_diff_total += static_cast<uint32_t>(abs(cur - prev));
+      luma_total += static_cast<uint32_t>(cur);
+      luma_sq_total += static_cast<uint64_t>(cur * cur);
+    }
+  }
+
+  // Default.
+  motion_magnitude_ = 0.0f;
+
+  if (abs_diff_total != 0) {
+    // Normalize over all visited pixels.
+    const float count = static_cast<float>(sample_count);
+    const float mean_abs_diff = static_cast<float>(abs_diff_total) / count;
+    const float mean = static_cast<float>(luma_total) / count;
+    const float mean_sq = static_cast<float>(luma_sq_total) / count;
+
+    // Pixel variance: E[x^2] - E[x]^2.
+    const float variance = mean_sq - (mean * mean);
+    if (variance > 0.0) {
+      const float contrast = sqrt(variance);
+      motion_magnitude_ = mean_abs_diff / contrast;
+    }
+  }
+  return VPM_OK;
+}
+
+// Compute spatial metrics:
+// The spatial metrics are rough estimates of the prediction error cost for
+// each QM spatial mode: 2x2, 1x2, 2x1. They are a simple estimate of the
+// up-sampling prediction error, assuming sub-sampling for decimation (no
+// filtering) and up-sampling back with simple bilinear interpolation: each
+// visited pixel is compared against the average of its four, its left/right
+// or its top/bottom neighbours.
+// To reduce complexity only a subset of points is visited: a margin of
+// border_ pixels is skipped on every side, rows advance by skip_num_ and the
+// processed width is rounded down to a multiple of 16.
+// Writes spatial_pred_err_ (2x2), spatial_pred_err_h_ (1x2) and
+// spatial_pred_err_v_ (2x1), each normalized by the sum of the visited pixel
+// values. All three are 0 when that sum is 0 (e.g. an all-black frame), so
+// the metrics never become NaN/Inf. Always returns VPM_OK.
+int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() {
+  // Sum of the visited pixel values: used to normalize the spatial metrics.
+  uint32_t luma_total = 0;
+
+  uint32_t err_2x2_total = 0;
+  uint32_t err_vert_total = 0;
+  uint32_t err_horz_total = 0;
+
+  // Make sure the work section is a multiple of 16.
+  const int col_end = ((width_ - 2 * border_) & -16) + border_;
+
+  for (int row = border_; row < height_ - border_; row += skip_num_) {
+    const int row_offset = row * width_;
+    for (int col = border_; col < col_end; ++col) {
+      const int idx = row_offset + col;
+
+      const int center = orig_frame_[idx];
+      const int bottom = orig_frame_[idx + width_];
+      const int top = orig_frame_[idx - width_];
+      const int right = orig_frame_[idx + 1];
+      const int left = orig_frame_[idx - 1];
+
+      // The errors are accumulated scaled by 4 (2x2) and by 2 (1x2, 2x1) to
+      // stay in integer arithmetic; the scale is removed after the loop.
+      err_2x2_total += static_cast<uint32_t>(
+          abs(4 * center - (bottom + top + left + right)));
+      err_vert_total += static_cast<uint32_t>(abs(2 * center - (bottom + top)));
+      err_horz_total += static_cast<uint32_t>(abs(2 * center - (left + right)));
+      luma_total += static_cast<uint32_t>(center);
+    }
+  }
+
+  // Default, also used when there is nothing to normalize by.
+  spatial_pred_err_ = 0.0f;
+  spatial_pred_err_h_ = 0.0f;
+  spatial_pred_err_v_ = 0.0f;
+
+  // Dividing by a zero pixel sum would produce NaN (0/0) or Inf.
+  if (luma_total == 0) {
+    return VPM_OK;
+  }
+
+  // Normalize over all pixels.
+  const float norm = static_cast<float>(luma_total);
+
+  // 2X2:
+  spatial_pred_err_ = static_cast<float>(err_2x2_total >> 2) / norm;
+  // 1X2:
+  spatial_pred_err_h_ = static_cast<float>(err_horz_total >> 1) / norm;
+  // 2X1:
+  spatial_pred_err_v_ = static_cast<float>(err_vert_total >> 1) / norm;
+  return VPM_OK;
+}
+
+// Copies the most recently computed metric values into the owned
+// VideoContentMetrics object and returns it. Returns NULL while ca_Init_ is
+// false (no frame analyzed yet, or Initialize() rejected the frame size).
+VideoContentMetrics* VPMContentAnalysis::ContentMetrics() {
+  if (!ca_Init_) {
+    return NULL;
+  }
+
+  VideoContentMetrics* const metrics = content_metrics_;
+
+  // Motion metric: normalized temporal difference (MAD).
+  metrics->motion_magnitude = motion_magnitude_;
+
+  // Spatial prediction errors: 2x2, horizontal and vertical.
+  metrics->spatial_pred_err = spatial_pred_err_;
+  metrics->spatial_pred_err_h = spatial_pred_err_h_;
+  metrics->spatial_pred_err_v = spatial_pred_err_v_;
+
+  return metrics;
+}
+
+}  // namespace webrtc