aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Noah Presler <noahp@google.com> 2015-08-11 11:45:45 -0700
committer Noah Presler <noahp@google.com> 2015-08-12 11:44:17 -0700
commit 35d67d271b9a2f241fbaeb3dce9eed6bc71d7308 (patch)
tree 52f0b30a31e17c5f3f1c7f8b3dd08cefbcf9101e
parent 4720b041f19d864ae5b80bbef71f691db6806f8b (diff)
download opencv3-35d67d271b9a2f241fbaeb3dce9eed6bc71d7308.tar.gz
integrating module: rsobjdetect for renderscript parallel objdetect
Change-Id: Id9638f09d43b58be8187328abbdf0c00acc53fc4
-rw-r--r-- Android.mk 49
-rw-r--r-- modules/objdetect/src/HaarStructs.h 59
-rw-r--r-- modules/objdetect/src/cascadedetect.cpp 192
-rw-r--r-- modules/objdetect/src/cascadedetect.hpp 19
-rw-r--r-- modules/rsobjdetect/src/innerloop.cpp 86
-rw-r--r-- modules/rsobjdetect/src/rs/detectAt.rs 192
-rw-r--r-- modules/rsobjdetect/src/rsobjdetect.hpp 5
7 files changed, 593 insertions, 9 deletions
diff --git a/Android.mk b/Android.mk
index 02d891b..0e698db 100644
--- a/Android.mk
+++ b/Android.mk
@@ -634,7 +634,7 @@ LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/3rdparty/libjasper/jasper \
$(LOCAL_PATH)/3rdparty/libjpeg \
$(LOCAL_PATH)/3rdparty/libtiff \
- $(LOCAL_PATH)/3rdparty/libpng \
+ $(LOCAL_PATH)/../libpng \
$(LOCAL_PATH)/../zlib \
$(LOCAL_PATH)/../zlib/src
@@ -798,6 +798,9 @@ include $(BUILD_SHARED_LIBRARY)
include $(CLEAR_VARS)
+#Use true to build with renderscript, false to build without
+WITH_RENDERSCRIPT = true
+
LOCAL_NDK_STL_VARIANT := gnustl_static
LOCAL_SDK_VERSION := 21
@@ -811,6 +814,7 @@ LOCAL_LDLIBS := -L$(SYSROOT)/usr/lib -llog -ldl
LOCAL_C_INCLUDES := \
$(LOCAL_PATH) \
$(LOCAL_PATH)/modules/objdetect \
+ $(LOCAL_PATH)/modules/objdetect/src \
$(LOCAL_PATH)/modules/core/include \
$(LOCAL_PATH)/modules/hal/include \
$(LOCAL_PATH)/modules/objdetect/include \
@@ -835,6 +839,13 @@ LOCAL_SRC_FILES := \
modules/java/generator/src/cpp/converters.cpp
LOCAL_SHARED_LIBRARIES := libopencv_core libopencv_imgproc libopencv_ml libopencv_imgcodecs libopencv_videoio libopencv_highgui
+
+ifeq ($(WITH_RENDERSCRIPT), true)
+LOCAL_SHARED_LIBRARIES += libopencv_rsobjdetect
+LOCAL_CFLAGS += -DRENDERSCRIPT=1
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/modules/rsobjdetect/src
+endif
+
LOCAL_STATIC_LBIRARIES := libopencv_hal
include $(BUILD_SHARED_LIBRARY)
@@ -842,6 +853,40 @@ include $(BUILD_SHARED_LIBRARY)
+ifeq ($(WITH_RENDERSCRIPT),true)
+# libopencv_rsobjdetect: the RenderScript kernel (detectAt.rs) and its C++
+# driver (innerloop.cpp). Built only when WITH_RENDERSCRIPT is true.
+include $(CLEAR_VARS)
+LOCAL_MODULE := libopencv_rsobjdetect
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_NDK_STL_VARIANT := gnustl_static
+LOCAL_SDK_VERSION := 21
+LOCAL_CLANG := true
+
+LOCAL_SRC_FILES := \
+    modules/rsobjdetect/src/rs/detectAt.rs \
+    modules/rsobjdetect/src/innerloop.cpp
+
+# Single include list. The module's own source directory used to be assigned
+# first and then thrown away by a later ':=' assignment, and it was not rooted
+# at $(LOCAL_PATH). A LOCAL_CFLAGS line derived from the (still empty) include
+# list expanded to nothing and has been dropped.
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/modules/rsobjdetect/src \
+    frameworks/rs/cpp \
+    frameworks/rs \
+    $(call intermediates-dir-for,STATIC_LIBRARIES,libRS,TARGET,)
+
+LOCAL_LDFLAGS := -llog -ldl
+
+LOCAL_SHARED_LIBRARIES := libRScpp
+
+include $(BUILD_SHARED_LIBRARY)
+endif
+
+
+
+
include $(CLEAR_VARS)
LOCAL_NDK_STL_VARIANT := gnustl_static
@@ -1339,4 +1384,4 @@ LOCAL_SRC_FILES := \
LOCAL_SHARED_LIBRARIES := libopencv_core libopencv_flann libopencv_imgproc libopencv_ml libopencv_photo libopencv_video libopencv_imgcodecs libopencv_videoio libopencv_highgui libopencv_objdetect libopencv_features2d libopencv_calib3d
LOCAL_STATIC_LIBRARIES := libopencv_hal
-include $(BUILD_SHARED_LIBRARY)
+include $(BUILD_SHARED_LIBRARY) \ No newline at end of file
diff --git a/modules/objdetect/src/HaarStructs.h b/modules/objdetect/src/HaarStructs.h
new file mode 100644
index 0000000..29dc6ad
--- /dev/null
+++ b/modules/objdetect/src/HaarStructs.h
@@ -0,0 +1,59 @@
+#pragma once
+// Plain structs used to hand a loaded Haar cascade to the RenderScript detection code. HaarStage is one cascade stage.
+typedef struct
+{
+ int first; // copied from data.stages[j].first by setHaarVars()
+ int ntrees; // number of consecutive stumps evaluated for this stage
+ float threshold; // a window is rejected when the stage's stump sum is below this value
+} HaarStage;
+// Decision stump: adds `left` to the stage sum when the feature response is below `threshold`, otherwise `right`.
+typedef struct
+{
+ int featureIdx; // index of the feature this stump evaluates
+ float threshold;
+ float left;
+ float right;
+} HaarStump;
+
+// NOTE(review): nothing in this change fills or reads HaarOptFeature; the meaning of its fields is not established here.
+typedef struct optFe
+{
+ int ofs0[4];
+ int ofs1[4];
+ int ofs2[4];
+ float weight[4];
+} HaarOptFeature;
+// A feature made of up to three weighted rectangles; entry j of x/y/width/height describes rectangle j.
+typedef struct Fe
+{
+ int x[3];
+ int y[3];
+ int width[3];
+ int height[3];
+ float weight0; // weight of rectangle 0
+ float weight1; // weight of rectangle 1
+ float weight2; // weight of rectangle 2; the kernel skips rectangle 2 when this is 0
+} HaarFeature;
+// Axis-aligned rectangle given as x, y, width, height.
+typedef struct hr
+{
+ int x;
+ int y;
+ int width;
+ int height;
+} HaarRect;
+// Everything setHaarVars() collects about a cascade before passing it to initInnerLoop().
+typedef struct
+{
+ int sqofs; // copied from HaarEvaluator::sqofs
+ int nofs[4]; // copied from HaarEvaluator::nofs
+ HaarRect nrect; // copy of HaarEvaluator::normrect
+ double normRectArea; // normrect.area()
+ HaarStump* stumps; // malloc'd array of nStumps entries
+ HaarStage* stages; // malloc'd array of stagesSize entries
+ HaarOptFeature* haarOptFeatures; // not assigned by setHaarVars()
+ HaarFeature* haarFeatures; // malloc'd array of nFeatures entries
+ int stagesSize;
+ int nFeatures;
+ int nStumps;
+} HaarVars; \ No newline at end of file
diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp
index 20800ae..af1a1f8 100644
--- a/modules/objdetect/src/cascadedetect.cpp
+++ b/modules/objdetect/src/cascadedetect.cpp
@@ -46,6 +46,10 @@
#include "opencv2/objdetect/objdetect_c.h"
#include "opencl_kernels_objdetect.hpp"
+#if defined ANDROID && defined RENDERSCRIPT
+#include "rsobjdetect.hpp"
+#endif
+
namespace cv
{
@@ -470,6 +474,29 @@ bool FeatureEvaluator::updateScaleData( Size imgsz, const std::vector<float>& _s
return recalcOptFeatures;
}
+#if defined ANDROID && defined RENDERSCRIPT
+// Fills `out` / `outSq` with integral (summed-area) tables of `in`: entry (y, x)
+// holds the sum (respectively the sum of squares) of the pixels in rows [0, y)
+// and columns [0, x). Row 0 and column 0 of both tables are zero.
+//   in             8-bit single-channel image with at least height-1 rows and width-1 columns
+//   width, height  table dimensions; tables are row-major with a stride of `width` ints
+//   out, outSq     caller-allocated buffers of at least width*height ints each
+void haarIntegral(Mat in, int width, int height, int* out, int* outSq) {
+    // An empty table has no row 0 to clear; writing one would overrun the buffers.
+    if (width <= 0 || height <= 0)
+        return;
+
+    const uchar* src = in.data;
+    const size_t step = in.step;
+
+    memset(out, 0, width*sizeof(out[0]));
+    memset(outSq, 0, width*sizeof(outSq[0]));
+    out += width;
+    outSq += width;
+
+    for (int y = 1; y < height; y++, out += width, outSq += width, src += step) {
+        int sum = 0, sumSq = 0;
+        out[0] = outSq[0] = 0;
+        for (int x = 1; x < width; x++) {
+            // Table column x covers image columns [0, x), so the pixel added here is
+            // column x-1. Reading src[x] skipped image column 0 and shifted every
+            // rectangle sum one pixel to the right.
+            const int val = src[x - 1];
+            sum += val;
+            sumSq += val * val;
+            out[x] = out[x - width] + sum;
+            outSq[x] = outSq[x - width] + sumSq;
+        }
+    }
+}
+#endif
bool FeatureEvaluator::setImage( InputArray _image, const std::vector<float>& _scales )
{
@@ -510,12 +537,30 @@ bool FeatureEvaluator::setImage( InputArray _image, const std::vector<float>& _s
sbuf.create(sbufSize.height*nchannels, sbufSize.width, CV_32S);
rbuf.create(sz0, CV_8U);
+
+#if defined ANDROID && defined RENDERSCRIPT
+ integralImages = (int **) malloc(sizeof(int *)*nscales);
+ integralImagesSq = (int **) malloc(sizeof(int *)*nscales);
+#endif
+
for (i = 0; i < nscales; i++)
{
+#if defined ANDROID && defined RENDERSCRIPT
+ const ScaleData& s = scaleData->at(i);
+ Mat dst(s.szi.height - 1, s.szi.width - 1 , CV_8U);
+ resize(image, dst, dst.size(), 1. / s.scale, 1. / s.scale, INTER_LINEAR);
+ const Size sz = s.getWorkingSize(origWinSize);
+ int* intImg = (int *)malloc(sizeof(int)*s.szi.area());
+ int* intImgSq = (int *)malloc(sizeof(int)*s.szi.area());
+ haarIntegral(dst, sz.width, sz.height, intImg, intImgSq);
+ integralImages[i] = intImg;
+ integralImagesSq[i] = intImgSq;
+#else
const ScaleData& s = scaleData->at(i);
Mat dst(s.szi.height - 1, s.szi.width - 1, CV_8U, rbuf.ptr());
resize(image, dst, dst.size(), 1. / s.scale, 1. / s.scale, INTER_LINEAR);
computeChannels((int)i, dst);
+#endif
}
sbufFlag = SBUF_VALID;
}
@@ -761,6 +806,10 @@ LBPEvaluator::~LBPEvaluator()
bool LBPEvaluator::read( const FileNode& node, Size _origWinSize )
{
+#if defined RENDERSCRIPT
+ CV_Error(Error::StsNotImplemented, "Renderscript cannot be used with LBP in 3.0");
+#endif
+
if (!FeatureEvaluator::read(node, _origWinSize))
return false;
if(features.empty())
@@ -1283,7 +1332,6 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
tryOpenCL = false;
- // CPU code
featureEvaluator->getMats();
{
Mat currentMask;
@@ -1301,13 +1349,138 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
szw = s[i].getWorkingSize(data.origWinSize);
stripeSizes[i] = std::max((szw.height/s[i].ystep + nstripes-1)/nstripes, 1)*s[i].ystep;
}
+#if defined ANDROID && defined RENDERSCRIPT
+ rs_parallel_detect(candidates, nscales);
+#else
+ CascadeClassifierInvoker invoker(*this, (int)nscales, nstripes, s, stripeSizes,
+ candidates, rejectLevels, levelWeights,
+ outputRejectLevels, currentMask, &mtx);
+ parallel_for_(Range(0, nstripes), invoker);
+#endif
+ }
+}
+
+#if defined ANDROID && defined RENDERSCRIPT
+// RenderScript replacement for the CascadeClassifierInvoker pass.
+// For every scale it runs the kernel over that scale's integral tables and appends
+// one Rect (in original-image coordinates) to `candidates` for each accepted window.
+//   candidates  receives the accepted windows; existing entries are kept
+//   nscales     number of scales, i.e. of entries in heval.integralImages/integralImagesSq
+// The per-scale tables allocated by FeatureEvaluator::setImage are freed here.
+void CascadeClassifierImpl::rs_parallel_detect(std::vector<Rect>& candidates, int nscales) {
+    HaarEvaluator& heval = (HaarEvaluator&)*featureEvaluator;
+    const FeatureEvaluator::ScaleData* s = &featureEvaluator->getScaleData(0);
+    const Size origWinSize = data.origWinSize;
- CascadeClassifierInvoker invoker(*this, (int)nscales, nstripes, s, stripeSizes,
- candidates, rejectLevels, levelWeights,
- outputRejectLevels, currentMask, &mtx);
- parallel_for_(Range(0, nstripes), invoker);
+
+    // The cascade is uploaded to the script once, on first use.
+    if (!loadedHaarVars) setHaarVars();
+
+    for (int scaleIdx = 0; scaleIdx < nscales; scaleIdx++)
+    {
+        const FeatureEvaluator::ScaleData& sd = s[scaleIdx];
+        const float scalingFactor = sd.scale;
+        const Size sz = sd.getWorkingSize(origWinSize);
+        const int area = sz.width*sz.height;
+        // Nothing to scan at this scale; avoid creating zero-sized allocations.
+        if (area <= 0)
+            continue;
+
+        // Reported window size at this scale (truncated, as before).
+        const int winWidth = (int)(origWinSize.width * scalingFactor);
+        const int winHeight = (int)(origWinSize.height * scalingFactor);
+
+        // One flag per table cell, written by the kernel.
+        bool* outData = (bool*)malloc(sizeof(bool)*area);
+        innerloops(sz.height, sz.width,
+                   heval.integralImages[scaleIdx], heval.integralImagesSq[scaleIdx],
+                   sd.ystep, outData);
+
+        for (int y = 0; y < sz.height; y += sd.ystep) {
+            const bool* row = outData + y*sz.width;
+            for (int x = 0; x < sz.width; x += sd.ystep) {
+                if (row[x]) {
+                    candidates.push_back(Rect(cvRound(x*scalingFactor),
+                                              cvRound(y*scalingFactor),
+                                              winWidth, winHeight));
+                }
+            }
+        }
+
+        free(outData);
+    }
+    cleanUpInnerLoops();
+
+    for (int i = 0; i < nscales; i++)
+    {
+        free(heval.integralImages[i]);
+        free(heval.integralImagesSq[i]);
+    }
+    free(heval.integralImages);
+    free(heval.integralImagesSq);
+    // Do not leave dangling pointers behind for a later call to trip over.
+    heval.integralImages = NULL;
+    heval.integralImagesSq = NULL;
+}
+
+// Snapshots the loaded cascade (features, stages, stumps, normalisation rectangle)
+// into the plain-struct form in haarVars and passes it to initInnerLoop().
+// The three arrays are malloc'd here and stay referenced by haarVars; nothing in
+// this function frees them. Sets loadedHaarVars once the snapshot is complete.
+void CascadeClassifierImpl::setHaarVars() {
+    HaarEvaluator& heval = (HaarEvaluator&)*featureEvaluator;
+    const int origWidth = data.origWinSize.width;
+    const int origHeight = data.origWinSize.height;
+
+    // Build the snapshot in a zeroed local. haarVars is replaced wholesale at the
+    // end, so anything written to haarVars before that point (as nofs used to be)
+    // is lost, and members that are never filled here (haarOptFeatures) must not
+    // be left indeterminate.
+    HaarVars hf;
+    memset(&hf, 0, sizeof(hf));
+    for (int k = 0; k < 4; k++)
+        hf.nofs[k] = heval.nofs[k];
+    hf.sqofs = heval.sqofs;
+    hf.normRectArea = heval.normrect.area();
+    hf.nrect.x = heval.normrect.x;
+    hf.nrect.y = heval.normrect.y;
+    hf.nrect.width = heval.normrect.width;
+    hf.nrect.height = heval.normrect.height;
+
+    // Features: three rectangles with one weight each.
+    const int nFeatures = (int)heval.optfeatures->size();
+    const std::vector<HaarEvaluator::Feature>& ff = *heval.features;
+    HaarFeature* featureArr = (HaarFeature*)malloc(sizeof(HaarFeature)*nFeatures);
+    for (int i = 0; i < nFeatures; i++) {
+        HaarFeature& f = featureArr[i];
+        for (int j = 0; j < 3; j++) {
+            f.x[j] = ff[i].rect[j].r.x;
+            f.y[j] = ff[i].rect[j].r.y;
+            f.width[j] = ff[i].rect[j].r.width;
+            f.height[j] = ff[i].rect[j].r.height;
+        }
+        f.weight0 = ff[i].rect[0].weight;
+        f.weight1 = ff[i].rect[1].weight;
+        f.weight2 = ff[i].rect[2].weight;
+    }
+    hf.haarFeatures = featureArr;
+    hf.nFeatures = nFeatures;
+
+    // Stages.
+    const int nstages = (int)data.stages.size();
+    HaarStage* stageArr = (HaarStage*)malloc(sizeof(HaarStage)*nstages);
+    for (int j = 0; j < nstages; j++) {
+        stageArr[j].first = data.stages[j].first;
+        stageArr[j].ntrees = data.stages[j].ntrees;
+        stageArr[j].threshold = data.stages[j].threshold;
+    }
+    hf.stages = stageArr;
+    hf.stagesSize = nstages;
+
+    // Stumps.
+    const int nstumps = (int)data.stumps.size();
+    HaarStump* stumpArr = (HaarStump*)malloc(sizeof(HaarStump)*nstumps);
+    for (int j = 0; j < nstumps; j++) {
+        stumpArr[j].featureIdx = data.stumps[j].featureIdx;
+        stumpArr[j].threshold = data.stumps[j].threshold;
+        stumpArr[j].left = data.stumps[j].left;
+        stumpArr[j].right = data.stumps[j].right;
}
+    hf.stumps = stumpArr;
+    hf.nStumps = nstumps;
+
+    haarVars = hf;
+    loadedHaarVars = true;
+    initInnerLoop(haarVars, origWidth, origHeight);
}
+#endif
void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rect>& objects,
@@ -1553,9 +1726,16 @@ BaseCascadeClassifier::~BaseCascadeClassifier()
{
}
-CascadeClassifier::CascadeClassifier() {}
+CascadeClassifier::CascadeClassifier() { // default constructor; apart from the build-configuration check below it does nothing
+#if defined(RENDERSCRIPT) && !defined(ANDROID)
+ CV_Error(Error::StsNotImplemented, "Renderscript cannot be used on non-Android devices"); // compiled in only when RENDERSCRIPT is defined without ANDROID
+#endif
+}
CascadeClassifier::CascadeClassifier(const String& filename)
{
+#if defined(RENDERSCRIPT) && !defined(ANDROID)
+ CV_Error(Error::StsNotImplemented, "Renderscript cannot be used on non-Android devices"); // same RENDERSCRIPT-without-ANDROID check as the default constructor; runs before load()
+#endif
load(filename);
}
diff --git a/modules/objdetect/src/cascadedetect.hpp b/modules/objdetect/src/cascadedetect.hpp
index 696ab40..38ba64d 100644
--- a/modules/objdetect/src/cascadedetect.hpp
+++ b/modules/objdetect/src/cascadedetect.hpp
@@ -1,6 +1,7 @@
#pragma once
#include "opencv2/core/ocl.hpp"
+#include "HaarStructs.h"
namespace cv
{
@@ -71,6 +72,12 @@ protected:
UMat urbuf, usbuf, ufbuf, uscaleData;
Ptr<std::vector<ScaleData> > scaleData;
+
+#if defined ANDROID && defined RENDERSCRIPT
+ int** integralImages; // one malloc'd integral table per scale; allocated in setImage, freed by CascadeClassifierImpl::rs_parallel_detect
+ int** integralImagesSq; // matching squared-sum tables, same allocation and release points
+ friend class CascadeClassifierImpl; // rs_parallel_detect reads and frees the two tables above
+#endif
};
@@ -132,6 +139,10 @@ protected:
std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels = false );
+#if defined ANDROID && defined RENDERSCRIPT
+ void setHaarVars(); // copies the loaded cascade into haarVars and calls initInnerLoop(); sets loadedHaarVars
+ void rs_parallel_detect(std::vector<Rect>& candidates, int nscales); // runs the RenderScript kernel for each of nscales scales and appends accepted windows to candidates
+#endif
enum { MAX_FACES = 10000 };
enum { BOOST = 0 };
@@ -222,6 +233,10 @@ protected:
bool tryOpenCL;
Mutex mtx;
+#if defined ANDROID && defined RENDERSCRIPT
+ HaarVars haarVars; // plain-struct snapshot of the cascade, filled by setHaarVars()
+ bool loadedHaarVars; // set to true by setHaarVars(); NOTE(review): no initialisation to false is visible in this change - confirm the constructor provides one
+#endif
};
#define CC_CASCADE_PARAMS "cascadeParams"
@@ -363,6 +378,8 @@ protected:
virtual void computeChannels( int i, InputArray img );
virtual void computeOptFeatures();
+ friend class CascadeClassifierImpl; // setHaarVars() reads features, optfeatures, nofs, sqofs and normrect; unlike the FeatureEvaluator friend declaration this one is not guarded by ANDROID/RENDERSCRIPT
+
Ptr<std::vector<Feature> > features;
Ptr<std::vector<OptFeature> > optfeatures;
Ptr<std::vector<OptFeature> > optfeatures_lbuf;
@@ -641,4 +658,4 @@ inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
sum = (double)tmp;
return 1;
}
-}
+} \ No newline at end of file
diff --git a/modules/rsobjdetect/src/innerloop.cpp b/modules/rsobjdetect/src/innerloop.cpp
new file mode 100644
index 0000000..42593f8
--- /dev/null
+++ b/modules/rsobjdetect/src/innerloop.cpp
@@ -0,0 +1,86 @@
+#include "RenderScript.h"
+#include "ScriptC_detectAt.h"
+#include "../../objdetect/src/HaarStructs.h"
+
+using namespace android;
+using namespace RSC;
+using namespace std;
+
+static sp<RS> rs; // RenderScript context shared by every function in this file; created in initInnerLoop()
+static sp<ScriptC_detectAt> sc; // detectAt script instance bound to rs; created in initInnerLoop()
+
+// Creates the RenderScript context and the detectAt script, then uploads the
+// cascade described by `hf` (scalars, stumps, stages, features, normalisation
+// rectangle) into the script.
+//   hf                     cascade snapshot; stumps/stages/haarFeatures must hold
+//                          nStumps/stagesSize/nFeatures entries respectively
+//   origWidth, origHeight  size of the detection window the cascade was trained for
+void initInnerLoop(HaarVars hf, int origWidth, int origHeight) {
+    rs = new RS();
+    // NOTE(review): the cache directory is hard-coded to one application's path and
+    // the result of init() is ignored, exactly as before; a failed init is not reported.
+    rs->init("/data/data/com.example.noahp.facialrecogrs/cache");
+
+    sc = new ScriptC_detectAt(rs);
+
+    sc->set_origWidth(origWidth);
+    sc->set_origHeight(origHeight);
+    sc->set_sqofs(hf.sqofs);
+    sc->set_normRectArea(hf.normRectArea);
+    sc->set_stagesSize(hf.stagesSize);
+
+    // Reset the script-side append cursors before filling the tables.
+    sc->invoke_initCurr();
+
+    for (int i = 0; i < hf.nStumps; i++)
+    {
+        const HaarStump& stump = hf.stumps[i];
+        // Forward the stump's own feature index. The loop counter was passed here
+        // before, which only works when stump i happens to use feature i.
+        sc->invoke_addStump(stump.featureIdx, stump.threshold, stump.left, stump.right);
+    }
+
+    for (int stageIdx = 0; stageIdx < hf.stagesSize; stageIdx++)
+    {
+        const HaarStage& stage = hf.stages[stageIdx];
+        sc->invoke_addStage(stage.first, stage.ntrees, stage.threshold);
+    }
+
+    for (int i = 0; i < hf.nFeatures; i++)
+    {
+        const HaarFeature& f = hf.haarFeatures[i];
+        sc->invoke_addHF(f.x[0], f.y[0], f.width[0], f.height[0],
+                         f.x[1], f.y[1], f.width[1], f.height[1],
+                         f.x[2], f.y[2], f.width[2], f.height[2],
+                         f.weight0, f.weight1, f.weight2);
+    }
+
+    sc->set_nrect(UInt4(hf.nrect.x, hf.nrect.y, hf.nrect.width, hf.nrect.height));
+}
+
+// Runs the detectAt kernel over one scale.
+//   height, width   dimensions of the integral tables (row-major, width*height ints each)
+//   inArr, inArrSq  integral table and squared-sum table for this scale
+//   yStep           scan step forwarded to the script
+//   outData         receives width*height flags, one per table cell, written by the kernel
+// initInnerLoop() must have been called first: the file-level rs and sc are used as-is.
+void innerloops(const int height, const int width, const int* inArr, const int* inArrSq, const int yStep, bool* outData) {
+    const int count = width*height;
+
+    // Output: one boolean per table cell.
+    sp<const Element> boolElement = Element::BOOLEAN(rs);
+    Type::Builder outBuilder(rs, boolElement);
+    outBuilder.setX(count);
+    sp<const Type> outType = outBuilder.create();
+    sp<Allocation> outAllocation = Allocation::createTyped(rs, outType);
+
+    // Inputs: both tables share one I32 type of `count` cells. A second, unused
+    // Element::I32 lookup that used to precede the squared table has been removed.
+    sp<const Element> intElement = Element::I32(rs);
+    Type::Builder inBuilder(rs, intElement);
+    inBuilder.setX(count);
+    sp<const Type> inType = inBuilder.create();
+
+    sp<Allocation> inAllocation = Allocation::createTyped(rs, inType);
+    inAllocation->copy1DRangeFrom(0, count, inArr);
+    sc->set_inAlloc(inAllocation);
+
+    sp<Allocation> inAllocationSq = Allocation::createTyped(rs, inType);
+    inAllocationSq->copy1DRangeFrom(0, count, inArrSq);
+    sc->set_inAllocSq(inAllocationSq);
+
+    sc->set_width(width);
+    sc->set_height(height);
+    sc->set_yStep(yStep);
+
+    sc->forEach_runAtHaarKernel(inAllocation, outAllocation);
+    outAllocation->copy1DRangeTo(0, count, outData);
+}
+
+void cleanUpInnerLoops() { // called by rs_parallel_detect after all scales have been processed
+ rs->finish(); // only calls finish() on the shared context; rs and sc themselves are not released here
+} \ No newline at end of file
diff --git a/modules/rsobjdetect/src/rs/detectAt.rs b/modules/rsobjdetect/src/rs/detectAt.rs
new file mode 100644
index 0000000..3e2c2ba
--- /dev/null
+++ b/modules/rsobjdetect/src/rs/detectAt.rs
@@ -0,0 +1,192 @@
+#pragma version(1)
+#pragma rs java_package_name(unused)
+#pragma rs_fp_relaxed
+
+#define numStumps 5000 // capacity of the static stumps[] table
+#define numStages 22 // capacity of the static stages[] table
+#define numRect 3 // rectangles per HaarFeature
+#define numFeatures 10000 // capacity of the static haarFeatures[] table
+// One cascade stage, as stored in stages[] by addStage().
+typedef struct stag
+{
+ int first; // stored by addStage(); the kernel tracks its own running stump offset instead of reading this
+ int ntrees; // number of consecutive stumps the kernel evaluates for this stage
+ float threshold; // the kernel rejects the window when the stage sum is below this
+} HaarStage;
+// One decision stump, as stored in stumps[] by addStump().
+typedef struct stum
+{
+ int featureIdx; // index into haarFeatures[]
+ float threshold;
+ float left; // added to the stage sum when the response is below threshold
+ float right; // added to the stage sum otherwise
+} HaarStump;
+// NOTE(review): HaarOptFeature is declared but not used anywhere in this script.
+typedef struct optFe
+{
+ uint4 ofs0;
+ uint4 ofs1;
+ uint4 ofs2;
+ float4 weight;
+} HaarOptFeature;
+// One feature: numRect weighted rectangles, entry j of each array describing rectangle j.
+typedef struct Fe
+{
+ int x[numRect];
+ int y[numRect];
+ int width[numRect];
+ int height[numRect];
+ float weight[numRect];
+} HaarFeature;
+// Script globals. The non-static ones are set from innerloop.cpp through the generated set_* accessors.
+int sqofs; // set once in initInnerLoop(); not read by the kernel in this file
+uint4 nrect; // normalisation rectangle packed as (x, y, width, height)
+float normRectArea;
+int stagesSize; // set in initInnerLoop(); the kernel iterates up to currStage instead
+int width; // row stride / width of the current integral tables
+int height; // height of the current integral tables
+int origWidth; // detection window width
+int origHeight; // detection window height
+int yStep; // only cells whose column and row are multiples of yStep are evaluated
+rs_allocation inAlloc; // integral table for the current scale
+rs_allocation inAllocSq; // squared-sum table for the current scale
+
+static HaarStump stumps[numStumps]; // filled in order by addStump()
+static HaarStage stages[numStages]; // filled in order by addStage()
+static HaarFeature haarFeatures[numFeatures]; // filled in order by addHF()
+static int currStage; // number of stages stored so far
+static int currStump; // number of stumps stored so far
+static int currHf; // number of features stored so far
+
+// Rectangle sum from an integral table: reads the four cells at x+of0 .. x+of3 of `in`
+// and combines them as (of0) - (of1) - (of2) + (of3).
+static int calcSumOfs(const int x, const int of0, const int of1, const int of2, const int of3, const rs_allocation in) {
+    const int topLeft = rsGetElementAt_int(in, x + of0);
+    const int topRight = rsGetElementAt_int(in, x + of1);
+    const int bottomLeft = rsGetElementAt_int(in, x + of2);
+    const int bottomRight = rsGetElementAt_int(in, x + of3);
+    return topLeft - topRight - bottomLeft + bottomRight;
+}
+
+// Sum of rectangle `offNum` of feature `_f`, for the window whose flat cell index is `x`,
+// taken from inAlloc. `imgWidth` is the row stride of the table.
+static int evaluateIntegral(const int x, const int imgWidth, const int offNum, const HaarFeature _f) {
+    const int left = _f.x[offNum];
+    const int right = left + _f.width[offNum];
+    const int topRow = imgWidth * _f.y[offNum];
+    const int bottomRow = imgWidth * (_f.y[offNum] + _f.height[offNum]);
+    return calcSumOfs(x, left + topRow, right + topRow, left + bottomRow, right + bottomRow, inAlloc);
+}
+
+// Sum over the normalisation rectangle `nrect` for the window at flat cell index `x`,
+// read from table `in` with row stride `imgWidth`.
+static int evaluateIntegralNof(const rs_allocation in, const int x, const int imgWidth) {
+    // Intermediates stay unsigned so the arithmetic matches the uint4 components of nrect.
+    const uint left = nrect.s0;
+    const uint right = nrect.s0 + nrect.s2;
+    const uint topRow = imgWidth * nrect.s1;
+    const uint bottomRow = imgWidth * (nrect.s1 + nrect.s3);
+    const int of0 = left + topRow;
+    const int of1 = right + topRow;
+    const int of2 = left + bottomRow;
+    const int of3 = right + bottomRow;
+    return calcSumOfs(x, of0, of1, of2, of3, in);
+}
+
+bool RS_KERNEL runAtHaarKernel(const int in, const int x) // per-cell kernel: true when the window at flat index x passes every stored stage; `in` is not read
+{
+ int x_check = x % width; // column of this cell
+ int y_check = x / width; // row of this cell
+ if (!(x_check % yStep == 0 && y_check % yStep == 0 )) // only cells on the yStep grid are evaluated
+ return false;
+ if( !(x_check < 0 || y_check < 0 ||
+ x_check + origWidth >= width ||
+ y_check + origHeight >= height )) { // the whole window must lie inside the table
+ float varianceNormFactor;
+ int valsum = evaluateIntegralNof(inAlloc,x, width); // pixel sum over the normalisation rectangle
+ unsigned valsqsum = (unsigned) evaluateIntegralNof(inAllocSq, x, width); // squared-pixel sum over the same rectangle
+ float area = normRectArea;
+ float nf = area * valsqsum - (float)valsum * valsum;
+
+ if( nf > 0.f ) {
+ nf = sqrt((float)nf);
+ varianceNormFactor = (float)(1./nf);
+ if(!(area*varianceNormFactor < 0.1f)) return false; // reject unless area/nf is below 0.1
+ }
+ else {
+ varianceNormFactor = 1.0f;
+ return false; // non-positive nf: the window is rejected outright
+ }
+
+ int nstages = currStage; // number of stages actually stored by addStage()
+ float tmp = 0.f;
+ int stumpOfs = 0; // index in stumps[] of the first stump of the current stage
+
+ for( int stageIdx = 0; stageIdx < nstages; stageIdx++ ) {
+ const HaarStage stage = stages[stageIdx];
+ tmp = 0.f; // stage sum
+ int ntrees = stage.ntrees;
+
+ for( int i = 0; i < ntrees; i++ ) {
+ const HaarStump stump = stumps[i + stumpOfs];
+ float ret = haarFeatures[stump.featureIdx].weight[0]
+ * evaluateIntegral(x, width, 0, haarFeatures[stump.featureIdx])
+ + haarFeatures[stump.featureIdx].weight[1]
+ * evaluateIntegral(x, width, 1, haarFeatures[stump.featureIdx]); // weighted sum of rectangles 0 and 1
+ if( haarFeatures[stump.featureIdx].weight[2] != 0.0f ) // rectangle 2 contributes only when its weight is non-zero
+ ret += haarFeatures[stump.featureIdx].weight[2]
+ * evaluateIntegral(x, width, 2, haarFeatures[stump.featureIdx]);
+ ret *= varianceNormFactor;
+ tmp += ret < stump.threshold ? stump.left : stump.right;
+ }
+
+ if( tmp < stage.threshold ) return false; // the first failing stage rejects the window
+ stumpOfs += ntrees;
+ }
+ return true;
+ }
+ return false; // window does not fit inside the table
+}
+
+void initCurr() { // resets the three append cursors; initInnerLoop() invokes this before uploading a cascade
+ currStump = 0;
+ currStage = 0;
+ currHf = 0;
+}
+
+// Appends one stage to the script-side stage table.
+// A stage that would not fit (the table holds numStages entries) is reported with
+// rsDebug and dropped instead of being written past the end of stages[].
+void addStage(const int first, const int ntrees, const float threshold) {
+    if (currStage >= numStages) {
+        rsDebug("detectAt: stage table full, dropping stage", currStage);
+        return;
+    }
+    HaarStage *slot = &stages[currStage];
+    slot->first = first;
+    slot->ntrees = ntrees;
+    slot->threshold = threshold;
+    currStage++;
+}
+
+// Appends one decision stump to the script-side stump table.
+// A stump that would not fit (the table holds numStumps entries) is reported with
+// rsDebug and dropped instead of being written past the end of stumps[].
+void addStump(const int featureIdx, const float threshold, const float left, const float right) {
+    if (currStump >= numStumps) {
+        rsDebug("detectAt: stump table full, dropping stump", currStump);
+        return;
+    }
+    HaarStump *slot = &stumps[currStump];
+    slot->featureIdx = featureIdx;
+    slot->threshold = threshold;
+    slot->left = left;
+    slot->right = right;
+    currStump++;
+}
+
+// Appends one three-rectangle feature to the script-side feature table.
+// (xN, yN, wN, hN) and weN describe rectangle N. A feature that would not fit (the
+// table holds numFeatures entries) is reported with rsDebug and dropped instead of
+// being written past the end of haarFeatures[].
+void addHF(const int x0, const int y0, const int w0, const int h0,
+           const int x1, const int y1, const int w1, const int h1,
+           const int x2, const int y2, const int w2, const int h2,
+           const float we0, const float we1, const float we2) {
+    if (currHf >= numFeatures) {
+        rsDebug("detectAt: feature table full, dropping feature", currHf);
+        return;
+    }
+    HaarFeature *slot = &haarFeatures[currHf];
+
+    slot->x[0] = x0;
+    slot->y[0] = y0;
+    slot->width[0] = w0;
+    slot->height[0] = h0;
+    slot->weight[0] = we0;
+
+    slot->x[1] = x1;
+    slot->y[1] = y1;
+    slot->width[1] = w1;
+    slot->height[1] = h1;
+    slot->weight[1] = we1;
+
+    slot->x[2] = x2;
+    slot->y[2] = y2;
+    slot->width[2] = w2;
+    slot->height[2] = h2;
+    slot->weight[2] = we2;
+
+    currHf++;
+} \ No newline at end of file
diff --git a/modules/rsobjdetect/src/rsobjdetect.hpp b/modules/rsobjdetect/src/rsobjdetect.hpp
new file mode 100644
index 0000000..2ec7139
--- /dev/null
+++ b/modules/rsobjdetect/src/rsobjdetect.hpp
@@ -0,0 +1,5 @@
+#pragma once
+
+// HaarVars appears in the declarations below, so pull in its definition here
+// instead of relying on whatever the including file happened to include first.
+// The path is relative to this header, as in innerloop.cpp.
+#include "../../objdetect/src/HaarStructs.h"
+
+// Creates the RenderScript context/script and uploads the cascade in `hf`;
+// origWidth/origHeight are the detection window size.
+extern void initInnerLoop(HaarVars hf, int origWidth, int origHeight);
+// Runs the kernel over one scale: inArr/inArrSq are width*height integral tables,
+// outData receives width*height per-cell flags.
+extern void innerloops(const int height, const int width, const int* inArr, const int* inArrSq, const int yStep, bool* outData);
+// Calls finish() on the RenderScript context created by initInnerLoop().
+extern void cleanUpInnerLoops(); \ No newline at end of file