aboutsummaryrefslogtreecommitdiff
path: root/helium
diff options
context:
space:
mode:
Diffstat (limited to 'helium')
-rw-r--r-- helium/tesseract.cpp | 21
-rw-r--r-- helium/tesseract.h | 4
-rw-r--r-- helium/test.cpp | 5
-rw-r--r-- helium/textrecognition.cpp | 6
-rw-r--r-- helium/textrecognition.h | 4
5 files changed, 32 insertions, 8 deletions
diff --git a/helium/tesseract.cpp b/helium/tesseract.cpp
index 8b00760..861f7ce 100644
--- a/helium/tesseract.cpp
+++ b/helium/tesseract.cpp
@@ -16,6 +16,10 @@
#include "ccmain/control.h"
#endif
+// This is to make the ratings file parser happy.
+BOOL_VAR (tessedit_write_images, FALSE,
+ "Capture the image from the IPE");
+
#undef LOG
// Local includes
@@ -29,8 +33,13 @@ using namespace helium;
const char* kArguments[3] = { "tesseract", "out", "batch" };
-int Tesseract::Init(const char* datapath, const char *lang) {
- return api_.Init(datapath, lang);
+int Tesseract::Init(const char* datapath,
+ const char *lang,
+ const char *configfile) {
+ int res = api_.Init(datapath, lang);
+ if (!res && configfile)
+ api_.ReadConfigFile(configfile);
+ return res;
}
void Tesseract::ReadMask(const Mask& mask, bool flipped) {
@@ -70,8 +79,14 @@ char* Tesseract::RecognizeText(const Mask& mask) {
MaskToBuffer(mask, buf);
api_.SetImage(buf, mask.width(), mask.height(), 1, mask.width());
api_.Recognize(NULL);
+
delete[] buf;
- return api_.GetUTF8Text();
+ char *text = api_.GetUTF8Text();
+
+ if (tessedit_write_images)
+ page_image.write("tessinput.tif");
+
+ return text;
}
void Tesseract::End() {
diff --git a/helium/tesseract.h b/helium/tesseract.h
index f92d21d..dc88ebe 100644
--- a/helium/tesseract.h
+++ b/helium/tesseract.h
@@ -38,7 +38,9 @@ class Tesseract {
// Call this method exactly once to initialize the Tesseract engine with
// the data files at the specified path (this should be the path that
// contains the 'tessdata' folder).
- static int Init(const char* datapath, const char *lang = NULL);
+ static int Init(const char* datapath,
+ const char *lang = NULL,
+ const char *configfile = NULL);
// Find the baseline, specified by the offset and slope, for the given
// Mask. If flipped is true, this method will flip the image vertically
diff --git a/helium/test.cpp b/helium/test.cpp
index 4c4891d..1d384aa 100644
--- a/helium/test.cpp
+++ b/helium/test.cpp
@@ -82,7 +82,10 @@ int main(int argc, char** argv) {
// Run OCR
printf("OCRing (language %s)...\n", lang);
TextAreas text;
- TextRecognition::Init("/sdcard/", lang);
+ TextRecognition::Init("/sdcard/",
+ lang,
+ "/sdcard/tessdata/ratings");
+
TextRecognition::RecognizeUsingBinarizer(&binarizer, text);
// Output Text
diff --git a/helium/textrecognition.cpp b/helium/textrecognition.cpp
index 5768449..f2fdf90 100644
--- a/helium/textrecognition.cpp
+++ b/helium/textrecognition.cpp
@@ -16,9 +16,11 @@ using namespace helium;
bool TextRecognition::recognizer_initialized_ = false;
-void TextRecognition::Init(const char* data_path, const char *lang) {
+void TextRecognition::Init(const char* data_path,
+ const char *lang,
+ const char *configfile) {
// Allow reinitialization of Tesseract to get around its adaptation.
- Tesseract::Init(data_path, lang);
+ Tesseract::Init(data_path, lang, configfile);
recognizer_initialized_ = true;
}
diff --git a/helium/textrecognition.h b/helium/textrecognition.h
index 53f2233..fa4ecf5 100644
--- a/helium/textrecognition.h
+++ b/helium/textrecognition.h
@@ -23,7 +23,9 @@ class TextRecognition {
// tessdata directory.
// This must be called before using RecognizeUsingBinarizer(...), but it
// can be called multiple times to clear OCR's internal adaptation.
- static void Init(const char* data_path, const char *lang = NULL);
+ static void Init(const char* data_path,
+ const char *lang = NULL,
+ const char *configfile = NULL);
// This method passes all the binarized masks, that were extracted by the
// specified Binarizer, through perspective correction, and