diff options
Diffstat (limited to 'helium')
-rw-r--r-- | helium/tesseract.cpp | 21 | ||||
-rw-r--r-- | helium/tesseract.h | 4 | ||||
-rw-r--r-- | helium/test.cpp | 5 | ||||
-rw-r--r-- | helium/textrecognition.cpp | 6 | ||||
-rw-r--r-- | helium/textrecognition.h | 4 |
5 files changed, 32 insertions, 8 deletions
diff --git a/helium/tesseract.cpp b/helium/tesseract.cpp index 8b00760..861f7ce 100644 --- a/helium/tesseract.cpp +++ b/helium/tesseract.cpp @@ -16,6 +16,10 @@ #include "ccmain/control.h" #endif +// This is to make the ratings file parser happy. +BOOL_VAR (tessedit_write_images, FALSE, + "Capture the image from the IPE"); + #undef LOG // Local includes @@ -29,8 +33,13 @@ using namespace helium; const char* kArguments[3] = { "tesseract", "out", "batch" }; -int Tesseract::Init(const char* datapath, const char *lang) { - return api_.Init(datapath, lang); +int Tesseract::Init(const char* datapath, + const char *lang, + const char *configfile) { + int res = api_.Init(datapath, lang); + if (!res && configfile) + api_.ReadConfigFile(configfile); + return res; } void Tesseract::ReadMask(const Mask& mask, bool flipped) { @@ -70,8 +79,14 @@ char* Tesseract::RecognizeText(const Mask& mask) { MaskToBuffer(mask, buf); api_.SetImage(buf, mask.width(), mask.height(), 1, mask.width()); api_.Recognize(NULL); + delete[] buf; - return api_.GetUTF8Text(); + char *text = api_.GetUTF8Text(); + + if (tessedit_write_images) + page_image.write("tessinput.tif"); + + return text; } void Tesseract::End() { diff --git a/helium/tesseract.h b/helium/tesseract.h index f92d21d..dc88ebe 100644 --- a/helium/tesseract.h +++ b/helium/tesseract.h @@ -38,7 +38,9 @@ class Tesseract { // Call this method exactly once to initialize the Tesseract engine with // the data files at the specified path (This should be the path, that // contains the 'tessdata' folder). - static int Init(const char* datapath, const char *lang = NULL); + static int Init(const char* datapath, + const char *lang = NULL, + const char *configfile = NULL); // Find the baseline, specified by the offset and slope, for the given // Mask. If flipped is true, this method will flip the image vertically diff --git a/helium/test.cpp b/helium/test.cpp index 4c4891d..1d384aa 100644 --- a/helium/test.cpp +++ b/helium/test.cpp @@ -82,7 +82,10 @@ int main(int argc, char** argv) { // Run OCR printf("OCRing (language %s)...\n", lang); TextAreas text; - TextRecognition::Init("/sdcard/", lang); + TextRecognition::Init("/sdcard/", + lang, + "/sdcard/tessdata/ratings"); + TextRecognition::RecognizeUsingBinarizer(&binarizer, text); // Output Text diff --git a/helium/textrecognition.cpp b/helium/textrecognition.cpp index 5768449..f2fdf90 100644 --- a/helium/textrecognition.cpp +++ b/helium/textrecognition.cpp @@ -16,9 +16,11 @@ using namespace helium; bool TextRecognition::recognizer_initialized_ = false; -void TextRecognition::Init(const char* data_path, const char *lang) { +void TextRecognition::Init(const char* data_path, + const char *lang, + const char *configfile) { // Allow reinitialization of Tesseract to get around its adaptation. - Tesseract::Init(data_path, lang); + Tesseract::Init(data_path, lang, configfile); recognizer_initialized_ = true; } diff --git a/helium/textrecognition.h b/helium/textrecognition.h index 53f2233..fa4ecf5 100644 --- a/helium/textrecognition.h +++ b/helium/textrecognition.h @@ -23,7 +23,9 @@ class TextRecognition { // tessdata directory. // This must be called before using RecognizeUsingBinarizer(...), but it // can be callled multiple times to clear OCR's internal adaptation. - static void Init(const char* data_path, const char *lang = NULL); + static void Init(const char* data_path, + const char *lang = NULL, + const char *configfile = NULL); // This method passes all the binarized masks, that were extracted by the // specified Binarizer, through perspective correction, and |