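// NOTE(review): the original file-header comment block was lost when this
// listing was extracted; only bare listing line numbers remained in its
// place. Restore the header/license text from the upstream source.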
00020 #ifndef TESSERACT_API_BASEAPI_H__
00021 #define TESSERACT_API_BASEAPI_H__
00022
00023 #include <stdio.h>
00024
00025
00026
00027 #include "apitypes.h"
00028 #include "thresholder.h"
00029 #include "unichar.h"
00030 #include "tesscallback.h"
00031 #include "publictypes.h"
00032
// Forward declarations of types that this header refers to only by pointer
// or reference, so that their full definitions need not be included here.
template <typename T> class GenericVector;
class PAGE_RES;
class PAGE_RES_IT;
class ParagraphModel;
class BlamerBundle;
class BLOCK_LIST;
class DENORM;
class IMAGE;
class MATRIX;
class PBLOB;
class ROW;
class STRING;
class WERD;
// Declared with the struct keyword, unlike the classes around them.
// NOTE(review): presumably the image library's C structs -- confirm.
struct Pix;
struct Box;
struct Pixa;
struct Boxa;
class ETEXT_DESC;
struct OSResults;
class TBOX;
class UNICHARSET;

// Opaque list node; LIST is a pointer to it. The node type is only declared
// here, never defined in this header.
struct list_rec;
typedef list_rec *LIST;

// Number of elements in an INT_FEATURE_ARRAY.
#define MAX_NUM_INT_FEATURES 512
// The feature struct is only declared here. INT_FEATURE is a pointer to one
// feature; INT_FEATURE_ARRAY is a fixed-size array of
// MAX_NUM_INT_FEATURES features.
struct INT_FEATURE_STRUCT;
typedef INT_FEATURE_STRUCT *INT_FEATURE;
typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];
struct TBLOB;

// TESSDLL_API decorates the exported class below.
//   TESSDLL_EXPORTS defined       -> __declspec(dllexport)
//   else TESSDLL_IMPORTS defined  -> __declspec(dllimport)
//   neither defined               -> expands to nothing
#ifdef TESSDLL_EXPORTS
#define TESSDLL_API __declspec(dllexport)
#elif defined(TESSDLL_IMPORTS)
#define TESSDLL_API __declspec(dllimport)
#else
#define TESSDLL_API
#endif


00075 namespace tesseract {
00076
00077 class CubeRecoContext;
00078 class Dawg;
00079 class Dict;
00080 class EquationDetect;
00081 class PageIterator;
00082 class LTRResultIterator;
00083 class ResultIterator;
00084 class MutableIterator;
00085 class Tesseract;
00086 class Trie;
00087 class Wordrec;
00088
00089 typedef int (Dict::*DictFunc)(void* void_dawg_args,
00090 UNICHAR_ID unichar_id, bool word_end) const;
00091 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
00092 const char* context,
00093 int context_bytes,
00094 const char* character,
00095 int character_bytes);
00096 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
00097 const LIST &best_choices,
00098 const UNICHARSET &unicharset,
00099 BlamerBundle *blamer_bundle);
00100 typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> TruthCallback;
00101
00110 class TESSDLL_API TessBaseAPI {
00111 public:
00112 TessBaseAPI();
00113 virtual ~TessBaseAPI();
00114
00118 static const char* Version();
00119
00124 void SetInputName(const char* name);
00125
00127 void SetOutputName(const char* name);
00128
00144 bool SetVariable(const char* name, const char* value);
00145 bool SetDebugVariable(const char* name, const char* value);
00146
00147
00148
00149 bool GetIntVariable(const char *name, int *value) const;
00150 bool GetBoolVariable(const char *name, bool *value) const;
00151 bool GetDoubleVariable(const char *name, double *value) const;
00152
00153
00154 const char *GetStringVariable(const char *name) const;
00155
00156
00157 void PrintVariables(FILE *fp) const;
00158
00159 bool GetVariableAsString(const char *name, STRING *val);
00160
00199 int Init(const char* datapath, const char* language, OcrEngineMode mode,
00200 char **configs, int configs_size,
00201 const GenericVector<STRING> *vars_vec,
00202 const GenericVector<STRING> *vars_values,
00203 bool set_only_non_debug_params);
00204 int Init(const char* datapath, const char* language, OcrEngineMode oem) {
00205 return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
00206 }
00207 int Init(const char* datapath, const char* language) {
00208 return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
00209 }
00210
00211
00212
00213
00214
00215
00216
00217 const char* GetInitLanguagesAsString() const;
00218
00219
00220
00221
00222 void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
00223
00230 int InitLangMod(const char* datapath, const char* language);
00231
00232
00233
00234 void InitForAnalysePage();
00235
00242 void ReadConfigFile(const char* filename);
00244 void ReadDebugConfigFile(const char* filename);
00245
00251 void SetPageSegMode(PageSegMode mode);
00252
00254 PageSegMode GetPageSegMode() const;
00255
00273 char* TesseractRect(const unsigned char* imagedata,
00274 int bytes_per_pixel, int bytes_per_line,
00275 int left, int top, int width, int height);
00276
00281 void ClearAdaptiveClassifier();
00282
00289
00290
00300 void SetImage(const unsigned char* imagedata, int width, int height,
00301 int bytes_per_pixel, int bytes_per_line);
00302
00313 void SetImage(const Pix* pix);
00314
00319 void SetSourceResolution(int ppi);
00320
00326 void SetRectangle(int left, int top, int width, int height);
00327
00335 void SetThresholder(ImageThresholder* thresholder) {
00336 if (thresholder_ != NULL)
00337 delete thresholder_;
00338 thresholder_ = thresholder;
00339 ClearResults();
00340 }
00341
00347 Pix* GetThresholdedImage();
00348
00354 Boxa* GetRegions(Pixa** pixa);
00355
00363 Boxa* GetTextlines(Pixa** pixa, int** blockids);
00364
00373 Boxa* GetStrips(Pixa** pixa, int** blockids);
00374
00380 Boxa* GetWords(Pixa** pixa);
00381
00382
00383
00384
00385
00386
00387
00388 Boxa* GetConnectedComponents(Pixa** cc);
00389
00390
00391
00392
00393
00394
00395
00396 Boxa* GetComponentImages(PageIteratorLevel level,
00397 bool text_only,
00398 Pixa** pixa, int** blockids);
00399
00400
00401
00402
00403
00404 int GetThresholdedImageScaleFactor() const;
00405
00411 void DumpPGM(const char* filename);
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422 PageIterator* AnalyseLayout();
00423
00430 int Recognize(ETEXT_DESC* monitor);
00431
00438 int RecognizeForChopTest(ETEXT_DESC* monitor);
00439
00456 bool ProcessPages(const char* filename,
00457 const char* retry_config, int timeout_millisec,
00458 STRING* text_out);
00459
00471 bool ProcessPage(Pix* pix, int page_index, const char* filename,
00472 const char* retry_config, int timeout_millisec,
00473 STRING* text_out);
00474
00475
00476
00477
00478
00479
00480
00481 ResultIterator* GetIterator();
00482
00483
00484
00485
00486
00487
00488
00489 MutableIterator* GetMutableIterator();
00490
00495 char* GetUTF8Text();
00496
00502 char* GetHOCRText(int page_number);
00510 char* GetBoxText(int page_number);
00516 char* GetUNLVText();
00518 int MeanTextConf();
00525 int* AllWordConfidences();
00526
00537 bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
00538
00545 void Clear();
00546
00553 void End();
00554
00561 int IsValidWord(const char *word);
00562
00563 bool GetTextDirection(int* out_offset, float* out_slope);
00564
00566 void SetDictFunc(DictFunc f);
00567
00571 void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
00572
00574 void SetFillLatticeFunc(FillLatticeFunc f);
00575
00580 bool DetectOS(OSResults*);
00581
00583 void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
00584 INT_FEATURE_ARRAY int_features,
00585 int* num_features, int* FeatureOutlineIndex);
00586
00587
00588
00589 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
00590 int right, int bottom);
00591
00592
00593
00594 void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
00595 int num_max_matches,
00596 int* unichar_ids,
00597 float* ratings,
00598 int* num_matches_returned);
00599
00600
00601 const char* GetUnichar(int unichar_id);
00602
00604 const Dawg *GetDawg(int i) const;
00605
00607 int NumDawgs() const;
00608
00610 const char* GetLastInitLanguage() const;
00611
00612
00613 static ROW *MakeTessOCRRow(float baseline, float xheight,
00614 float descender, float ascender);
00615
00616
00617 static TBLOB *MakeTBLOB(Pix *pix);
00618
00619
00620
00621
00622 static void NormalizeTBLOB(TBLOB *tblob, ROW *row,
00623 bool numeric_mode, DENORM *denorm);
00624
00625 Tesseract* const tesseract() const {
00626 return tesseract_;
00627 }
00628 OcrEngineMode const oem() const {
00629 return last_oem_requested_;
00630 }
00631
00632 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
00633
00634
00635 CubeRecoContext *GetCubeRecoContext() const;
00636
00637 void set_min_orientation_margin(double margin);
00638
00639
00640
00641 void GetBlockTextOrientations(int** block_orientation,
00642 bool** vertical_writing);
00643
00645 BLOCK_LIST* FindLinesCreateBlockList();
00646
00652 static void DeleteBlockList(BLOCK_LIST* block_list);
00653
00654
00655 protected:
00656
00658 bool InternalSetImage();
00659
00664 virtual void Threshold(Pix** pix);
00665
00670 int FindLines();
00671
00673 void ClearResults();
00674
00675
00676
00677
00678 LTRResultIterator* GetLTRIterator();
00679
00686 int TextLength(int* blob_count);
00687
00689
00690
00695 void AdaptToCharacter(const char *unichar_repr,
00696 int length,
00697 float baseline,
00698 float xheight,
00699 float descender,
00700 float ascender);
00701
00703 PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00704 PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
00705
00707
00708 void DetectParagraphs(int debug_level);
00709
00714 static int TesseractExtractResult(char** text,
00715 int** lengths,
00716 float** costs,
00717 int** x0,
00718 int** y0,
00719 int** x1,
00720 int** y1,
00721 PAGE_RES* page_res);
00722
00723 const PAGE_RES* GetPageRes() const {
00724 return page_res_;
00725 };
00726
00727 protected:
00728 Tesseract* tesseract_;
00729 Tesseract* osd_tesseract_;
00730 EquationDetect* equ_detect_;
00731 ImageThresholder* thresholder_;
00732 GenericVector<ParagraphModel *>* paragraph_models_;
00733 BLOCK_LIST* block_list_;
00734 PAGE_RES* page_res_;
00735 STRING* input_file_;
00736 STRING* output_file_;
00737 STRING* datapath_;
00738 STRING* language_;
00739 OcrEngineMode last_oem_requested_;
00740 bool recognition_done_;
00741 TruthCallback *truth_cb_;
00742
00747
00748 int rect_left_;
00749 int rect_top_;
00750 int rect_width_;
00751 int rect_height_;
00752 int image_width_;
00753 int image_height_;
00754
00755 };
00756
00757 }
00758
00759 #endif // TESSERACT_API_BASEAPI_H__