00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00018
00019 #ifndef TESSERACT_CLASSIFY_CLASSIFY_H__
00020 #define TESSERACT_CLASSIFY_CLASSIFY_H__
00021
00022 #include "adaptive.h"
00023 #include "ccstruct.h"
00024 #include "classify.h"
00025 #include "dict.h"
00026 #include "featdefs.h"
00027 #include "fontinfo.h"
00028 #include "intfx.h"
00029 #include "intmatcher.h"
00030 #include "normalis.h"
00031 #include "ratngs.h"
00032 #include "ocrfeatures.h"
00033 #include "unicity_table.h"
00034
00035 class ScrollView;
00036 class WERD_CHOICE;
00037 class WERD_RES;
00038 struct ADAPT_RESULTS;
00039 struct NORM_PROTOS;
00040
00041 static const int kUnknownFontinfoId = -1;  // Negative sentinel font-info id; by name, "font info unknown". NOTE(review): presumably used for the int FontinfoId / fontinfo_id values in Classify - confirm.
00042 static const int kBlankFontinfoId = -2;  // Second negative sentinel, distinct from kUnknownFontinfoId; by name, a "blank" font-info id - confirm exact meaning at the use sites.
00043
00044 namespace tesseract {
00045
00046 struct ShapeRating;
00047 class ShapeTable;
00048
00049
00050
00051 enum CharSegmentationType {  // Segmentation kind; passed as the `segmentation` argument of Classify::LearnPieces.
00052 CST_FRAGMENT,  // Value 0. NOTE(review): presumably "a fragment of a character" - confirm.
00053 CST_WHOLE,  // Value 1. NOTE(review): presumably "one whole character" - confirm.
00054 CST_IMPROPER,  // Value 2. NOTE(review): presumably an improper (neither fragment nor whole) segmentation - confirm.
00055 CST_NGRAM  // Value 3. NOTE(review): presumably "several characters together" - confirm.
00056 };
00057
00058 class Classify : public CCStruct {  // Classifier object: template sets, matching/adaptation entry points, tunable *_VAR_H parameters and bookkeeping counters.
00059 public:
00060 Classify();
00061 virtual ~Classify();
00062 Dict& getDict() {  // Mutable access to the private dict_ member.
00063 return dict_;
00064 }
00065
00066 const ShapeTable* shape_table() const {  // Read-only view of whatever shape_table_ currently points to.
00067 return shape_table_;
00068 }
00069
00070 // Adapted-template helpers. Only declared here; the bodies live outside this header.
00071 ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset);
00072 int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId);
00073
00074 // PruneClasses: class-pruning entry point (declaration only).
00075 //   int_templates         - integer templates, read-only.
00076 //   num_features/features - input feature array and, by name, its element count.
00077 //   normalization_factors - read-only uinT8 array. NOTE(review): presumably per-class - confirm.
00078 //   expected_num_features - read-only uinT16 array. NOTE(review): presumably per-class - confirm.
00079 //   results               - the only non-const pointer, so presumably the output buffer - confirm.
00080 // Returns an int. NOTE(review): presumably the number of results produced - confirm
00081 // against the definition.
00082
00083
00084
00085
00086
00087 int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
00088 int num_features,
00089 const INT_FEATURE_STRUCT* features,
00090 const uinT8* normalization_factors,
00091 const uinT16* expected_num_features,
00092 CP_RESULT_STRUCT* results);
00093 void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
00094 CLASS_CUTOFF_ARRAY Cutoffs);
00095 void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
00096 void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
00097 ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File);
00098 // Normalization-prototype helpers (see also the NormProtos member below).
00099 FLOAT32 ComputeNormMatch(CLASS_ID ClassId,
00100 const FEATURE_STRUCT& feature, BOOL8 DebugMatch);
00101 void FreeNormProtos();
00102 NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset);
00103 // Prototype conversion and integer-template creation.
00104 void ReadClassFile();
00105 void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
00106 INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos,
00107 const UNICHARSET& target_unicharset);
00108
00109 // LearnWord: word-level learning entry point (declaration only).
00110 //   filename - C string. NOTE(review): its role is not visible in this header - confirm.
00111 //   rejmap   - C string. NOTE(review): presumably a reject map for the word - confirm.
00112 //   word     - mutable WERD_RES the call operates on.
00113
00114
00115
00116
00117 void LearnWord(const char* filename, const char *rejmap, WERD_RES *word);
00118
00119 // LearnPieces: learning entry point taking a sub-range of a word (declaration only).
00120 //   start, length - NOTE(review): presumably select a run of pieces within `word` - confirm.
00121 //   threshold     - float threshold; its scale is not visible in this header.
00122 //   segmentation  - a CharSegmentationType value describing the selected pieces.
00123 //   correct_text  - C string. NOTE(review): presumably the ground-truth text - confirm.
00124 //   word          - mutable WERD_RES the call operates on.
00125
00126
00127
00128 void LearnPieces(const char* filename, int start, int length,
00129 float threshold, CharSegmentationType segmentation,
00130 const char* correct_text, WERD_RES *word);
00131 void InitAdaptiveClassifier(bool load_pre_trained_templates);
00132 void InitAdaptedClass(TBLOB *Blob,
00133 const DENORM& denorm,
00134 CLASS_ID ClassId,
00135 int FontinfoId,
00136 ADAPT_CLASS Class,
00137 ADAPT_TEMPLATES Templates);
00138 void AdaptToPunc(TBLOB *Blob,
00139 const DENORM& denorm,
00140 CLASS_ID ClassId,
00141 int FontinfoId,
00142 FLOAT32 Threshold);
00143 void AmbigClassifier(TBLOB *Blob,
00144 const DENORM& denorm,
00145 INT_TEMPLATES Templates,
00146 ADAPT_CLASS *Classes,
00147 UNICHAR_ID *Ambiguities,
00148 ADAPT_RESULTS *Results);
00149 void MasterMatcher(INT_TEMPLATES templates,
00150 inT16 num_features,
00151 const INT_FEATURE_STRUCT* features,
00152 const uinT8* norm_factors,
00153 ADAPT_CLASS* classes,
00154 int debug,
00155 int num_classes,
00156 const TBOX& blob_box,
00157 CLASS_PRUNER_RESULTS results,
00158 ADAPT_RESULTS* final_results);
00159
00160 // ExpandShapesAndApplyCorrections: takes one integer-matcher result (int_result,
00161 // by non-const reference) plus blob geometry/rating inputs and an ADAPT_RESULTS
00162 // pointer. NOTE(review): presumably appends corrected entries to final_results - confirm.
00163
00164 void ExpandShapesAndApplyCorrections(ADAPT_CLASS* classes,
00165 bool debug,
00166 int class_id,
00167 int bottom, int top,
00168 float cp_rating,
00169 int blob_length,
00170 const uinT8* cn_factors,
00171 INT_RESULT_STRUCT& int_result,
00172 ADAPT_RESULTS* final_results);
00173
00174 // ComputeCorrectedRating: returns a double computed from the class-pruner rating
00175 // (cp_rating), the matcher rating (im_rating) and the remaining arguments; formula not visible here.
00176 double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
00177 double im_rating, int feature_misses,
00178 int bottom, int top,
00179 int blob_length, const uinT8* cn_factors);
00180 void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
00181 ADAPT_RESULTS *Results,
00182 BLOB_CHOICE_LIST *Choices);
00183 void AddNewResult(ADAPT_RESULTS *results,
00184 CLASS_ID class_id,
00185 int shape_id,
00186 FLOAT32 rating,
00187 bool adapted,
00188 int config,
00189 int fontinfo_id,
00190 int fontinfo_id2);
00191 int GetAdaptiveFeatures(TBLOB *Blob,
00192 INT_FEATURE_ARRAY IntFeatures,
00193 FEATURE_SET *FloatFeatures);
00194
00195 #ifndef GRAPHICS_DISABLED  // DebugAdaptiveClassifier is declared only when graphics support is compiled in.
00196 void DebugAdaptiveClassifier(TBLOB *Blob,
00197 const DENORM& denorm,
00198 ADAPT_RESULTS *Results);
00199 #endif
00200 void GetAdaptThresholds (TWERD * Word,
00201 const DENORM& denorm,
00202 const WERD_CHOICE& BestChoice,
00203 const WERD_CHOICE& BestRawChoice,
00204 FLOAT32 Thresholds[]);
00205
00206 PROTO_ID MakeNewTempProtos(FEATURE_SET Features,
00207 int NumBadFeat,
00208 FEATURE_ID BadFeat[],
00209 INT_CLASS IClass,
00210 ADAPT_CLASS Class,
00211 BIT_VECTOR TempProtoMask);
00212 int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
00213 CLASS_ID ClassId,
00214 int FontinfoId,
00215 int NumFeatures,
00216 INT_FEATURE_ARRAY Features,
00217 FEATURE_SET FloatFeatures);
00218 void MakePermanent(ADAPT_TEMPLATES Templates,
00219 CLASS_ID ClassId,
00220 int ConfigId,
00221 const DENORM& denorm,
00222 TBLOB *Blob);
00223 void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
00224 void RemoveExtraPuncs(ADAPT_RESULTS *Results);
00225 void RemoveBadMatches(ADAPT_RESULTS *Results);
00226 void SetAdaptiveThreshold(FLOAT32 Threshold);
00227 void ShowBestMatchFor(TBLOB *Blob,
00228 const DENORM& denorm,
00229 CLASS_ID ClassId,
00230 int shape_id,
00231 BOOL8 AdaptiveOn,
00232 BOOL8 PreTrainedOn,
00233 ADAPT_RESULTS *Results);
00234
00235 // ClassIDToDebugStr: const member returning a STRING built from a class id and config id.
00236 STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates,
00237 int class_id, int config_id) const;
00238
00239 // ClassAndConfigIDToFontOrShapeID: const lookup from (class id, matcher config)
00240 // to an int id. NOTE(review): by name the result is a font id or a shape id;
00241 // presumably which one depends on whether a shape table is in use - confirm.
00242
00243
00244
00245
00246
00247
00248 int ClassAndConfigIDToFontOrShapeID(int class_id,
00249 int int_result_config) const;
00250
00251 // ShapeIDToClassID: const lookup from a shape id to an int class id.
00252 int ShapeIDToClassID(int shape_id) const;
00253 UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
00254 const DENORM& denorm,
00255 ADAPT_TEMPLATES Templates,
00256 ADAPT_RESULTS *Results);
00257 int CharNormClassifier(TBLOB *Blob,
00258 const DENORM& denorm,
00259 INT_TEMPLATES Templates,
00260 ADAPT_RESULTS *Results);
00261
00262 // CharNormTrainingSample: variant taking a TrainingSample instead of a TBLOB and
00263 // filling a vector of ShapeRating. NOTE(review): pruner_only presumably skips the full matcher - confirm.
00264 int CharNormTrainingSample(bool pruner_only, const TrainingSample& sample,
00265 GenericVector<ShapeRating>* results);
00266 UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
00267 const DENORM& denorm,
00268 CLASS_ID CorrectClass);
00269 void DoAdaptiveMatch(TBLOB *Blob,
00270 const DENORM& denorm,
00271 ADAPT_RESULTS *Results);
00272 void AdaptToChar(TBLOB *Blob,
00273 const DENORM& denorm,
00274 CLASS_ID ClassId,
00275 int FontinfoId,
00276 FLOAT32 Threshold);
00277 void DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm,
00278 INT_CLASS_STRUCT* int_class);
00279 int AdaptableWord(TWERD *Word,
00280 const WERD_CHOICE &BestChoiceWord,
00281 const WERD_CHOICE &RawChoiceWord);
00282 void EndAdaptiveClassifier();
00283 void PrintAdaptiveStatistics(FILE *File);
00284 void SettupPass1();  // NOTE: "Settup" (sic) is the declared spelling; renaming would break callers.
00285 void SettupPass2();  // NOTE: same spelling caveat as SettupPass1.
00286 void AdaptiveClassifier(TBLOB *Blob,
00287 const DENORM& denorm,
00288 BLOB_CHOICE_LIST *Choices,
00289 CLASS_PRUNER_RESULTS cp_results);
00290 void ClassifyAsNoise(ADAPT_RESULTS *Results);
00291 void ResetAdaptiveClassifierInternal();
00292
00293 int GetBaselineFeatures(TBLOB *Blob,
00294 const DENORM& denorm,
00295 INT_TEMPLATES Templates,
00296 INT_FEATURE_ARRAY IntFeatures,
00297 uinT8* CharNormArray,
00298 inT32 *BlobLength);
00299 int GetCharNormFeatures(TBLOB *Blob,
00300 const DENORM& denorm,
00301 INT_TEMPLATES Templates,
00302 INT_FEATURE_ARRAY IntFeatures,
00303 uinT8* PrunerNormArray,
00304 uinT8* CharNormArray,
00305 inT32 *BlobLength,
00306 inT32 *FeatureOutlineIndex);
00307
00308 // ComputeCharNormArrays: takes a normalization feature and templates and two
00309 // uinT8 buffers. NOTE(review): presumably fills char_norm_array and pruner_array - confirm.
00310 void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature,
00311 INT_TEMPLATES_STRUCT* templates,
00312 uinT8* char_norm_array,
00313 uinT8* pruner_array);
00314
00315 bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
00316 void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM& denorm, TBLOB *Blob);
00317
00318 void ResetFeaturesHaveBeenExtracted();
00319 bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }  // True once the NumAdaptationsFailed counter is positive.
00320 bool LooksLikeGarbage(const DENORM& denorm, TBLOB *blob);
00321 void RefreshDebugWindow(ScrollView **win, const char *msg,
00322 int y_offset, const TBOX &wbox);
00323
00324 void ClearCharNormArray(uinT8* char_norm_array);
00325 void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature,
00326 uinT8* char_norm_array);
00327 void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures);
00328
00329 INT_TEMPLATES ReadIntTemplates(FILE *File);
00330 void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
00331 const UNICHARSET& target_unicharset);
00332 CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on,
00333 bool* pretrained_on, int* shape_id);
00334 void ShowMatchDisplay();
00335
00336 UnicityTable<FontInfo>& get_fontinfo_table() {  // Mutable reference to the public fontinfo_table_ member.
00337 return fontinfo_table_;
00338 }
00339 UnicityTable<FontSet>& get_fontset_table() {  // Mutable reference to the public fontset_table_ member.
00340 return fontset_table_;
00341 }
00342
00343 void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale);
00344
00345 FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob);
00346
00347 FEATURE_SET ExtractPicoFeatures(TBLOB *Blob);
00348
00349 // Tunable parameters. Each *_VAR_H(name, default, description) invocation introduces
00350 // one named parameter; the macros themselves are defined outside this header, and the
00351 // quoted description is the only documentation of each parameter visible here.
00352
00353 BOOL_VAR_H(prioritize_division, FALSE,
00354 "Prioritize blob division over chopping");
00355 INT_VAR_H(tessedit_single_match, FALSE, "Top choice only from CP");  // NOTE(review): INT parameter given FALSE as its default.
00356 BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier");
00357 INT_VAR_H(classify_debug_level, 0, "Classify debug level");
00358
00359
00360 // Normalization parameters.
00361 INT_VAR_H(classify_norm_method, character, "Normalization Method ...");
00362 double_VAR_H(classify_char_norm_range, 0.2,
00363 "Character Normalization Range ...");
00364 double_VAR_H(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...");
00365 double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...");
00366 double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...");
00367 double_VAR_H(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...");
00368
00369 // Adaptive-matcher parameters.
00370 BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching");
00371 BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching");
00372 BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier");  // NOTE(review): description text is identical to classify_enable_learning's.
00373 BOOL_VAR_H(classify_use_pre_adapted_templates, 0,
00374 "Use pre-adapted classifier templates");
00375 BOOL_VAR_H(classify_save_adapted_templates, 0,
00376 "Save adapted templates to a file");
00377 BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger");
00378 INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level");
00379 INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags");
00380 INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: ");
00381 double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)");
00382 double_VAR_H(matcher_great_threshold, 0.0, "Great Match (0-1)");
00383 double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)");
00384 double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)");
00385 double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)");
00386 double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. noise blob length: ");
00387 INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes");
00388 INT_VAR_H(matcher_min_examples_for_prototyping, 3,
00389 "Reliable Config Threshold");
00390 INT_VAR_H(matcher_sufficient_examples_for_prototyping, 5,
00391 "Enable adaption even if the ambiguities have not been seen");
00392 double_VAR_H(matcher_clustering_max_angle_delta, 0.015,
00393 "Maximum angle delta for prototype clustering");
00394 double_VAR_H(classify_misfit_junk_penalty, 0.0,
00395 "Penalty to apply when a non-alnum is vertically out of "
00396 "its expected textline position");
00397 double_VAR_H(rating_scale, 1.5, "Rating scaling factor");
00398 double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor");
00399 double_VAR_H(tessedit_class_miss_scale, 0.00390625,
00400 "Scale factor for features not used");
00401 INT_VAR_H(classify_adapt_proto_threshold, 230,
00402 "Threshold for good protos during adaptive 0-255");
00403 INT_VAR_H(classify_adapt_feature_threshold, 230,
00404 "Threshold for good features during adaptive 0-255");
00405 BOOL_VAR_H(disable_character_fragments, TRUE,
00406 "Do not include character fragments in the"
00407 " results of the classifier");
00408 double_VAR_H(classify_character_fragments_garbage_certainty_threshold, -3.0,
00409 "Exclude fragments that do not match any whole character"
00410 " with at least this certainty");
00411 BOOL_VAR_H(classify_debug_character_fragments, FALSE,
00412 "Bring up graphical debugging windows for fragments training");
00413 BOOL_VAR_H(matcher_debug_separate_windows, FALSE,
00414 "Use two different windows for debugging the matching: "
00415 "One for the protos and one for the features.");
00416 STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning");
00417
00418 // Class-pruner and integer-matcher parameters.
00419 INT_VAR_H(classify_class_pruner_threshold, 229,
00420 "Class Pruner Threshold 0-255");
00421 INT_VAR_H(classify_class_pruner_multiplier, 30,
00422 "Class Pruner Multiplier 0-255: ");
00423 INT_VAR_H(classify_cp_cutoff_strength, 7,
00424 "Class Pruner CutoffStrength: ");
00425 INT_VAR_H(classify_integer_matcher_multiplier, 14,
00426 "Integer Matcher Multiplier 0-255: ");
00427
00428 // Template sets (public data members): by name, the pre-trained and the adapted templates.
00429 INT_TEMPLATES PreTrainedTemplates;
00430 ADAPT_TEMPLATES AdaptedTemplates;
00431
00432 // Bit-vector masks over protos/configs. NOTE(review): by name all-on, pruned, all-off and temporary masks - confirm how each is filled.
00433 BIT_VECTOR AllProtosOn;
00434 BIT_VECTOR PrunedProtos;
00435 BIT_VECTOR AllConfigsOn;
00436 BIT_VECTOR AllProtosOff;
00437 BIT_VECTOR AllConfigsOff;
00438 BIT_VECTOR TempProtoMask;
00439 bool EnableLearning;
00440
00441 NORM_PROTOS *NormProtos;  // Raw pointer; ReadNormProtos returns the same type and FreeNormProtos exists - ownership not visible here.
00442
00443 UnicityTable<FontInfo> fontinfo_table_;  // Exposed by reference through get_fontinfo_table().
00444
00445 // fontset_table_ (below) is exposed by reference through get_fontset_table().
00446 // NOTE(review): how FontSet entries relate to fontinfo_table_ is not visible in
00447 // this header - confirm at the definitions.
00448
00449
00450
00451 UnicityTable<FontSet> fontset_table_;
00452
00453 INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word");
00454 BOOL_VAR_H(classify_bln_numeric_mode, 0,
00455 "Assume the input is numbers [0-9].");
00456
00457 protected:
00458 IntegerMatcher im_;
00459 FEATURE_DEFS_STRUCT feature_defs_;
00460
00461 // shape_table_ is returned (as const) by shape_table().
00462 // NOTE(review): it is a raw pointer; whether it may be null and who owns it is
00463 // not visible in this header - confirm.
00464 ShapeTable* shape_table_;
00465
00466 private:
00467 // Dictionary object returned by getDict().
00468 Dict dict_;
00469
00470 // Integer counters. Only NumAdaptationsFailed is read in this header (by AdaptiveClassifierIsFull()).
00471 int AdaptiveMatcherCalls;
00472 int BaselineClassifierCalls;
00473 int CharNormClassifierCalls;
00474 int AmbigClassifierCalls;
00475 int NumWordsAdaptedTo;
00476 int NumCharsAdaptedTo;
00477 int NumBaselineClassesTried;
00478 int NumCharNormClassesTried;
00479 int NumAmbigClassesTried;
00480 int NumClassesOutput;
00481 int NumAdaptationsFailed;
00482
00483 // Feature-extraction state. NOTE(review): presumably a per-blob cache of extracted
00484 // features, invalidated by ResetFeaturesHaveBeenExtracted() - confirm.
00485
00486
00487 bool FeaturesHaveBeenExtracted;
00488 bool FeaturesOK;
00489 INT_FEATURE_ARRAY BaselineFeatures;
00490 INT_FEATURE_ARRAY CharNormFeatures;
00491 INT_FX_RESULT_STRUCT FXInfo;
00492
00493 // Cutoff arrays and debug windows.
00494 // NOTE(review): CharNormCutoffs/BaselineCutoffs are raw uinT16 pointers whose
00495 // length and ownership are not visible here; the ScrollView pointers are
00496 // presumably learning-debug windows - confirm.
00497
00498
00499
00500
00501 uinT16* CharNormCutoffs;
00502 uinT16* BaselineCutoffs;
00503 GenericVector<uinT16> shapetable_cutoffs_;
00504 ScrollView* learn_debug_win_;
00505 ScrollView* learn_fragmented_word_debug_win_;
00506 ScrollView* learn_fragments_debug_win_;
00507 };
00508 }
00509
00510 #endif // TESSERACT_CLASSIFY_CLASSIFY_H__