tesseract Namespace Reference

Classes

struct  TESS_CHAR
class  TessBaseAPI
class  CubeRecoContext
class  CubeClassifier
class  CubeTessClassifier
struct  DocQualCallbacks
class  EquationDetect
class  LTRResultIterator
class  ChoiceIterator
class  MutableIterator
class  PageIterator
class  UnicodeSpanSkipper
struct  Cluster
class  SimpleClusterer
struct  GeometricClassifierState
struct  Interval
class  RowInfo
struct  LineHypothesis
class  RowScratchRegisters
class  ParagraphTheory
class  ParagraphModelSmearer
class  ResultIterator
class  TesseractCubeCombiner
struct  TesseractStats
class  Tesseract
class  ImageThresholder
class  BoxWord
class  CCStruct
class  DetLineFit
class  DPPoint
struct  FontSpacingInfo
struct  FontInfo
struct  FontSet
struct  ParamsTrainingHypothesis
class  ParamsTrainingBundle
class  UnicharIdArrayUtils
class  AmbigSpec
class  UnicharAmbigs
class  BitVector
class  CCUtilMutex
class  CCUtil
class  PointerVector
class  IndexMap
class  IndexMapBiDi
struct  ParamsVectors
class  ParamUtils
class  Param
class  IntParam
class  BoolParam
class  StringParam
class  DoubleParam
class  TessdataManager
class  Classify
class  ErrorCounter
class  IntFeatureDist
class  IntFeatureMap
class  IntFeatureSpace
class  ClassPruner
struct  ShapeDist
class  MasterTrainer
class  SampleIterator
struct  ShapeRating
class  ShapeClassifier
struct  UnicharAndFonts
class  Shape
class  ShapeTable
class  TessClassifier
class  TrainingSample
class  TrainingSampleSet
class  AltList
class  BeamSearch
class  Bmp8
class  CachedFile
class  CharAltList
struct  Bigram
struct  CharBigram
struct  CharBigramTable
class  CharBigrams
class  CharSamp
class  CharSampEnum
class  CharSampSet
class  CharSet
class  CharClassifier
class  CharClassifierFactory
class  ConCompPt
class  ConComp
class  ConvNetCharClassifier
class  CubeLineObject
class  CubeLineSegmenter
class  CubeObject
class  CubeSearchObject
class  CubeTuningParams
class  CubeUtils
class  FeatureBase
class  FeatureBmp
class  FeatureChebyshev
class  FeatureHybrid
class  HybridNeuralNetCharClassifier
class  LangModEdge
class  LangModel
class  SearchColumn
class  SearchNode
class  SearchNodeHashTable
class  SearchObject
class  TessLangModEdge
class  TessLangModel
class  TuningParams
class  WordAltList
class  WordListLangModel
struct  PairSizeInfo
struct  FontPairSizeInfo
class  WordSizeModel
class  WordUnigrams
class  CUtil
struct  NodeChild
class  Dawg
struct  DawgInfo
class  DawgInfoVector
class  SquishedDawg
struct  DawgArgs
class  Dict
class  PermuterState
class  Trie
class  Image
class  InputFileBuffer
class  NeuralNet
class  Neuron
struct  AlignedBlobParams
class  AlignedBlob
class  GridBase
class  IntGrid
class  BBGrid
class  GridSearch
class  TabEventHandler
class  BlobGrid
class  CCNonTextDetect
class  ColumnFinder
class  ColPartition
class  ColPartitionGrid
class  ColPartitionSet
class  PixelHistogram
class  ShiroRekhaSplitter
class  EquationDetectBase
class  ImageFind
class  LineFinder
class  StrokeWidth
class  TabFind
class  ColSegment
class  TableFinder
class  StructuredTable
class  TableRecognizer
class  TabConstraint
class  TabVector
class  TextlineProjection
class  Textord
class  WorkingPartSet
struct  AssociateStats
class  AssociateUtils
struct  LanguageModelConsistencyInfo
struct  LanguageModelDawgInfo
struct  LanguageModelNgramInfo
struct  ViterbiStateEntry
struct  LanguageModelState
struct  BestChoiceBundle
struct  BestPathByColumn
class  LanguageModel
struct  MATCH
class  BlobMatchTable
class  FRAGMENT
class  Wordrec

Typedefs

typedef int(Dict::* DictFunc )(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
typedef double(Dict::* ProbabilityInContextFunc )(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
typedef void(Wordrec::* FillLatticeFunc )(const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
typedef TessCallback3< const
UNICHARSET &, int, PAGE_RES * > 
TruthCallback
typedef GenericVectorEqEq
< const ParagraphModel * > 
SetOfModels
typedef void(Tesseract::* WordRecognizer )(BLOCK *block, ROW *row, WERD_RES *word)
typedef GenericVector
< ParamsTrainingHypothesis > 
ParamsTrainingHypothesisList
typedef GenericVector< UNICHAR_ID > UnicharIdVector
typedef GenericVector
< AmbigSpec_LIST * > 
UnicharAmbigsVector
typedef signed int char_32
typedef basic_string< char_32 > string_32
typedef GenericVector< NodeChild > NodeChildVector
typedef GenericVector< int > SuccessorList
typedef GenericVector
< SuccessorList * > 
SuccessorListsVector
typedef GenericVector< Dawg * > DawgVector
typedef GridSearch< BLOBNBOX,
BLOBNBOX_CLIST, BLOBNBOX_C_IT > 
BlobGridSearch
typedef GridSearch
< ColPartition,
ColPartition_CLIST,
ColPartition_C_IT > 
ColPartitionGridSearch
typedef GenericVector
< ColPartitionSet * > 
PartSetVector
typedef TessResultCallback1
< bool, int > 
WidthCallback
typedef BBGrid< ColSegment,
ColSegment_CLIST,
ColSegment_C_IT > 
ColSegmentGrid
typedef GridSearch< ColSegment,
ColSegment_CLIST,
ColSegment_C_IT > 
ColSegmentGridSearch
typedef unsigned char LanguageModelFlagsType

Enumerations

enum  LineType { LT_START = 'S', LT_BODY = 'C', LT_UNKNOWN = 'U', LT_MULTIPLE = 'M' }
enum  CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }
enum  ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }
enum  NormalizationMode { NM_BASELINE = -3, NM_CHAR_ISOTROPIC = -2, NM_CHAR_ANISOTROPIC = -1 }
enum  ParamsTrainingRawFeatureType {
  PTRAIN_RAW_FEATURE_DICT_MATCH_TYPE, PTRAIN_RAW_FEATURE_UNAMBIG_DICT_MATCH, PTRAIN_RAW_FEATURE_SHAPE_COST, PTRAIN_RAW_FEATURE_NGRAM_PROB,
  PTRAIN_RAW_FEATURE_NUM_BAD_PUNC, PTRAIN_RAW_FEATURE_NUM_BAD_CASE, PTRAIN_RAW_FEATURE_NUM_BAD_CHAR_TYPE, PTRAIN_RAW_FEATURE_NUM_BAD_SPACING,
  PTRAIN_RAW_FEATURE_NUM_BAD_SCRIPT, PTRAIN_RAW_FEATURE_NUM_BAD_FONT, PTRAIN_RAW_FEATURE_WORST_CERT, PTRAIN_RAW_FEATURE_RATING,
  PTRAIN_RAW_FEATURE_ADAPTED, PTRAIN_RAW_FEATURE_NUM_UNICHARS, PTRAIN_RAW_FEATURE_OUTLINE_LEN, PTRAIN_NUM_RAW_FEATURE_TYPES
}
enum  Orientation { ORIENTATION_PAGE_UP = 0, ORIENTATION_PAGE_RIGHT = 1, ORIENTATION_PAGE_DOWN = 2, ORIENTATION_PAGE_LEFT = 3 }
enum  WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }
enum  TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }
enum  PageSegMode {
  PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO,
  PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE,
  PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_COUNT
}
enum  PageIteratorLevel {
  RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD,
  RIL_SYMBOL
}
enum  ParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT }
enum  OcrEngineMode { OEM_TESSERACT_ONLY, OEM_CUBE_ONLY, OEM_TESSERACT_CUBE_COMBINED, OEM_DEFAULT }
enum  AmbigType {
  NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG,
  CASE_AMBIG, AMBIG_TYPE_COUNT
}
enum  SetParamConstraint { SET_PARAM_CONSTRAINT_NONE, SET_PARAM_CONSTRAINT_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_INIT_ONLY }
enum  TessdataType {
  TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP,
  TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG,
  TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_FIXED_LENGTH_DAWGS, TESSDATA_CUBE_UNICHARSET,
  TESSDATA_CUBE_SYSTEM_DAWG, TESSDATA_SHAPE_TABLE, TESSDATA_BIGRAM_DAWG, TESSDATA_UNAMBIG_DAWG,
  TESSDATA_PARAMS_TRAINING_MODEL, TESSDATA_NUM_ENTRIES
}
enum  CharSegmentationType { CST_FRAGMENT, CST_WHOLE, CST_IMPROPER, CST_NGRAM }
enum  CountTypes {
  CT_SHAPE_TOP_CORRECT, CT_SHAPE_TOP_ERR, CT_FONT_ATTR_ERR, CT_UNICHAR_TOP1_ERR,
  CT_UNICHAR_TOP2_ERR, CT_UNICHAR_TOPN_ERR, CT_OK_MULTI_UNICHAR, CT_REJECT,
  CT_NUM_RESULTS, CT_RANK, CT_REJECTED_JUNK, CT_ACCEPTED_JUNK,
  CT_SIZE
}
enum  DawgType {
  DAWG_TYPE_PUNCTUATION, DAWG_TYPE_WORD, DAWG_TYPE_NUMBER, DAWG_TYPE_PATTERN,
  DAWG_TYPE_COUNT
}
enum  ColumnSpanningType {
  CST_NOISE, CST_FLOWING, CST_HEADING, CST_PULLOUT,
  CST_COUNT
}
enum  NeighbourPartitionType {
  NPT_HTEXT, NPT_VTEXT, NPT_WEAK_HTEXT, NPT_WEAK_VTEXT,
  NPT_IMAGE, NPT_COUNT
}
enum  LeftOrRight { LR_LEFT, LR_RIGHT }
enum  ColSegType {
  COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED,
  COL_COUNT
}
enum  TabAlignment {
  TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT
}

Functions

int CubeAPITest (Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res)
TBLOB * make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)
TBOX char_box_to_tbox (Box *char_box, TBOX word_box, int x_offset)
bool IsTextOrEquationType (PolyBlockType type)
bool IsLeftIndented (const EquationDetect::IndentType type)
bool IsRightIndented (const EquationDetect::IndentType type)
template<typename T >
void SimpleSwap (T &a, T &b)
STRING RtlEmbed (const STRING &word, bool rtlify)
bool IsLatinLetter (int ch)
bool IsDigitLike (int ch)
bool IsOpeningPunct (int ch)
bool IsTerminalPunct (int ch)
const char * SkipChars (const char *str, const char *toskip)
const char * SkipChars (const char *str, bool(*skip)(int))
const char * SkipOne (const char *str, const char *toskip)
bool LikelyListNumeral (const STRING &word)
bool LikelyListMark (const STRING &word)
bool AsciiLikelyListItem (const STRING &word)
int UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
bool LikelyListMarkUnicode (int ch)
bool UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd)
void LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
void RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
int ClosestCluster (const GenericVector< Cluster > &clusters, int value)
void CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs)
void MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold)
void GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory)
void GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
bool ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)
void DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory)
void DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows)
void RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)
int InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)
bool TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after)
bool LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after)
bool LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j)
ParagraphModel InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent)
ParagraphModel ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance)
bool RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)
void MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end)
void ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory)
void StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
void SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
void ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA * > *row_owners, ParagraphTheory *theory)
bool RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row)
void LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end)
void CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs)
void DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
void InitializeRowInfo (const MutableIterator &it, RowInfo *info)
void DetectParagraphs (int debug_level, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models)
bool StrongModel (const ParagraphModel *model)
bool read_t (PAGE_RES_IT *page_res_it, TBOX *tbox)
ICOORD ComputeEndFromGradient (const ICOORD &start, double m)
bool CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2)
bool CompareFontSet (const FontSet &fs1, const FontSet &fs2)
void FontInfoDeleteCallback (FontInfo f)
void FontSetDeleteCallback (FontSet fs)
bool read_info (FILE *f, FontInfo *fi, bool swap)
bool write_info (FILE *f, const FontInfo &fi)
bool read_spacing_info (FILE *f, FontInfo *fi, bool swap)
bool write_spacing_info (FILE *f, const FontInfo &fi)
bool read_set (FILE *f, FontSet *fs, bool swap)
bool write_set (FILE *f, const FontSet &fs)
void OtsuThreshold (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int **thresholds, int **hi_values)
void HistogramRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int *histogram)
int OtsuStats (const int *histogram, int *H_out, int *omega0_out)
 ELISTIZE (AmbigSpec)
 ELISTIZEH (AmbigSpec)
template<typename T >
bool cmp_eq (T const &t1, T const &t2)
template<typename T >
int sort_cmp (const void *t1, const void *t2)
template<typename T >
int sort_ptr_cmp (const void *t1, const void *t2)
void ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)
WERD_CHOICE * get_best_delete_other (WERD_CHOICE *choice1, WERD_CHOICE *choice2)
BLOB_CHOICE * get_nth_choice (BLOB_CHOICE_LIST *blob_list, int n)
UNICHAR_ID get_top_choice_uid (BLOB_CHOICE_LIST *blob_list)
int find_choice_by_uid (BLOB_CHOICE_LIST *blob_list, UNICHAR_ID target_uid)
WERD_CHOICE * get_choice_from_posstr (const UNICHARSET *unicharset, const BLOB_CHOICE_LIST_VECTOR &char_choices, int start_pos, const char *pos_str, float *certainties)
void get_posstr_from_choice (const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *word_choice, int start_pos, char *pos_str)
BLOB_CHOICE * find_choice_by_type (BLOB_CHOICE_LIST *blob_choices, char target_type, const UNICHARSET &unicharset)
BLOB_CHOICE * find_choice_by_script (BLOB_CHOICE_LIST *blob_choices, int target_sid, int backup_sid, int secondary_sid)
Pix * GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom)
Pix * TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)
Pix * TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)
template<class BBC >
int SortByBoxLeft (const void *void1, const void *void2)
template<class BBC >
int SortRightToLeft (const void *void1, const void *void2)
template<class BBC >
int SortByBoxBottom (const void *void1, const void *void2)
template<typename T >
void DeleteObject (T *object)
ShapeTable * LoadShapeTable (const STRING &file_prefix)
void WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
MasterTrainer * LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 ELISTIZE (ViterbiStateEntry)
 ELISTIZEH (ViterbiStateEntry)
template<class BLOB_CHOICE >
int SortByUnicharID (const void *void1, const void *void2)
template<class BLOB_CHOICE >
int SortByRating (const void *void1, const void *void2)

Variables

const int kMinRectSize = 10
const char kTesseractReject = '~'
const char kUNLVReject = '~'
const char kUNLVSuspect = '^'
const char * kInputFile = "noname.tif"
const char * kOldVarsFile = "failed_vars.txt"
const int kMaxIntSize = 22
const int kMinCredibleResolution = 70
 Minimum believable resolution.
const int kMaxCredibleResolution = 2400
const int kNumbersPerBlob = 5
const int kBytesPerNumber = 5
const int kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1
const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1
const int kBytesPer64BitNumber = 20
const int kMaxBytesPerLine
const int kUniChs []
const int kLatinChs []
const float kMathDigitDensityTh1 = 0.25
const float kMathDigitDensityTh2 = 0.1
const float kMathItalicDensityTh = 0.5
const float kUnclearDensityTh = 0.25
const int kSeedBlobsCountTh = 10
const int kLeftIndentAlignmentCountTh = 1
const int kMaxCharTopRange = 48
const int kDefaultResolution = 300
 Default resolution used if input is not believable.
const int kMaxCircleErosions = 8
const int kStrayLinePer = 6
const ParagraphModel * kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)
const ParagraphModel * kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)
const inT16 kMaxBoxEdgeDiff = 2
const int kBoxClipTolerance = 2
const int kMinSubscriptOffset = 20
const int kMinSuperscriptOffset = 20
const int kMaxDropCapBottom = -128
const int kNumEndPoints = 3
const int kHistogramSize = 256
CCUtilMutex tprintfMutex
const char * kUTF8LineSeparator = "\u2028"
const char * kUTF8ParagraphSeparator = "\u2029"
const char * kLRM = "\u200E"
const char * kRLM = "\u200F"
const char * kRLE = "\u202A"
const char * kPDF = "\u202C"
const char * kHyphenLikeUTF8 []
const char * kApostropheLikeUTF8 []
const int kMaxOffsetDist = 32
const double kMinPCLengthIncrease = 1.0 / 1024
const int kMinClusteredShapes = 1
const int kMaxUnicharsPerCluster = 2000
const float kFontMergeDistance = 0.025
const float kInfiniteDist = 999.0f
const int kRandomizingCenter = 128
const int kTestChar = -1
const int kSquareLimit = 25
const int kPrime1 = 17
const int kPrime2 = 13
const int kMinOutlierSamples = 5
const int kStateCnt = 4
const int kNumLiteralCnt = 5
const int case_state_table [6][4]
const char kDoNotReverse [] = "RRP_DO_NO_REVERSE"
const char kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"
const char kForceReverse [] = "RRP_FORCE_REVERSE"
const char *const RTLReversePolicyNames []
const double kAlignedFraction = 0.03125
const double kRaggedFraction = 2.5
const double kAlignedGapFraction = 0.75
const double kRaggedGapFraction = 1.0
const int kVLineAlignment = 3
const int kVLineGutter = 1
const int kVLineSearchSize = 150
const int kMinRaggedTabs = 5
const int kMinAlignedTabs = 4
const int kVLineMinLength = 500
const double kMinTabGradient = 4.0
const int kMaxSkewFactor = 15
const char * kTextordDebugPix = "psdebug_pix"
const double kMaxSmallNeighboursPerPix = 1.0 / 32
const int kMaxLargeOverlapsWithSmall = 3
const int kMaxMediumOverlapsWithSmall = 12
const int kMaxLargeOverlapsWithMedium = 12
const int kOriginalNoiseMultiple = 8
const int kNoisePadding = 4
const double kPhotoOffsetFraction = 0.375
const double kMinGoodTextPARatio = 1.5
const int kMinColumnWidth = 100
const int kMaxIncompatibleColumnCount = 2
const double kMarginOverlapFraction = 0.25
const double kHorizontalGapMergeFraction = 0.5
const double kMinNonNoiseFraction = 0.5
const double kMinGutterWidthGrid = 0.5
const double kMaxDistToPartSizeRatio = 1.5
bool textord_tabfind_show_initial_partitions = false
bool textord_tabfind_show_reject_blobs = false
int textord_tabfind_show_partitions = 0
bool textord_tabfind_show_columns = false
bool textord_tabfind_show_blocks = false
bool textord_tabfind_find_tables = true
const int kMaxPartnerDepth = 4
const double kMaxSpacingDrift = 1.0 / 72
const double kMaxTopSpacingFraction = 0.25
const double kMaxSameBlockLineSpacing = 3
const double kMaxSizeRatio = 1.5
const double kMaxLeaderGapFractionOfMax = 0.25
const double kMaxLeaderGapFractionOfMin = 0.5
const int kMinLeaderCount = 5
const int kLeaderCutCost = 8
const int kMinStrongTextValue = 6
const int kMinChainTextValue = 3
const int kHorzStrongTextlineCount = 8
const int kHorzStrongTextlineHeight = 10
const int kHorzStrongTextlineAspect = 5
const double kMaxBaselineError = 0.4375
const double kMinBaselineCoverage = 0.5
const int kMaxRMSColorNoise = 128
const int kMaxColorDistance = 900
const int kRGBRMSColors = 4
bool textord_tabfind_show_color_fit = false
const int kMaxPadFactor = 6
const int kMaxNeighbourDistFactor = 4
const int kMaxCaptionLines = 7
const double kMinCaptionGapRatio = 2.0
const double kMinCaptionGapHeightRatio = 0.5
const double kBigPartSizeRatio = 1.75
const double kStrokeWidthFractionTolerance = 0.25
const double kStrokeWidthConstantTolerance = 2.0
const double kTinyEnoughTextlineOverlapFraction = 0.25
const double kMaxPartitionSpacing = 1.75
const int kSmoothDecisionMargin = 4
const double kMinRectangularFraction = 0.125
const double kMaxRectangularFraction = 0.75
const double kMaxRectangularGradient = 0.1
const int kMinImageFindSize = 100
const double kRMSFitScaling = 8.0
const int kMinColorDifference = 16
const int kThinLineFraction = 20
 Denominator of resolution makes max pixel width to allow thin lines.
const int kMinLineLengthFraction = 4
 Denominator of resolution makes min pixels to demand line lengths to be.
const int kCrackSpacing = 100
 Spacing of cracks across the page to break up tall vertical lines.
const int kLineFindGridSize = 50
 Grid size used by line finder. Not very critical.
const int kMinThickLineWidth = 12
const int kMaxLineResidue = 6
const double kThickLengthMultiple = 0.75
const double kMaxNonLineDensity = 0.25
const double kMaxStaveHeight = 1.0
const double kMinMusicPixelFraction = 0.75
int textord_tabfind_show_strokewidths = 0
bool textord_tabfind_only_strokewidths = false
bool textord_tabfind_vertical_text = true
bool textord_tabfind_force_vertical_text = false
bool textord_tabfind_vertical_horizontal_mix = true
double textord_tabfind_vertical_text_ratio = 0.5
const double kStrokeWidthTolerance = 1.5
const double kStrokeWidthFractionCJK = 0.25
const double kStrokeWidthCJK = 2.0
const int kCJKRadius = 2
const double kCJKBrokenDistanceFraction = 0.25
const int kCJKMaxComponents = 8
const double kCJKAspectRatio = 1.25
const double kCJKAspectRatioIncrease = 1.0625
const int kMaxCJKSizeRatio = 5
const double kBrokenCJKIterationFraction = 0.125
const double kDiacriticXPadRatio = 7.0
const double kDiacriticYPadRatio = 1.75
const double kMinDiacriticSizeRatio = 1.0625
const double kMaxDiacriticDistanceRatio = 1.25
const double kMaxDiacriticGapToBaseCharHeight = 1.0
const int kSearchRadius = 2
const int kLineTrapLongest = 4
const int kLineTrapShortest = 2
const int kMostlyOneDirRatio = 3
const double kLineResidueAspectRatio = 8.0
const int kLineResiduePadRatio = 3
const double kLineResidueSizeRatio = 1.75
const float kSizeRatioToReject = 2.0
const int kMaxLargeOverlaps = 3
const double kNeighbourSearchFactor = 2.5
const int kTabRadiusFactor = 5
const int kMinVerticalSearch = 3
const int kMaxVerticalSearch = 12
const int kMaxRaggedSearch = 25
const int kMinLinesInColumn = 10
const double kMinFractionalLinesInColumn = 0.125
const double kMinGutterWidthAbsolute = 0.02
const double kMaxGutterWidthAbsolute = 2.00
const int kRaggedGutterMultiple = 5
const double kLineFragmentAspectRatio = 10.0
const double kSmoothFactor = 0.25
const double kCharVerticalOverlapFraction = 0.375
const double kMaxHorizontalGap = 3.0
const int kMinEvaluatedTabs = 3
const int kMaxTextLineBlobRatio = 5
const int kMinTextLineBlobRatio = 3
const double kMinImageArea = 0.5
const double kCosMaxSkewAngle = 0.866025
bool textord_tabfind_show_initialtabs = false
bool textord_tabfind_show_finaltabs = false
double textord_tabfind_aligned_gap_fraction = 0.75
const int kColumnWidthFactor = 20
const int kMaxVerticalSpacing = 500
const int kMaxBlobWidth = 500
const double kSplitPartitionSize = 2.0
const double kAllowTextHeight = 0.5
const double kAllowTextWidth = 0.6
const double kAllowTextArea = 0.8
const double kAllowBlobHeight = 0.3
const double kAllowBlobWidth = 0.4
const double kAllowBlobArea = 0.05
const int kMinBoxesInTextPartition = 10
const int kMaxBoxesInDataPartition = 20
const double kMaxGapInTextPartition = 4.0
const double kMinMaxGapInTextPartition = 0.5
const double kMaxBlobOverlapFactor = 4.0
const double kMaxTableCellXheight = 2.0
const int kMaxColumnHeaderDistance = 4
const double kTableColumnThreshold = 3.0
const int kRulingVerticalMargin = 3
const double kMinOverlapWithTable = 0.6
const int kSideSpaceMargin = 10
const double kSmallTableProjectionThreshold = 0.35
const double kLargeTableProjectionThreshold = 0.45
const int kLargeTableRowCount = 6
const int kMinRowsInTable = 3
const double kRequiredFullJustifiedSpacing = 4.0
const int kAdjacentLeaderSearchPadding = 2
const double kParagraphEndingPreviousLineRatio = 1.3
const double kMaxParagraphEndingLeftSpaceMultiple = 3.0
const double kMinParagraphEndingTextToWhitespaceRatio = 3.0
const double kMaxXProjectionGapFactor = 2.0
const double kStrokeWidthFractionalTolerance = 0.25
bool textord_dump_table_images = false
bool textord_show_tables = false
bool textord_tablefind_show_mark = false
bool textord_tablefind_show_stats = false
bool textord_tablefind_recognize_tables = false
const double kHorizontalSpacing = 0.30
const double kVerticalSpacing = -0.2
const int kCellSplitRowThreshold = 0
const int kCellSplitColumnThreshold = 0
const int kLinedTableMinVerticalLines = 3
const int kLinedTableMinHorizontalLines = 3
const double kRequiredColumns = 0.7
const double kMarginFactor = 1.1
const double kMaxRowSize = 2.5
const double kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 }
const int kGoodRowNumberOfColumnsSmallSize
const double kGoodRowNumberOfColumnsLarge = 0.7
const double kMinFilledArea = 0.35
const int kGutterMultiple = 4
const int kGutterToNeighbourRatio = 3
const int kSimilarVectorDist = 10
const int kSimilarRaggedDist = 50
const int kMaxFillinMultiple = 11
const double kMinGutterFraction = 0.5
const double kLineCountReciprocal = 4.0
const double kMinAlignedGutter = 0.25
const double kMinRaggedGutter = 1.5
double textord_tabvector_vertical_gap_fraction = 0.5
double textord_tabvector_vertical_box_ratio = 0.5
const char * kAlignmentNames []

Detailed Description

recog_pseudo_word

Make a word from the selected blobs and run Tess on them.

Parameters:
page_res recognise blobs
selection_box within this box

fp_eval_word_spacing() Evaluation function for fixed pitch word lists.

Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.

process_selected_words()

Walk the current block list applying the specified word processor function to each word that overlaps the selection_box.

build_menu()

Construct the menu tree used by the command window

process_cmd_win_event()

Process a command returned from the command window (Just call the appropriate command handler)

word_blank_and_set_display() Word processor

Blank display of word then redisplay word according to current display mode settings

---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------

---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------

---------------------------------------------------------------------------- Include Files and Type Defines ---------------------------------------------------------------------------- ---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------


Typedef Documentation

typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> tesseract::BlobGridSearch
typedef signed int tesseract::char_32
typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> tesseract::ColPartitionGridSearch
typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGrid
typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGridSearch
typedef int(Dict::* tesseract::DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
typedef void(Wordrec::* tesseract::FillLatticeFunc)(const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
typedef unsigned char tesseract::LanguageModelFlagsType
typedef double(Dict::* tesseract::ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
typedef basic_string<char_32> tesseract::string_32
typedef GenericVector<AmbigSpec_LIST *> tesseract::UnicharAmbigsVector
typedef void(Tesseract::* tesseract::WordRecognizer)(BLOCK *block, ROW *row, WERD_RES *word)

Enumeration Type Documentation

enum tesseract::AmbigType
Enumerator:
NOT_AMBIG 
REPLACE_AMBIG 
DEFINITE_AMBIG 
SIMILAR_AMBIG 
CASE_AMBIG 
AMBIG_TYPE_COUNT 
enum tesseract::CharSegmentationType
Enumerator:
CST_FRAGMENT 
CST_WHOLE 
CST_IMPROPER 
CST_NGRAM 
enum tesseract::CMD_EVENTS
Enumerator:
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 
enum tesseract::ColSegType
Enumerator:
COL_UNKNOWN 
COL_TEXT 
COL_TABLE 
COL_MIXED 
COL_COUNT 
enum tesseract::ColumnSpanningType
Enumerator:
CST_NOISE 
CST_FLOWING 
CST_HEADING 
CST_PULLOUT 
CST_COUNT 
enum tesseract::CountTypes
Enumerator:
CT_SHAPE_TOP_CORRECT 
CT_SHAPE_TOP_ERR 
CT_FONT_ATTR_ERR 
CT_UNICHAR_TOP1_ERR 
CT_UNICHAR_TOP2_ERR 
CT_UNICHAR_TOPN_ERR 
CT_OK_MULTI_UNICHAR 
CT_REJECT 
CT_NUM_RESULTS 
CT_RANK 
CT_REJECTED_JUNK 
CT_ACCEPTED_JUNK 
CT_SIZE 
enum tesseract::DawgType
Enumerator:
DAWG_TYPE_PUNCTUATION 
DAWG_TYPE_WORD 
DAWG_TYPE_NUMBER 
DAWG_TYPE_PATTERN 
DAWG_TYPE_COUNT 
enum tesseract::LeftOrRight
Enumerator:
LR_LEFT 
LR_RIGHT 
enum tesseract::LineType
Enumerator:
LT_START 
LT_BODY 
LT_UNKNOWN 
LT_MULTIPLE 
enum tesseract::NeighbourPartitionType
Enumerator:
NPT_HTEXT 
NPT_VTEXT 
NPT_WEAK_HTEXT 
NPT_WEAK_VTEXT 
NPT_IMAGE 
NPT_COUNT 
enum tesseract::NormalizationMode
Enumerator:
NM_BASELINE 
NM_CHAR_ISOTROPIC 
NM_CHAR_ANISOTROPIC 
enum tesseract::OcrEngineMode
Enumerator:
OEM_TESSERACT_ONLY 
OEM_CUBE_ONLY 
OEM_TESSERACT_CUBE_COMBINED 
OEM_DEFAULT 
enum tesseract::Orientation
Enumerator:
ORIENTATION_PAGE_UP 
ORIENTATION_PAGE_RIGHT 
ORIENTATION_PAGE_DOWN 
ORIENTATION_PAGE_LEFT 
enum tesseract::PageIteratorLevel
Enumerator:
RIL_BLOCK 
RIL_PARA 
RIL_TEXTLINE 
RIL_WORD 
RIL_SYMBOL 
enum tesseract::PageSegMode
Enumerator:
PSM_OSD_ONLY 

Orientation and script detection only.

PSM_AUTO_OSD 

Automatic page segmentation with orientation and script detection. (OSD)

PSM_AUTO_ONLY 

Automatic page segmentation, but no OSD, or OCR.

PSM_AUTO 

Fully automatic page segmentation, but no OSD.

PSM_SINGLE_COLUMN 

Assume a single column of text of variable sizes.

PSM_SINGLE_BLOCK_VERT_TEXT 

Assume a single uniform block of vertically aligned text.

PSM_SINGLE_BLOCK 

Assume a single uniform block of text. (Default.)

PSM_SINGLE_LINE 

Treat the image as a single text line.

PSM_SINGLE_WORD 

Treat the image as a single word.

PSM_CIRCLE_WORD 

Treat the image as a single word in a circle.

PSM_SINGLE_CHAR 

Treat the image as a single character.

PSM_COUNT 

Number of enum entries.

enum tesseract::ParagraphJustification
Enumerator:
JUSTIFICATION_UNKNOWN 
JUSTIFICATION_LEFT 
JUSTIFICATION_CENTER 
JUSTIFICATION_RIGHT 
enum tesseract::ParamsTrainingRawFeatureType
Enumerator:
PTRAIN_RAW_FEATURE_DICT_MATCH_TYPE 
PTRAIN_RAW_FEATURE_UNAMBIG_DICT_MATCH 
PTRAIN_RAW_FEATURE_SHAPE_COST 
PTRAIN_RAW_FEATURE_NGRAM_PROB 
PTRAIN_RAW_FEATURE_NUM_BAD_PUNC 
PTRAIN_RAW_FEATURE_NUM_BAD_CASE 
PTRAIN_RAW_FEATURE_NUM_BAD_CHAR_TYPE 
PTRAIN_RAW_FEATURE_NUM_BAD_SPACING 
PTRAIN_RAW_FEATURE_NUM_BAD_SCRIPT 
PTRAIN_RAW_FEATURE_NUM_BAD_FONT 
PTRAIN_RAW_FEATURE_WORST_CERT 
PTRAIN_RAW_FEATURE_RATING 
PTRAIN_RAW_FEATURE_ADAPTED 
PTRAIN_RAW_FEATURE_NUM_UNICHARS 
PTRAIN_RAW_FEATURE_OUTLINE_LEN 
PTRAIN_NUM_RAW_FEATURE_TYPES 
enum tesseract::ScriptPos
Enumerator:
SP_NORMAL 
SP_SUBSCRIPT 
SP_SUPERSCRIPT 
SP_DROPCAP 
enum tesseract::SetParamConstraint
Enumerator:
SET_PARAM_CONSTRAINT_NONE 
SET_PARAM_CONSTRAINT_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_INIT_ONLY 
enum tesseract::TabAlignment
Enumerator:
TA_LEFT_ALIGNED 
TA_LEFT_RAGGED 
TA_CENTER_JUSTIFIED 
TA_RIGHT_ALIGNED 
TA_RIGHT_RAGGED 
TA_SEPARATOR 
TA_COUNT 
enum tesseract::TessdataType
Enumerator:
TESSDATA_LANG_CONFIG 
TESSDATA_UNICHARSET 
TESSDATA_AMBIGS 
TESSDATA_INTTEMP 
TESSDATA_PFFMTABLE 
TESSDATA_NORMPROTO 
TESSDATA_PUNC_DAWG 
TESSDATA_SYSTEM_DAWG 
TESSDATA_NUMBER_DAWG 
TESSDATA_FREQ_DAWG 
TESSDATA_FIXED_LENGTH_DAWGS 
TESSDATA_CUBE_UNICHARSET 
TESSDATA_CUBE_SYSTEM_DAWG 
TESSDATA_SHAPE_TABLE 
TESSDATA_BIGRAM_DAWG 
TESSDATA_UNAMBIG_DAWG 
TESSDATA_PARAMS_TRAINING_MODEL 
TESSDATA_NUM_ENTRIES 
enum tesseract::TextlineOrder
Enumerator:
TEXTLINE_ORDER_LEFT_TO_RIGHT 
TEXTLINE_ORDER_RIGHT_TO_LEFT 
TEXTLINE_ORDER_TOP_TO_BOTTOM 
enum tesseract::WritingDirection
Enumerator:
WRITING_DIRECTION_LEFT_TO_RIGHT 
WRITING_DIRECTION_RIGHT_TO_LEFT 
WRITING_DIRECTION_TOP_TO_BOTTOM 

Function Documentation

bool tesseract::AsciiLikelyListItem ( const STRING word  ) 
void tesseract::CalculateTabStops ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
int  tolerance,
GenericVector< Cluster > *  left_tabs,
GenericVector< Cluster > *  right_tabs 
)
void tesseract::CanonicalizeDetectionResults ( GenericVector< PARA * > *  row_owners,
PARA_LIST *  paragraphs 
)
TBOX tesseract::char_box_to_tbox ( Box *  char_box,
TBOX  word_box,
int  x_offset 
)
void tesseract::ClearFeatureSpaceWindow ( NORM_METHOD  norm_method,
ScrollView window 
)
int tesseract::ClosestCluster ( const GenericVector< Cluster > &  clusters,
int  value 
)
template<typename T >
bool tesseract::cmp_eq ( T const &  t1,
T const &  t2 
) [inline]
bool tesseract::CompareFontInfo ( const FontInfo &  fi1,
const FontInfo &  fi2 
)
bool tesseract::CompareFontSet ( const FontSet &  fs1,
const FontSet &  fs2 
)
ICOORD tesseract::ComputeEndFromGradient ( const ICOORD start,
double  m 
)
void tesseract::ConvertHypothesizedModelRunsToParagraphs ( int  debug_level,
const GenericVector< RowScratchRegisters > &  rows,
GenericVector< PARA * > *  row_owners,
ParagraphTheory *  theory 
)
bool tesseract::CrownCompatible ( const GenericVector< RowScratchRegisters > *  rows,
int  a,
int  b,
const ParagraphModel model 
)
int tesseract::CubeAPITest ( Boxa *  boxa_blocks,
Pixa *  pixa_blocks,
Boxa *  boxa_words,
Pixa *  pixa_words,
const FCOORD reskew,
Pix *  page_pix,
PAGE_RES page_res 
)
template<typename T >
void tesseract::DeleteObject ( T *  object  )  [inline]
void tesseract::DetectParagraphs ( int  debug_level,
const MutableIterator *  block_start,
GenericVector< ParagraphModel * > *  models 
)
void tesseract::DetectParagraphs ( int  debug_level,
GenericVector< RowInfo > *  row_infos,
GenericVector< PARA * > *  row_owners,
PARA_LIST *  paragraphs,
GenericVector< ParagraphModel * > *  models 
)
void tesseract::DiscardUnusedModels ( const GenericVector< RowScratchRegisters > &  rows,
ParagraphTheory *  theory 
)
void tesseract::DowngradeWeakestToCrowns ( int  debug_level,
ParagraphTheory *  theory,
GenericVector< RowScratchRegisters > *  rows 
)
tesseract::ELISTIZE ( ViterbiStateEntry   ) 
tesseract::ELISTIZE ( AmbigSpec   ) 
tesseract::ELISTIZEH ( ViterbiStateEntry   ) 
tesseract::ELISTIZEH ( AmbigSpec   ) 
BLOB_CHOICE* tesseract::find_choice_by_script ( BLOB_CHOICE_LIST *  blob_choices,
int  target_sid,
int  backup_sid,
int  secondary_sid 
)

Iterate through all the character choices (for a single blob) and return the first that matches the target script ID. If backup_sid is not 0, then a match on either the target or backup sid is allowed. Note that there is no preference between a target or backup sid. To search for another sid only if no target_sid matched, use secondary_sid. So for example, to find first Han or Common char choice, do find_choice_by_script(cchoice, han_sid, common_sid, 0); To find first Han choice, but allow Common if none is found, do find_choice_by_script(cchoice, han_sid, 0, common_sid);

BLOB_CHOICE* tesseract::find_choice_by_type ( BLOB_CHOICE_LIST *  blob_choices,
char  target_type,
const UNICHARSET unicharset 
)

Iterate through all the character choices (for a single blob) and return the first that matches the given type, which is one of 'aA0px*', for lower, upper, digit, punctuation, other, and 'any', respectively. If no match is found, NULL is returned.

int tesseract::find_choice_by_uid ( BLOB_CHOICE_LIST *  blob_list,
UNICHAR_ID  target_uid 
)

Returns the rank (starting at 0) of a given unichar ID in the char choice list, or -1 if not found.

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after 
)
bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after,
tesseract::ParagraphJustification  justification 
)
void tesseract::FontInfoDeleteCallback ( FontInfo  f  ) 
void tesseract::FontSetDeleteCallback ( FontSet  fs  ) 
void tesseract::GeometricClassify ( int  debug_level,
GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)
void tesseract::GeometricClassifyThreeTabStopTextBlock ( int  debug_level,
GeometricClassifierState &  s,
ParagraphTheory *  theory 
)
WERD_CHOICE* tesseract::get_best_delete_other ( WERD_CHOICE choice1,
WERD_CHOICE choice2 
)

get_best_delete_other

Returns the best of two choices and deletes the other (worse) choice. A choice is better if it has a non-empty string and has a lower rating than the other choice. If the ratings are the same, choice2 is preferred over choice1.

WERD_CHOICE* tesseract::get_choice_from_posstr ( const UNICHARSET unicharset,
const BLOB_CHOICE_LIST_VECTOR char_choices,
int  start_pos,
const char *  pos_str,
float *  certainties 
)

Returns a WERD formed by taking the specified position (nth choice) string from char_choices starting at the given position. For example, if start_pos=2, pos_str="0121" will form a word using the 1st choice of char 3, 2nd choice of char 4, 3rd choice of char 5, 2nd choice of char 6. If n > number of choices, the closest (last) one is used.

BLOB_CHOICE* tesseract::get_nth_choice ( BLOB_CHOICE_LIST *  blob_list,
int  n 
)

Returns the n-th choice in the given blob_list (top-K choices). If n > K, the last choice is returned.

void tesseract::get_posstr_from_choice ( const BLOB_CHOICE_LIST_VECTOR char_choices,
WERD_CHOICE word_choice,
int  start_pos,
char *  pos_str 
)

Given a WERD_CHOICE, find the corresponding position string from char_choices. Pos_str must have been allocated already. This is the reverse of get_choice_from_posstr.

UNICHAR_ID tesseract::get_top_choice_uid ( BLOB_CHOICE_LIST *  blob_list  ) 

Returns the top choice char id. A helper function to make code cleaner.

Pix* tesseract::GridReducedPix ( const TBOX box,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)
void tesseract::HistogramRect ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height,
int *  histogram 
)
void tesseract::InitializeRowInfo ( const MutableIterator &  it,
RowInfo *  info 
)
ParagraphModel tesseract::InternalParagraphModelByOutline ( const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  tolerance,
bool *  consistent 
)
int tesseract::InterwordSpace ( const GenericVector< RowScratchRegisters > &  rows,
int  row_start,
int  row_end 
)
bool tesseract::IsDigitLike ( int  ch  ) 
bool tesseract::IsLatinLetter ( int  ch  ) 
bool tesseract::IsLeftIndented ( const EquationDetect::IndentType  type  )  [inline]
bool tesseract::IsOpeningPunct ( int  ch  ) 
bool tesseract::IsRightIndented ( const EquationDetect::IndentType  type  )  [inline]
bool tesseract::IsTerminalPunct ( int  ch  ) 
bool tesseract::IsTextOrEquationType ( PolyBlockType  type  )  [inline]
void tesseract::LeftoverSegments ( const GenericVector< RowScratchRegisters > &  rows,
GenericVector< Interval > *  to_fix,
int  row_start,
int  row_end 
)
void tesseract::LeftWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)
bool tesseract::LikelyListMark ( const STRING word  ) 
bool tesseract::LikelyListMarkUnicode ( int  ch  ) 
bool tesseract::LikelyListNumeral ( const STRING word  ) 
bool tesseract::LikelyParagraphStart ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after,
tesseract::ParagraphJustification  j 
)
bool tesseract::LikelyParagraphStart ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after 
)
ShapeTable * tesseract::LoadShapeTable ( const STRING file_prefix  ) 
MasterTrainer * tesseract::LoadTrainingData ( int  argc,
const char *const *  argv,
bool  replication,
ShapeTable **  shape_table,
STRING file_prefix 
)
TBLOB* tesseract::make_tesseract_blob ( float  baseline,
float  xheight,
float  descender,
float  ascender,
bool  numeric_mode,
Pix *  pix 
)
void tesseract::MarkRowsWithModel ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
const ParagraphModel model,
bool  ltr,
int  eop_threshold 
)
void tesseract::MarkStrongEvidence ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end 
)
void tesseract::ModelStrongEvidence ( int  debug_level,
GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
bool  allow_flush_models,
ParagraphTheory *  theory 
)
int tesseract::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
)
void tesseract::OtsuThreshold ( const unsigned char *  imagedata,
int  bytes_per_pixel,
int  bytes_per_line,
int  left,
int  top,
int  width,
int  height,
int **  thresholds,
int **  hi_values 
)
ParagraphModel tesseract::ParagraphModelByOutline ( int  debug_level,
const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  tolerance 
)
bool tesseract::read_info ( FILE *  f,
FontInfo *  fi,
bool  swap 
)
bool tesseract::read_set ( FILE *  f,
FontSet *  fs,
bool  swap 
)
bool tesseract::read_spacing_info ( FILE *  f,
FontInfo *  fi,
bool  swap 
)
bool tesseract::read_t ( PAGE_RES_IT page_res_it,
TBOX tbox 
)
void tesseract::RecomputeMarginsAndClearHypotheses ( GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  percentile 
)
void tesseract::RightWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)
bool tesseract::RowIsStranded ( const GenericVector< RowScratchRegisters > &  rows,
int  row 
)
bool tesseract::RowsFitModel ( const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
const ParagraphModel model 
)
STRING tesseract::RtlEmbed ( const STRING word,
bool  rtlify 
)
void tesseract::SeparateSimpleLeaderLines ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)
template<typename T >
void tesseract::SimpleSwap ( T &  a,
T &  b 
) [inline]
const char* tesseract::SkipChars ( const char *  str,
bool(*)(int)  skip 
)
const char* tesseract::SkipChars ( const char *  str,
const char *  toskip 
)
const char* tesseract::SkipOne ( const char *  str,
const char *  toskip 
)
template<typename T >
int tesseract::sort_cmp ( const void *  t1,
const void *  t2 
) [inline]
template<typename T >
int tesseract::sort_ptr_cmp ( const void *  t1,
const void *  t2 
) [inline]
template<class BBC >
int tesseract::SortByBoxBottom ( const void *  void1,
const void *  void2 
) [inline]
template<class BBC >
int tesseract::SortByBoxLeft ( const void *  void1,
const void *  void2 
) [inline]
template<class BLOB_CHOICE >
int tesseract::SortByRating ( const void *  void1,
const void *  void2 
) [inline]
template<class BLOB_CHOICE >
int tesseract::SortByUnicharID ( const void *  void1,
const void *  void2 
) [inline]
template<class BBC >
int tesseract::SortRightToLeft ( const void *  void1,
const void *  void2 
) [inline]
void tesseract::StrongEvidenceClassify ( int  debug_level,
GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)
bool tesseract::StrongModel ( const ParagraphModel model  )  [inline]
bool tesseract::TextSupportsBreak ( const RowScratchRegisters &  before,
const RowScratchRegisters &  after 
)
Pix * tesseract::TraceBlockOnReducedPix ( BLOCK block,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)
Pix * tesseract::TraceOutlineOnReducedPix ( C_OUTLINE outline,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)
int tesseract::UnicodeFor ( const UNICHARSET u,
const WERD_CHOICE werd,
int  pos 
)
bool tesseract::UniLikelyListItem ( const UNICHARSET u,
const WERD_CHOICE werd 
)
bool tesseract::ValidBodyLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)
bool tesseract::ValidFirstLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)
bool tesseract::write_info ( FILE *  f,
const FontInfo &  fi 
)
bool tesseract::write_set ( FILE *  f,
const FontSet &  fs 
)
bool tesseract::write_spacing_info ( FILE *  f,
const FontInfo &  fi 
)
void tesseract::WriteShapeTable ( const STRING file_prefix,
const ShapeTable &  shape_table 
)

Variable Documentation

const int tesseract::case_state_table[6][4]
Initial value:
 { {
                                  
    
                                  
      0, 1, 5, 4
    },
    {                            
      0, 3, 2, 4
    },
    {                            
      0, -1, 2, -1
    },
    {                            
      0, 3, -1, 4
    },
    {                            
      0, -1, -1, 4
    },
    {                            
      5, -1, 2, -1
    },
  }
const double tesseract::kAlignedFraction = 0.03125
const double tesseract::kAlignedGapFraction = 0.75
Initial value:
 {
  "Left Aligned",
  "Left Ragged",
  "Center",
  "Right Aligned",
  "Right Ragged",
  "Separator"
}
const double tesseract::kAllowBlobArea = 0.05
const double tesseract::kAllowBlobHeight = 0.3
const double tesseract::kAllowBlobWidth = 0.4
const double tesseract::kAllowTextArea = 0.8
const double tesseract::kAllowTextHeight = 0.5
const double tesseract::kAllowTextWidth = 0.6
Initial value:
 {
  "'",       
  "`",       
  "\u2018",  
  "\u2019",  
  "\u2032",  
  NULL,      
}
const double tesseract::kBigPartSizeRatio = 1.75
const double tesseract::kCJKAspectRatio = 1.25
const double tesseract::kCJKAspectRatioIncrease = 1.0625
const int tesseract::kCJKRadius = 2

Pixel resolution of column width estimates.

const double tesseract::kCosMaxSkewAngle = 0.866025
const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)
const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)

Default resolution used if input is not believable.

const double tesseract::kDiacriticXPadRatio = 7.0
const double tesseract::kDiacriticYPadRatio = 1.75
const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"
const float tesseract::kFontMergeDistance = 0.025
const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"
const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }
Initial value:
 
    sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1
const int tesseract::kHistogramSize = 256
const double tesseract::kHorizontalSpacing = 0.30
Initial value:
 {
  "-",       
  "\u05BE",  
  "\u2010",  
  "\u2011",  
  "\u2012",  
  "\u2013",  
  "\u2014",  
  "\u2015",  
  "\u2212",  
  "\uFE58",  
  "\uFE63",  
  "\uFF0D",  
  NULL,      
}
const float tesseract::kInfiniteDist = 999.0f
const char* tesseract::kInputFile = "noname.tif"
const int tesseract::kLatinChs[]
Initial value:
 {
  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}
const double tesseract::kLineCountReciprocal = 4.0

Grid size used by line finder. Not very critical.

const double tesseract::kLineResidueSizeRatio = 1.75
const char * tesseract::kLRM = "\u200E"
const double tesseract::kMarginFactor = 1.1
const double tesseract::kMarginOverlapFraction = 0.25
const float tesseract::kMathDigitDensityTh1 = 0.25
const double tesseract::kMaxBaselineError = 0.4375
const int tesseract::kMaxBlobWidth = 500
Initial value:
const int tesseract::kMaxDropCapBottom = -128
const double tesseract::kMaxHorizontalGap = 3.0
const int tesseract::kMaxIntSize = 22
const double tesseract::kMaxNonLineDensity = 0.25
const int tesseract::kMaxOffsetDist = 32
const int tesseract::kMaxPadFactor = 6
const double tesseract::kMaxPartitionSpacing = 1.75
const double tesseract::kMaxRowSize = 2.5
const double tesseract::kMaxSizeRatio = 1.5
const int tesseract::kMaxSkewFactor = 15
const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32
const double tesseract::kMaxSpacingDrift = 1.0 / 72
const double tesseract::kMaxStaveHeight = 1.0
const double tesseract::kMaxTableCellXheight = 2.0
const double tesseract::kMaxTopSpacingFraction = 0.25
const double tesseract::kMinAlignedGutter = 0.25
const double tesseract::kMinBaselineCoverage = 0.5
const double tesseract::kMinCaptionGapRatio = 2.0
const int tesseract::kMinColumnWidth = 100

Minimum believable resolution.

const double tesseract::kMinDiacriticSizeRatio = 1.0625
const double tesseract::kMinFilledArea = 0.35
const double tesseract::kMinGoodTextPARatio = 1.5
const double tesseract::kMinGutterFraction = 0.5
const double tesseract::kMinGutterWidthGrid = 0.5
const double tesseract::kMinImageArea = 0.5

Denominator applied to the resolution to give the minimum length, in pixels, required of a line.

const double tesseract::kMinMusicPixelFraction = 0.75
const double tesseract::kMinNonNoiseFraction = 0.5
const double tesseract::kMinOverlapWithTable = 0.6
const double tesseract::kMinPCLengthIncrease = 1.0 / 1024
const double tesseract::kMinRaggedGutter = 1.5
const double tesseract::kMinRectangularFraction = 0.125
const int tesseract::kMinRectSize = 10
const double tesseract::kMinTabGradient = 4.0
const int tesseract::kNoisePadding = 4
const int tesseract::kNumEndPoints = 3
const char* tesseract::kOldVarsFile = "failed_vars.txt"
const char * tesseract::kPDF = "\u202C"
const double tesseract::kPhotoOffsetFraction = 0.375
const int tesseract::kPrime1 = 17
const int tesseract::kPrime2 = 13
const double tesseract::kRaggedFraction = 2.5
const double tesseract::kRaggedGapFraction = 1.0
const double tesseract::kRequiredColumns = 0.7
const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"
const int tesseract::kRGBRMSColors = 4
const char * tesseract::kRLE = "\u202A"
const char * tesseract::kRLM = "\u200F"
const double tesseract::kRMSFitScaling = 8.0
const int tesseract::kSearchRadius = 2
const float tesseract::kSizeRatioToReject = 2.0
const double tesseract::kSmoothFactor = 0.25
const double tesseract::kSplitPartitionSize = 2.0
const int tesseract::kSquareLimit = 25
const int tesseract::kStateCnt = 4
const int tesseract::kStrayLinePer = 6
const double tesseract::kStrokeWidthCJK = 2.0

Allowed proportional change in stroke width to be the same font.

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

const char tesseract::kTesseractReject = '~'
const int tesseract::kTestChar = -1
const char* tesseract::kTextordDebugPix = "psdebug_pix"
const double tesseract::kThickLengthMultiple = 0.75

Denominator applied to the resolution to give the maximum width, in pixels, allowed for a thin line.

const float tesseract::kUnclearDensityTh = 0.25
const int tesseract::kUniChs[]
Initial value:
 {
  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}
const char tesseract::kUNLVReject = '~'
const char tesseract::kUNLVSuspect = '^'
const char * tesseract::kUTF8LineSeparator = "\u2028"
const char * tesseract::kUTF8ParagraphSeparator = "\u2029"
const double tesseract::kVerticalSpacing = -0.2
const int tesseract::kVLineGutter = 1
const int tesseract::kVLineMinLength = 500
const int tesseract::kVLineSearchSize = 150
const char* const tesseract::RTLReversePolicyNames[]

"Paint table detection output"

"Show table regions"

"Fraction of height used as a minimum gap for aligned blobs."

"run table detection"

"Force using vertical text page mode"

"Only run stroke widths"

"Show final block bounds"

"Show stroke widths"

"Show column bounds"

"Show tab vectors"

"Show partition bounds"

"Show tab candidates"

"Show partition bounds, waiting if >1"

"Show blobs rejected as noise"

"Show stroke widths"

"find horizontal lines such as headers in vertical page mode"

"Enable vertical detection"

"Fraction of textlines deemed vertical to use vertical page mode"

"Enables the table recognizer for table layout and filtering."

"Debug table marking steps in detail"

"Show page stats used in table finding"

"Fraction of box matches required to declare a line vertical"

"max fraction of mean blob width allowed for vertical gaps in vertical text"

"Max fraction of mean blob width allowed for vertical gaps in vertical text"

Generated on Thu Feb 2 08:19:25 2012 for Tesseract by  doxygen 1.6.3