|
virtual const char * | GetClassName () |
|
virtual int | IsA (const char *type) |
|
void | PrintSelf (ostream &os, vtkIndent indent) |
|
void | ClearStrategies () |
|
|
void | PrependStrategy (vtkTextExtractionStrategy *strategy) |
|
void | AppendStrategy (vtkTextExtractionStrategy *strategy) |
|
|
virtual void | SetOutputArray (const char *) |
|
virtual char * | GetOutputArray () |
|
virtual int | ProcessRequest (vtkInformation *, vtkInformationVector **, vtkInformationVector *) |
|
vtkTable * | GetOutput () |
|
vtkTable * | GetOutput (int index) |
|
void | SetInput (vtkDataObject *obj) |
|
void | SetInput (int index, vtkDataObject *obj) |
|
int | HasExecutive () |
|
vtkExecutive * | GetExecutive () |
|
virtual void | SetExecutive (vtkExecutive *executive) |
|
virtual int | ModifyRequest (vtkInformation *request, int when) |
|
vtkInformation * | GetInputPortInformation (int port) |
|
vtkInformation * | GetOutputPortInformation (int port) |
|
int | GetNumberOfInputPorts () |
|
int | GetNumberOfOutputPorts () |
|
void | UpdateProgress (double amount) |
|
vtkInformation * | GetInputArrayInformation (int idx) |
|
void | RemoveAllInputs () |
|
vtkDataObject * | GetOutputDataObject (int port) |
|
virtual void | RemoveInputConnection (int port, vtkAlgorithmOutput *input) |
|
int | GetNumberOfInputConnections (int port) |
|
int | GetTotalNumberOfInputConnections () |
|
vtkAlgorithmOutput * | GetInputConnection (int port, int index) |
|
virtual void | Update () |
|
virtual void | UpdateInformation () |
|
virtual void | UpdateWholeExtent () |
|
void | ConvertTotalInputToPortConnection (int ind, int &port, int &conn) |
|
virtual double | ComputePriority () |
|
int | ProcessRequest (vtkInformation *request, vtkCollection *inInfo, vtkInformationVector *outInfo) |
|
virtual int | ComputePipelineMTime (vtkInformation *request, vtkInformationVector **inInfoVec, vtkInformationVector *outInfoVec, int requestFromOutputPort, unsigned long *mtime) |
|
virtual vtkInformation * | GetInformation () |
|
virtual void | SetInformation (vtkInformation *) |
|
virtual void | Register (vtkObjectBase *o) |
|
virtual void | UnRegister (vtkObjectBase *o) |
|
virtual void | SetAbortExecute (int) |
|
virtual int | GetAbortExecute () |
|
virtual void | AbortExecuteOn () |
|
virtual void | AbortExecuteOff () |
|
virtual void | SetProgress (double) |
|
virtual double | GetProgress () |
|
void | SetProgressText (const char *ptext) |
|
virtual char * | GetProgressText () |
|
virtual unsigned long | GetErrorCode () |
|
virtual void | SetInputArrayToProcess (int idx, int port, int connection, int fieldAssociation, const char *name) |
|
virtual void | SetInputArrayToProcess (int idx, int port, int connection, int fieldAssociation, int fieldAttributeType) |
|
virtual void | SetInputArrayToProcess (int idx, vtkInformation *info) |
|
virtual void | SetInputArrayToProcess (int idx, int port, int connection, const char *fieldAssociation, const char *attributeTypeorName) |
|
vtkDataObject * | GetInputDataObject (int port, int connection) |
|
virtual void | SetInputConnection (int port, vtkAlgorithmOutput *input) |
|
virtual void | SetInputConnection (vtkAlgorithmOutput *input) |
|
virtual void | AddInputConnection (int port, vtkAlgorithmOutput *input) |
|
virtual void | AddInputConnection (vtkAlgorithmOutput *input) |
|
vtkAlgorithmOutput * | GetOutputPort (int index) |
|
vtkAlgorithmOutput * | GetOutputPort () |
|
virtual void | SetReleaseDataFlag (int) |
|
virtual int | GetReleaseDataFlag () |
|
void | ReleaseDataFlagOn () |
|
void | ReleaseDataFlagOff () |
|
int | UpdateExtentIsEmpty (vtkDataObject *output) |
|
int | UpdateExtentIsEmpty (vtkInformation *pinfo, int extentType) |
|
virtual void | DebugOn () |
|
virtual void | DebugOff () |
|
unsigned char | GetDebug () |
|
void | SetDebug (unsigned char debugFlag) |
|
virtual void | Modified () |
|
virtual unsigned long | GetMTime () |
|
unsigned long | AddObserver (unsigned long event, vtkCommand *, float priority=0.0f) |
|
unsigned long | AddObserver (const char *event, vtkCommand *, float priority=0.0f) |
|
vtkCommand * | GetCommand (unsigned long tag) |
|
void | RemoveObserver (vtkCommand *) |
|
void | RemoveObservers (unsigned long event, vtkCommand *) |
|
void | RemoveObservers (const char *event, vtkCommand *) |
|
int | HasObserver (unsigned long event, vtkCommand *) |
|
int | HasObserver (const char *event, vtkCommand *) |
|
void | RemoveObserver (unsigned long tag) |
|
void | RemoveObservers (unsigned long event) |
|
void | RemoveObservers (const char *event) |
|
void | RemoveAllObservers () |
|
int | HasObserver (unsigned long event) |
|
int | HasObserver (const char *event) |
|
template<class U , class T > |
unsigned long | AddObserver (unsigned long event, U observer, void(T::*callback)(), float priority=0.0f) |
|
template<class U , class T > |
unsigned long | AddObserver (unsigned long event, U observer, void(T::*callback)(vtkObject *, unsigned long, void *), float priority=0.0f) |
|
int | InvokeEvent (unsigned long event, void *callData) |
|
int | InvokeEvent (const char *event, void *callData) |
|
int | InvokeEvent (unsigned long event) |
|
int | InvokeEvent (const char *event) |
|
const char * | GetClassName () const |
|
virtual void | Delete () |
|
virtual void | FastDelete () |
|
void | Print (ostream &os) |
|
void | SetReferenceCount (int) |
|
void | PrintRevisions (ostream &os) |
|
virtual void | PrintHeader (ostream &os, vtkIndent indent) |
|
virtual void | PrintTrailer (ostream &os, vtkIndent indent) |
|
int | GetReferenceCount () |
|
|
| vtkTextExtraction () |
|
| ~vtkTextExtraction () |
|
virtual int | RequestData (vtkInformation *request, vtkInformationVector **inputVector, vtkInformationVector *outputVector) |
|
| vtkTableAlgorithm () |
|
| ~vtkTableAlgorithm () |
|
virtual int | RequestInformation (vtkInformation *request, vtkInformationVector **inputVector, vtkInformationVector *outputVector) |
|
virtual int | FillOutputPortInformation (int port, vtkInformation *info) |
|
virtual int | FillInputPortInformation (int port, vtkInformation *info) |
|
virtual int | RequestUpdateExtent (vtkInformation *, vtkInformationVector **, vtkInformationVector *) |
|
| vtkAlgorithm () |
|
| ~vtkAlgorithm () |
|
virtual void | SetNumberOfInputPorts (int n) |
|
virtual void | SetNumberOfOutputPorts (int n) |
|
int | InputPortIndexInRange (int index, const char *action) |
|
int | OutputPortIndexInRange (int index, const char *action) |
|
int | GetInputArrayAssociation (int idx, vtkInformationVector **inputVector) |
|
virtual vtkExecutive * | CreateDefaultExecutive () |
|
virtual void | ReportReferences (vtkGarbageCollector *) |
|
virtual void | SetNumberOfInputConnections (int port, int n) |
|
int | GetInputArrayAssociation (int idx, int connection, vtkInformationVector **inputVector) |
|
int | GetInputArrayAssociation (int idx, vtkDataObject *input) |
|
vtkDataArray * | GetInputArrayToProcess (int idx, vtkInformationVector **inputVector) |
|
vtkDataArray * | GetInputArrayToProcess (int idx, vtkInformationVector **inputVector, int &association) |
|
vtkDataArray * | GetInputArrayToProcess (int idx, int connection, vtkInformationVector **inputVector) |
|
vtkDataArray * | GetInputArrayToProcess (int idx, int connection, vtkInformationVector **inputVector, int &association) |
|
vtkDataArray * | GetInputArrayToProcess (int idx, vtkDataObject *input) |
|
vtkDataArray * | GetInputArrayToProcess (int idx, vtkDataObject *input, int &association) |
|
vtkAbstractArray * | GetInputAbstractArrayToProcess (int idx, vtkInformationVector **inputVector) |
|
vtkAbstractArray * | GetInputAbstractArrayToProcess (int idx, vtkInformationVector **inputVector, int &association) |
|
vtkAbstractArray * | GetInputAbstractArrayToProcess (int idx, int connection, vtkInformationVector **inputVector) |
|
vtkAbstractArray * | GetInputAbstractArrayToProcess (int idx, int connection, vtkInformationVector **inputVector, int &association) |
|
vtkAbstractArray * | GetInputAbstractArrayToProcess (int idx, vtkDataObject *input) |
|
vtkAbstractArray * | GetInputAbstractArrayToProcess (int idx, vtkDataObject *input, int &association) |
|
vtkInformation * | GetInputArrayFieldInformation (int idx, vtkInformationVector **inputVector) |
|
virtual void | SetNthInputConnection (int port, int index, vtkAlgorithmOutput *input) |
|
virtual void | SetErrorCode (unsigned long) |
|
| vtkObject () |
|
virtual | ~vtkObject () |
|
virtual void | RegisterInternal (vtkObjectBase *, int check) |
|
virtual void | UnRegisterInternal (vtkObjectBase *, int check) |
|
void | InternalGrabFocus (vtkCommand *mouseEvents, vtkCommand *keypressEvents=NULL) |
|
void | InternalReleaseFocus () |
|
| vtkObjectBase () |
|
virtual | ~vtkObjectBase () |
|
virtual void | CollectRevisions (ostream &os) |
|
| vtkObjectBase (const vtkObjectBase &) |
|
void | operator= (const vtkObjectBase &) |
|
Extracts text from documents based on their MIME type.
Given a table containing document ids, URIs, Mime types and document contents, extracts plain text from each document, and generates a list of 'tags' that delineate ranges of text. The actual work of extracting text and generating tags is performed by an ordered list of vtkTextExtractionStrategy objects.
By default, vtkTextExtraction has just a single strategy for extracting plain text documents. Callers will almost certainly want to supplement or replace the default with their own strategies.
Inputs: Input port 0: (required) A vtkTable containing document ids, Mime types and document contents (which could be binary).
Outputs: Output port 0: The same table with an additional "text" column that contains the text extracted from each document. Output port 1: A table of document tags that includes "document", "uri", "begin", "end", and "type" columns.
Use SetInputArrayToProcess(0, ...) to specify the input table column that contains document ids (must be a vtkIdTypeArray). Default: "document".
Use SetInputArrayToProcess(1, ...) to specify the input table column that contains URIs (must be a vtkStringArray). Default: "uri".
Use SetInputArrayToProcess(2, ...) to specify the input table column that contains Mime types (must be a vtkStringArray). Default: "mime_type".
Use SetInputArrayToProcess(3, ...) to specify the input table column that contains document contents (must be a vtkStringArray). Default: "content".
- Warning
- The input document contents array must be a string array, even though the individual document contents may be binary data.
- See also
- vtkTextExtractionStrategy, vtkPlainTextExtractionStrategy
- Thanks:
- Developed by Timothy M. Shead (tshea.nosp@m.d@sa.nosp@m.ndia..nosp@m.gov) at Sandia National Laboratories.
- Events:
- vtkCommand::ProgressEvent
- Tests:
- vtkTextExtraction (Tests)
Definition at line 80 of file vtkTextExtraction.h.