VTK
Public Types | Public Member Functions | Static Public Member Functions | Protected Member Functions | List of all members
vtkTextExtraction Class Reference

Extracts text from documents based on their MIME type. More...

#include <vtkTextExtraction.h>

Inheritance diagram for vtkTextExtraction:
[legend]
Collaboration diagram for vtkTextExtraction:
[legend]

Public Types

typedef vtkTableAlgorithm Superclass
 
- Public Types inherited from vtkTableAlgorithm
typedef vtkAlgorithm Superclass
 
- Public Types inherited from vtkAlgorithm
typedef vtkObject Superclass
 
- Public Types inherited from vtkObject
typedef vtkObjectBase Superclass
 

Public Member Functions

virtual const char * GetClassName ()
 
virtual int IsA (const char *type)
 
void PrintSelf (ostream &os, vtkIndent indent)
 
void ClearStrategies ()
 
void PrependStrategy (vtkTextExtractionStrategy *strategy)
 
void AppendStrategy (vtkTextExtractionStrategy *strategy)
 
virtual void SetOutputArray (const char *)
 
virtual char * GetOutputArray ()
 
- Public Member Functions inherited from vtkTableAlgorithm
virtual int ProcessRequest (vtkInformation *, vtkInformationVector **, vtkInformationVector *)
 
vtkTable * GetOutput ()
 
vtkTable * GetOutput (int index)
 
void SetInput (vtkDataObject *obj)
 
void SetInput (int index, vtkDataObject *obj)
 
- Public Member Functions inherited from vtkAlgorithm
int HasExecutive ()
 
vtkExecutive * GetExecutive ()
 
virtual void SetExecutive (vtkExecutive *executive)
 
virtual int ModifyRequest (vtkInformation *request, int when)
 
vtkInformation * GetInputPortInformation (int port)
 
vtkInformation * GetOutputPortInformation (int port)
 
int GetNumberOfInputPorts ()
 
int GetNumberOfOutputPorts ()
 
void UpdateProgress (double amount)
 
vtkInformation * GetInputArrayInformation (int idx)
 
void RemoveAllInputs ()
 
vtkDataObject * GetOutputDataObject (int port)
 
virtual void RemoveInputConnection (int port, vtkAlgorithmOutput *input)
 
int GetNumberOfInputConnections (int port)
 
int GetTotalNumberOfInputConnections ()
 
vtkAlgorithmOutput * GetInputConnection (int port, int index)
 
virtual void Update ()
 
virtual void UpdateInformation ()
 
virtual void UpdateWholeExtent ()
 
void ConvertTotalInputToPortConnection (int ind, int &port, int &conn)
 
virtual double ComputePriority ()
 
int ProcessRequest (vtkInformation *request, vtkCollection *inInfo, vtkInformationVector *outInfo)
 
virtual int ComputePipelineMTime (vtkInformation *request, vtkInformationVector **inInfoVec, vtkInformationVector *outInfoVec, int requestFromOutputPort, unsigned long *mtime)
 
virtual vtkInformation * GetInformation ()
 
virtual void SetInformation (vtkInformation *)
 
virtual void Register (vtkObjectBase *o)
 
virtual void UnRegister (vtkObjectBase *o)
 
virtual void SetAbortExecute (int)
 
virtual int GetAbortExecute ()
 
virtual void AbortExecuteOn ()
 
virtual void AbortExecuteOff ()
 
virtual void SetProgress (double)
 
virtual double GetProgress ()
 
void SetProgressText (const char *ptext)
 
virtual char * GetProgressText ()
 
virtual unsigned long GetErrorCode ()
 
virtual void SetInputArrayToProcess (int idx, int port, int connection, int fieldAssociation, const char *name)
 
virtual void SetInputArrayToProcess (int idx, int port, int connection, int fieldAssociation, int fieldAttributeType)
 
virtual void SetInputArrayToProcess (int idx, vtkInformation *info)
 
virtual void SetInputArrayToProcess (int idx, int port, int connection, const char *fieldAssociation, const char *attributeTypeorName)
 
vtkDataObject * GetInputDataObject (int port, int connection)
 
virtual void SetInputConnection (int port, vtkAlgorithmOutput *input)
 
virtual void SetInputConnection (vtkAlgorithmOutput *input)
 
virtual void AddInputConnection (int port, vtkAlgorithmOutput *input)
 
virtual void AddInputConnection (vtkAlgorithmOutput *input)
 
vtkAlgorithmOutput * GetOutputPort (int index)
 
vtkAlgorithmOutput * GetOutputPort ()
 
virtual void SetReleaseDataFlag (int)
 
virtual int GetReleaseDataFlag ()
 
void ReleaseDataFlagOn ()
 
void ReleaseDataFlagOff ()
 
int UpdateExtentIsEmpty (vtkDataObject *output)
 
int UpdateExtentIsEmpty (vtkInformation *pinfo, int extentType)
 
- Public Member Functions inherited from vtkObject
virtual void DebugOn ()
 
virtual void DebugOff ()
 
unsigned char GetDebug ()
 
void SetDebug (unsigned char debugFlag)
 
virtual void Modified ()
 
virtual unsigned long GetMTime ()
 
unsigned long AddObserver (unsigned long event, vtkCommand *, float priority=0.0f)
 
unsigned long AddObserver (const char *event, vtkCommand *, float priority=0.0f)
 
vtkCommand * GetCommand (unsigned long tag)
 
void RemoveObserver (vtkCommand *)
 
void RemoveObservers (unsigned long event, vtkCommand *)
 
void RemoveObservers (const char *event, vtkCommand *)
 
int HasObserver (unsigned long event, vtkCommand *)
 
int HasObserver (const char *event, vtkCommand *)
 
void RemoveObserver (unsigned long tag)
 
void RemoveObservers (unsigned long event)
 
void RemoveObservers (const char *event)
 
void RemoveAllObservers ()
 
int HasObserver (unsigned long event)
 
int HasObserver (const char *event)
 
template<class U , class T >
unsigned long AddObserver (unsigned long event, U observer, void(T::*callback)(), float priority=0.0f)
 
template<class U , class T >
unsigned long AddObserver (unsigned long event, U observer, void(T::*callback)(vtkObject *, unsigned long, void *), float priority=0.0f)
 
int InvokeEvent (unsigned long event, void *callData)
 
int InvokeEvent (const char *event, void *callData)
 
int InvokeEvent (unsigned long event)
 
int InvokeEvent (const char *event)
 
- Public Member Functions inherited from vtkObjectBase
const char * GetClassName () const
 
virtual void Delete ()
 
virtual void FastDelete ()
 
void Print (ostream &os)
 
void SetReferenceCount (int)
 
void PrintRevisions (ostream &os)
 
virtual void PrintHeader (ostream &os, vtkIndent indent)
 
virtual void PrintTrailer (ostream &os, vtkIndent indent)
 
int GetReferenceCount ()
 

Static Public Member Functions

static vtkTextExtraction * New ()
 
static int IsTypeOf (const char *type)
 
static vtkTextExtraction * SafeDownCast (vtkObject *o)
 
- Static Public Member Functions inherited from vtkTableAlgorithm
static vtkTableAlgorithm * New ()
 
static int IsTypeOf (const char *type)
 
static vtkTableAlgorithm * SafeDownCast (vtkObject *o)
 
- Static Public Member Functions inherited from vtkAlgorithm
static vtkAlgorithm * New ()
 
static int IsTypeOf (const char *type)
 
static vtkAlgorithm * SafeDownCast (vtkObject *o)
 
static void SetDefaultExecutivePrototype (vtkExecutive *proto)
 
static vtkInformationIntegerKey * INPUT_IS_OPTIONAL ()
 
static vtkInformationIntegerKey * INPUT_IS_REPEATABLE ()
 
static vtkInformationInformationVectorKey * INPUT_REQUIRED_FIELDS ()
 
static vtkInformationStringVectorKey * INPUT_REQUIRED_DATA_TYPE ()
 
static vtkInformationInformationVectorKey * INPUT_ARRAYS_TO_PROCESS ()
 
static vtkInformationIntegerKey * INPUT_PORT ()
 
static vtkInformationIntegerKey * INPUT_CONNECTION ()
 
static vtkInformationIntegerKey * PRESERVES_DATASET ()
 
static vtkInformationIntegerKey * PRESERVES_GEOMETRY ()
 
static vtkInformationIntegerKey * PRESERVES_BOUNDS ()
 
static vtkInformationIntegerKey * PRESERVES_TOPOLOGY ()
 
static vtkInformationIntegerKey * PRESERVES_ATTRIBUTES ()
 
static vtkInformationIntegerKey * PRESERVES_RANGES ()
 
- Static Public Member Functions inherited from vtkObject
static int IsTypeOf (const char *type)
 
static vtkObject * SafeDownCast (vtkObject *o)
 
static vtkObject * New ()
 
static void BreakOnError ()
 
static void SetGlobalWarningDisplay (int val)
 
static void GlobalWarningDisplayOn ()
 
static void GlobalWarningDisplayOff ()
 
static int GetGlobalWarningDisplay ()
 
- Static Public Member Functions inherited from vtkObjectBase
static int IsTypeOf (const char *name)
 
static vtkObjectBase * New ()
 

Protected Member Functions

 vtkTextExtraction ()
 
 ~vtkTextExtraction ()
 
virtual int RequestData (vtkInformation *request, vtkInformationVector **inputVector, vtkInformationVector *outputVector)
 
- Protected Member Functions inherited from vtkTableAlgorithm
 vtkTableAlgorithm ()
 
 ~vtkTableAlgorithm ()
 
virtual int RequestInformation (vtkInformation *request, vtkInformationVector **inputVector, vtkInformationVector *outputVector)
 
virtual int FillOutputPortInformation (int port, vtkInformation *info)
 
virtual int FillInputPortInformation (int port, vtkInformation *info)
 
virtual int RequestUpdateExtent (vtkInformation *, vtkInformationVector **, vtkInformationVector *)
 
- Protected Member Functions inherited from vtkAlgorithm
 vtkAlgorithm ()
 
 ~vtkAlgorithm ()
 
virtual void SetNumberOfInputPorts (int n)
 
virtual void SetNumberOfOutputPorts (int n)
 
int InputPortIndexInRange (int index, const char *action)
 
int OutputPortIndexInRange (int index, const char *action)
 
int GetInputArrayAssociation (int idx, vtkInformationVector **inputVector)
 
virtual vtkExecutive * CreateDefaultExecutive ()
 
virtual void ReportReferences (vtkGarbageCollector *)
 
virtual void SetNumberOfInputConnections (int port, int n)
 
int GetInputArrayAssociation (int idx, int connection, vtkInformationVector **inputVector)
 
int GetInputArrayAssociation (int idx, vtkDataObject *input)
 
vtkDataArray * GetInputArrayToProcess (int idx, vtkInformationVector **inputVector)
 
vtkDataArray * GetInputArrayToProcess (int idx, vtkInformationVector **inputVector, int &association)
 
vtkDataArray * GetInputArrayToProcess (int idx, int connection, vtkInformationVector **inputVector)
 
vtkDataArray * GetInputArrayToProcess (int idx, int connection, vtkInformationVector **inputVector, int &association)
 
vtkDataArray * GetInputArrayToProcess (int idx, vtkDataObject *input)
 
vtkDataArray * GetInputArrayToProcess (int idx, vtkDataObject *input, int &association)
 
vtkAbstractArray * GetInputAbstractArrayToProcess (int idx, vtkInformationVector **inputVector)
 
vtkAbstractArray * GetInputAbstractArrayToProcess (int idx, vtkInformationVector **inputVector, int &association)
 
vtkAbstractArray * GetInputAbstractArrayToProcess (int idx, int connection, vtkInformationVector **inputVector)
 
vtkAbstractArray * GetInputAbstractArrayToProcess (int idx, int connection, vtkInformationVector **inputVector, int &association)
 
vtkAbstractArray * GetInputAbstractArrayToProcess (int idx, vtkDataObject *input)
 
vtkAbstractArray * GetInputAbstractArrayToProcess (int idx, vtkDataObject *input, int &association)
 
vtkInformation * GetInputArrayFieldInformation (int idx, vtkInformationVector **inputVector)
 
virtual void SetNthInputConnection (int port, int index, vtkAlgorithmOutput *input)
 
virtual void SetErrorCode (unsigned long)
 
- Protected Member Functions inherited from vtkObject
 vtkObject ()
 
virtual ~vtkObject ()
 
virtual void RegisterInternal (vtkObjectBase *, int check)
 
virtual void UnRegisterInternal (vtkObjectBase *, int check)
 
void InternalGrabFocus (vtkCommand *mouseEvents, vtkCommand *keypressEvents=NULL)
 
void InternalReleaseFocus ()
 
- Protected Member Functions inherited from vtkObjectBase
 vtkObjectBase ()
 
virtual ~vtkObjectBase ()
 
virtual void CollectRevisions (ostream &os)
 
 vtkObjectBase (const vtkObjectBase &)
 
void operator= (const vtkObjectBase &)
 

Additional Inherited Members

- Public Attributes inherited from vtkAlgorithm
int AbortExecute
 
- Static Protected Member Functions inherited from vtkAlgorithm
static vtkInformationIntegerKey * PORT_REQUIREMENTS_FILLED ()
 
- Protected Attributes inherited from vtkAlgorithm
vtkInformation * Information
 
double Progress
 
char * ProgressText
 
unsigned long ErrorCode
 
- Protected Attributes inherited from vtkObject
unsigned char Debug
 
vtkTimeStamp MTime
 
vtkSubjectHelper * SubjectHelper
 
- Protected Attributes inherited from vtkObjectBase
int ReferenceCount
 
vtkWeakPointerBase ** WeakPointers
 
- Static Protected Attributes inherited from vtkAlgorithm
static vtkExecutive * DefaultExecutivePrototype
 

Detailed Description

Extracts text from documents based on their MIME type.

Given a table containing document ids, URIs, MIME types and document contents, extracts plain text from each document, and generates a list of 'tags' that delineate ranges of text. The actual work of extracting text and generating tags is performed by an ordered list of vtkTextExtractionStrategy objects.

By default, vtkTextExtraction has just a single strategy for extracting plain text documents. Callers will almost certainly want to supplement or replace the default with their own strategies.

Inputs: Input port 0: (required) A vtkTable containing document ids, URIs, MIME types and document contents (which could be binary).

Outputs: Output port 0: The same table with an additional "text" column that contains the text extracted from each document. Output port 1: A table of document tags that includes "document", "uri", "begin", "end", and "type" columns.

Use SetInputArrayToProcess(0, ...) to specify the input table column that contains document ids (must be a vtkIdTypeArray). Default: "document".

Use SetInputArrayToProcess(1, ...) to specify the input table column that contains URIs (must be a vtkStringArray). Default: "uri".

Use SetInputArrayToProcess(2, ...) to specify the input table column that contains MIME types (must be a vtkStringArray). Default: "mime_type".

Use SetInputArrayToProcess(3, ...) to specify the input table column that contains document contents (must be a vtkStringArray). Default: "content".

Warning
The input document contents array must be a string array, even though the individual document contents may be binary data.
See also
vtkTextExtractionStrategy, vtkPlainTextExtractionStrategy
Thanks:
Developed by Timothy M. Shead (tshea.nosp@m.d@sa.nosp@m.ndia..nosp@m.gov) at Sandia National Laboratories.
Events:
vtkCommand::ProgressEvent
Tests:
vtkTextExtraction (Tests)

Definition at line 80 of file vtkTextExtraction.h.

Member Typedef Documentation

typedef vtkTableAlgorithm vtkTextExtraction::Superclass

Definition at line 85 of file vtkTextExtraction.h.

Constructor & Destructor Documentation

vtkTextExtraction::vtkTextExtraction ( )
protected
vtkTextExtraction::~vtkTextExtraction ( )
protected

Member Function Documentation

static vtkTextExtraction* vtkTextExtraction::New ( )
static
virtual const char* vtkTextExtraction::GetClassName ( )
virtual

Reimplemented from vtkTableAlgorithm.

static int vtkTextExtraction::IsTypeOf ( const char *  type)
static
virtual int vtkTextExtraction::IsA ( const char *  name)
virtual

Return 1 if this class is the same type of (or a subclass of) the named class. Returns 0 otherwise. This method works in combination with vtkTypeMacro found in vtkSetGet.h.

Reimplemented from vtkTableAlgorithm.

static vtkTextExtraction* vtkTextExtraction::SafeDownCast ( vtkObject *  o)
static
void vtkTextExtraction::PrintSelf ( ostream &  os,
vtkIndent  indent 
)
virtual

Methods invoked by print to print information about the object including superclasses. Typically not called by the user (use Print() instead) but used in the hierarchical print process to combine the output of several classes.

Reimplemented from vtkTableAlgorithm.

void vtkTextExtraction::ClearStrategies ( )

Clear the list of strategies.

void vtkTextExtraction::PrependStrategy ( vtkTextExtractionStrategy *  strategy)

Prepend a strategy to the list of strategies. vtkTextExtraction assumes ownership of the supplied object.

void vtkTextExtraction::AppendStrategy ( vtkTextExtractionStrategy *  strategy)

Append a strategy to the list of strategies. vtkTextExtraction assumes ownership of the supplied object.

virtual void vtkTextExtraction::SetOutputArray ( const char *  )
virtual

Specifies the name of the output text array. Default: "text".

virtual char* vtkTextExtraction::GetOutputArray ( )
virtual

Returns the name of the output text array. Default: "text".

virtual int vtkTextExtraction::RequestData ( vtkInformation *  request,
vtkInformationVector **  inputVector,
vtkInformationVector *  outputVector 
)
protected virtual

This is called by the superclass. This is the method you should override.

Reimplemented from vtkTableAlgorithm.


The documentation for this class was generated from the following file: