public class TokenFilter extends Object
Modifier and Type | Field and Description |
---|---|
protected HashSet<String> |
excludedTokenClasses |
protected HashSet<Integer> |
excludedTokenTypes |
protected HashSet<String> |
includedTokenClasses |
protected HashSet<Integer> |
includedTokenTypes |
static String |
PARAM_EXCLUDEDTOKENCLASSES
Configuration parameter for list of token classes to exclude from lookups
|
static String |
PARAM_EXCLUDEDTOKENTYPES
Configuration parameter for list of token types to exclude from lookups
|
static String |
PARAM_INCLUDEDTOKENCLASSES
Configuration parameter for list of token classes to include in lookups
|
static String |
PARAM_INCLUDEDTOKENTYPES
Configuration parameter for list of token types to include in lookups
|
static String |
PARAM_STOPWORDS |
static String |
PARAM_TOKENANNOTATION
Configuration parameter giving type of tokens
|
Constructor and Description |
---|
TokenFilter(String tokenAnnotationName,
String tokenTypeFeatureName,
String tokenClassFeatureName,
Logger logger) |
Modifier and Type | Method and Description |
---|---|
boolean |
checkTokenClass(org.apache.uima.cas.text.AnnotationFS token) |
boolean |
checkTokenClass(DictionaryToken token) |
boolean |
checkTokenType(org.apache.uima.cas.text.AnnotationFS token) |
boolean |
checkTokenType(DictionaryToken token) |
String |
getTokenAnnotationName() |
org.apache.uima.cas.Feature |
getTokenClassFeature() |
String |
getTokenClassFeatureName() |
org.apache.uima.cas.Feature |
getTokenTypeFeature() |
String |
getTokenTypeFeatureName() |
void |
initConfig(org.apache.uima.analysis_engine.annotator.AnnotatorContext annotatorContext) |
static Set<String> |
initializeStopWordList(String[] stopWordsStrings) |
void |
initTypes(org.apache.uima.cas.TypeSystem typeSystem) |
void |
initTypes(org.apache.uima.cas.TypeSystem typeSystem,
boolean requireFeatureExistence) |
boolean |
isOK_Token(org.apache.uima.cas.text.AnnotationFS token,
TokenNormalizer tokenNormalizer) |
boolean |
isOK_Token(DictionaryToken token,
TokenNormalizer tokenNormalizer) |
static boolean |
isStopWord(Set<String> stopWords,
String tokenText) |
boolean |
isStopWord(String tokenText) |
void |
setTokenAnnotationName(String tokenAnnotationName) |
void |
setTokenClassFeature(org.apache.uima.cas.Feature tokenClassFeature) |
void |
setTokenClassFeatureName(String tokenClassFeatureName) |
void |
setTokenTypeFeature(org.apache.uima.cas.Feature tokenTypeFeature) |
void |
setTokenTypeFeatureName(String tokenTypeFeatureName) |
public static final String PARAM_INCLUDEDTOKENCLASSES
public static final String PARAM_EXCLUDEDTOKENCLASSES
public static final String PARAM_INCLUDEDTOKENTYPES
public static final String PARAM_EXCLUDEDTOKENTYPES
public static final String PARAM_STOPWORDS
public static final String PARAM_TOKENANNOTATION
public String getTokenClassFeatureName()
public void setTokenClassFeatureName(String tokenClassFeatureName)
public org.apache.uima.cas.Feature getTokenClassFeature()
public void setTokenClassFeature(org.apache.uima.cas.Feature tokenClassFeature)
public String getTokenTypeFeatureName()
public void setTokenTypeFeatureName(String tokenTypeFeatureName)
public org.apache.uima.cas.Feature getTokenTypeFeature()
public void setTokenTypeFeature(org.apache.uima.cas.Feature tokenTypeFeature)
public String getTokenAnnotationName()
public void setTokenAnnotationName(String tokenAnnotationName)
public void initConfig(org.apache.uima.analysis_engine.annotator.AnnotatorContext annotatorContext) throws org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException
org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException
public static Set<String> initializeStopWordList(String[] stopWordsStrings) throws org.apache.uima.analysis_engine.annotator.AnnotatorContextException
org.apache.uima.analysis_engine.annotator.AnnotatorContextException
public boolean checkTokenClass(org.apache.uima.cas.text.AnnotationFS token)
token
- tokenClass to look up
public boolean checkTokenClass(DictionaryToken token)
public boolean isStopWord(String tokenText)
public boolean checkTokenType(org.apache.uima.cas.text.AnnotationFS token)
token
-
public boolean checkTokenType(DictionaryToken token)
public void initTypes(org.apache.uima.cas.TypeSystem typeSystem) throws UnknownTypeException
UnknownTypeException
public void initTypes(org.apache.uima.cas.TypeSystem typeSystem, boolean requireFeatureExistence) throws UnknownTypeException
typeSystem -
requireFeatureExistence - if true, and the tokenType and/or tokenClass features of the tokenAnnotation are
specified, they must exist. This is to allow for the situation where these features
might not exist during dictionary loading, but are needed at annotator runtime.
UnknownTypeException
public boolean isOK_Token(org.apache.uima.cas.text.AnnotationFS token, TokenNormalizer tokenNormalizer)
public boolean isOK_Token(DictionaryToken token, TokenNormalizer tokenNormalizer)
Copyright © 2016. All rights reserved.