/**
* A CREOLE wrapper for the Snowball stemmer Java implementation.
* See <a href="http://snowball.tartarus.org/index.php">http://snowball.tartarus.org/index.php</a>.
*/
package stemmer;
import gate.*;
import gate.creole.*;
import gate.util.*;
import java.util.Iterator;
/**
* A simple CREOLE wrapper for the Snowball stemmer.
*/
public class SnowballStemmer extends AbstractLanguageAnalyser implements
ProcessingResource {
public Resource init() throws ResourceInstantiationException{
fireStatusChanged("Creating a stemmer");
fireProgressChanged(0);
try {
Class stemClass = Class.forName(String.valueOf(String
.valueOf((new StringBuffer("org.tartarus.snowball.ext.")).append(
language).append("Stemmer"))));
stemmer = (org.tartarus.snowball.SnowballStemmer)stemClass.newInstance();
} catch(ClassNotFoundException e) {
throw new ResourceInstantiationException("Unsupported language: " +
language);
}
catch(InstantiationException e) {
throw new ResourceInstantiationException("Exception while instantiating stemmer",
e);
}
catch(IllegalAccessException e) {
throw new ResourceInstantiationException("Exception while instantiating stemmer",
e);
} finally{
fireProgressChanged(100);
fireProcessFinished();
}
return this;
}
public void execute() throws ExecutionException {
super.interrupted = false;
if(super.document == null) throw new GateRuntimeException(
"No document to process!");
fireProgressChanged(0);
fireStatusChanged("Stemming " + document.getName() + "...");
if(annotationSetName != null && annotationSetName.equals(""))
annotationSetName = null;
AnnotationSet inputAS =
(annotationSetName == null || annotationSetName.trim().length() == 0) ?
document.getAnnotations() :
document.getAnnotations(annotationSetName);
AnnotationSet tokensAS = inputAS.get(annotationType);
if(tokensAS == null){
throw new GateRuntimeException(
"No annotations to process!\n" +
"Please run Tokeniser first, if using default Stemmer features!");
}
Iterator<Annotation> iter = tokensAS.iterator();
int allTokens = tokensAS.size();
int processedTokens = 0;
int lastReport = 0;
while(iter.hasNext()){
if(isInterrupted()){
throw new ExecutionInterruptedException(String
.valueOf(String.valueOf((new StringBuffer(
"The execution of the \"")).append(getName()).append(
"\" stemmer has been abruptly interrupted!"))));
}
Annotation token = (Annotation)iter.next();
FeatureMap allFeatures = token.getFeatures();
String tokenString = (String)allFeatures.get(annotationFeature);
stemmer.setCurrent(tokenString.toLowerCase());
stemmer.stem();
allFeatures.put("stem", stemmer.getCurrent());
if(++processedTokens - lastReport > 100) {
lastReport = processedTokens;
fireProgressChanged((processedTokens * 100) / allTokens);
}
}
fireProcessFinished();
}
public void setLanguage(String language) {
this.language = language;
}
public String getLanguage() {
return language;
}
public void setAnnotationSetName(String annotationSetName) {
this.annotationSetName = annotationSetName;
}
public String getAnnotationSetName() {
return annotationSetName;
}
public void setAnnotationType(String annotationType) {
this.annotationType = annotationType;
}
public String getAnnotationType() {
return annotationType;
}
public void setAnnotationFeature(String annotationFeature) {
this.annotationFeature = annotationFeature;
}
public String getAnnotationFeature() {
return annotationFeature;
}
public static final String SNOW_STAM_DOCUMENT_PARAMETER_NAME = "document";
public static final String SNOW_STAM_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
public static final String SNOW_STAM_ANNOT_TYPE_PARAMETER_NAME = "annotationType";
public static final String SNOW_STAM_ANNOT_FEATURE_PARAMETER_NAME = "annotationFeature";
public static final String SNOW_STAM_LANGUAGE_PARAMETER_NAME = "language";
/**
* The actual stemmer implementation.
*/
private org.tartarus.snowball.SnowballStemmer stemmer;
private String language;
private String annotationSetName;
private String annotationType;
private String annotationFeature;
}