/** * Example of beanshell script to be used as input for the * BSFAnnotator * @author Olivier Terrier * @version 1.0 */ import org.apache.uima.analysis_engine.ResultSpecification; import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException; import org.apache.uima.analysis_engine.annotator.AnnotatorContext; import org.apache.uima.analysis_engine.annotator.AnnotatorContextException; import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException; import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException; import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase; import org.apache.uima.jcas.JCas; import org.apache.uima.util.Level; import org.apache.uima.util.Logger; import org.apache.uima.cas.*; import org.apache.uima.jcas.cas.*; import org.apache.uima.jcas.tcas.*; import org.apache.uima.annotator.bsf.types.Token; import java.util.regex.*; /** * Initialisation of global variables */ String scriptName; Pattern matchPattern; /** * Performs any startup tasks required by this annotator. * The Analysis Engine calls this method only once, just after an Annotator has been instantiated. * @param aContext Provides access to external resources that may be used by this annotator. * This includes configuration parameters, logging and instrumentation services, and access to external analysis resources. * @see com.ibm.uima.analysis_engine.annotator.BaseAnnotator#initialize(com.ibm.uima.analysis_engine.annotator.AnnotatorContext) */ public void initialize(AnnotatorContext aContext) throws AnnotatorInitializationException, AnnotatorConfigurationException { String source = (String) aContext.getConfigParameterValue("SourceFile"); String regexp = (String) aContext.getConfigParameterValue("Regexp"); if (regexp == null) regexp = "Dave|David|Bob|Tim|Joe"; matchPattern = Pattern.compile(regexp); } /** * Invokes this annotator's analysis logic. * This annotator will access the data in the JCas and add new data to the JCas. * @param jcas contains the document to be analyzed and may contain other metadata about that document. * @param rs a list of output types and features that this annotator should produce. */ public void process(JCas jcas, ResultSpecification rs) throws AnnotatorProcessException { // Looking for regexp in the document text and storing start/end indexes in arrays String text = jcas.getDocumentText(); Matcher matcher = matchPattern.matcher(text); List annotArray = new ArrayList(); while (matcher.find()) { // Create an EntityOccurrence for each firstname found in the text Token token = new Token(jcas, matcher.start(), matcher.end()); token.addToIndexes(); } }