/**
 * Example of beanshell script to be used as input for the
 * BSFAnnotator 
 * @author Olivier Terrier
 * @version 1.0 
 */

import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.cas.*;
import org.apache.uima.jcas.cas.*;
import org.apache.uima.jcas.tcas.*;
import org.apache.uima.annotator.bsf.types.Token;

import java.util.regex.*;

/**
 * Script-level variables shared by the functions of this script.
 * They are only declared here; no value is assigned at this point.
 */ 
// Declared but neither assigned nor read anywhere in this script.
String scriptName;
// Compiled regular expression: set by initialize(), applied to the document text by process().
Pattern matchPattern;

/**
 * Performs any startup tasks required by this annotator.
 * The Analysis Engine calls this method only once, just after an Annotator has been instantiated.
 * Reads the "Regexp" configuration parameter and compiles it into the script-level
 * matchPattern used by process(). When the parameter has no value, a default
 * pattern matching a handful of first names is used instead.
 * @param aContext Provides access to external resources that may be used by this annotator.
 *                 This includes configuration parameters, logging and instrumentation services, and access to external analysis resources. 
 * @throws AnnotatorInitializationException declared for the annotator contract; not raised by this script itself
 * @throws AnnotatorConfigurationException if the "Regexp" parameter is not a valid regular expression
 * @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(org.apache.uima.analysis_engine.annotator.AnnotatorContext)
 */ 
public void initialize(AnnotatorContext aContext)
			throws AnnotatorInitializationException,
			AnnotatorConfigurationException {
			
			String regexp = (String) aContext.getConfigParameterValue("Regexp");
			if (regexp == null) {
				// No expression configured: fall back to the built-in list of first names.
				regexp = "Dave|David|Bob|Tim|Joe";
			}
			try {
				matchPattern = Pattern.compile(regexp);
			} catch (PatternSyntaxException e) {
				// Report a malformed expression as a configuration problem (a declared
				// exception) rather than letting the unchecked exception escape.
				throw new AnnotatorConfigurationException(e);
			}
}

/**
 * Invokes this annotator's analysis logic.
 * Scans the document text of the JCas with the pattern compiled by initialize()
 * and adds one Token annotation to the indexes for every match found.
 * A JCas without document text is left untouched.
 * @param jcas contains the document to be analyzed and may contain other metadata about that document.
 * @param rs a list of output types and features that this annotator should produce
 *           (not consulted by this script).
 */  
public void process(JCas jcas, ResultSpecification rs)
      throws AnnotatorProcessException {
      
      String text = jcas.getDocumentText();
      if (text == null) {
        // Nothing to scan; Pattern.matcher(null) would throw a NullPointerException.
        return;
      }

      // Look for every occurrence of the regular expression in the document text
      Matcher matcher = matchPattern.matcher(text);
      while (matcher.find()) {
        // Create a Token spanning each match and add it to the indexes
        Token token = new Token(jcas, matcher.start(), matcher.end());
        token.addToIndexes();
      }
}