/* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /** * Example of beanshell script to be used as input for the * BSFAnnotator * @author Olivier Terrier * @version 1.0 */ import org.apache.uima.UimaContext; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.jcas.JCas; import org.apache.uima.util.Level; import org.apache.uima.util.Logger; import org.apache.uima.cas.*; import org.apache.uima.jcas.cas.*; import org.apache.uima.jcas.tcas.*; import org.apache.uima.annotator.bsf.types.Token; import java.util.regex.*; /** * Initialisation of global variables */ String scriptName; Pattern matchPattern; /** * Performs any startup tasks required by this annotator. * The Analysis Engine calls this method only once, just after an Annotator has been instantiated. * @param aContext Provides access to external resources that may be used by this annotator. * This includes configuration parameters, logging and instrumentation services, and access to external analysis resources. * @see com.ibm.uima.analysis_engine.annotator.BaseAnnotator#initialize(com.ibm.uima.analysis_engine.annotator.AnnotatorContext) */ public void initialize(UimaContext aContext) throws ResourceInitializationException { String source = (String) aContext.getConfigParameterValue("SourceFile"); String regexp = (String) aContext.getConfigParameterValue("Regexp"); if (regexp == null) regexp = "Dave|David|Bob|Tim|Joe"; matchPattern = Pattern.compile(regexp); } /** * Invokes this annotator's analysis logic. * This annotator will access the data in the JCas and add new data to the JCas. * @param jcas contains the document to be analyzed and may contain other metadata about that document. * @param rs a list of output types and features that this annotator should produce. */ public void process(JCas jcas) throws AnalysisEngineProcessException { // Looking for regexp in the document text and storing start/end indexes in arrays String text = jcas.getDocumentText(); Matcher matcher = matchPattern.matcher(text); List annotArray = new ArrayList(); while (matcher.find()) { // Create an EntityOccurrence for each firstname found in the text Token token = new Token(jcas, matcher.start(), matcher.end()); token.addToIndexes(); } }