#!/usr/bin/env groovy
/**
** This assumes that you have installed Groovy and
** that you have the command groovy available in your path.
** On Debian/Ubuntu systems, installing Groovy should be as easy as apt-get install groovy.
** You can download groovy from http://groovy.codehaus.org/
** The first run may be slow since it needs to download all of the dependencies.
** Usage: $groovy cTAKES-without-resources.groovy [inputDir]
** or enable more verbose status $groovy -Dgroovy.grape.report.downloads=true cTAKES-without-resources.groovy [inputDir]
**/
// @GrabResolver (suggested by Richard Eckart de Castilho) is needed while using an OpenNLP version older than 1.5.3;
// it fixes the unresolved jwnl dependency.
@GrabResolver(name='opennlp.sf.net',
root='http://opennlp.sourceforge.net/maven2')
// Tried grabbing ctakes-core-res first, hoping it would be added to the classpath and so avoid:
//Caused by: java.io.FileNotFoundException: org\apache\ctakes\core\sentdetect\sd-med-model.zip (The system cannot find the path specified)
// but it didn't make a difference.
@Grapes([
@Grab(group='org.scala-lang', module='scala-library', version='2.9.0'),
@Grab(group='org.scala-tools.sbinary', module='sbinary_2.9.0', version='0.4.0'),
// @Grab(group='org.apache.ctakes',
// module='ctakes-core-res',
// version='3.1.1'),
@Grab(group='org.apache.ctakes',
module='ctakes-clinical-pipeline',
version='3.1.1'),
//@Grab(group='net.sf.mastif', module='mastif-i2b2', version='1.4'),
//@Grab(group='net.sf.mastif', module='mastif-zoner', version='1.4'),
//@Grab(group='net.sf.carafe.jcarafe', module='jcarafe-ext_2.9.1', version='0.9.8.3.RC4'),
//@Grab(group='net.sf.carafe.jcarafe', module='jcarafe-core_2.9.1', version='0.9.8.3.RC4'),
// @Grab(group='org.apache.ctakes',
// module='ctakes-dependency-parser-res',
// version='3.1.1'),
//net.sourceforge.ctakesresources
//ctakes-resources-umls2011ab
//3.1.1
@Grab(group='net.sourceforge.ctakesresources',
module='ctakes-resources-umls2011ab',
version='3.1.1')
])
import java.io.File;
import org.apache.uima.jcas.JCas;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.cleartk.util.cr.FilesCollectionReader;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.component.xwriter.XWriter;
import org.uimafit.factory.TypeSystemDescriptionFactory;
import org.uimafit.factory.TypePrioritiesFactory;
import static org.uimafit.util.JCasUtil.*;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.util.CtakesFileNamer;
// Canonical working directory; printed so the user can see where the
// relative download targets below will end up.
String path = new File(".").getCanonicalPath();
System.out.println(path);

// Validate the command line before doing any network or classpath work.
if(args.length < 1) {
    System.out.println("Please specify input directory");
    System.exit(1);
}
System.out.println("Reading from directory: " + args[0]);
CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(args[0]);

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Start of section to be replaced/deleted once get resolving to models (jars and zips) to work properly from groovy//
//Download Models
//TODO: Separate downloads from URL here is a hack.
//Models should really be automatically downloaded from
//maven central as part of ctakes-*-res projects/artifacts via @grab.
String ctakesRepoUrl = "http://svn.apache.org/repos/asf/ctakes/trunk/";
String resourcesRoot = "src/main/resources/";

// Creates relativeDir (relative to the working directory) and then downloads
// baseUrl + relativeDir + fileName into relativeDir + fileName.
// downloadFile itself skips files that are already present.
def fetchInto = { String baseUrl, String relativeDir, String fileName ->
    new File(relativeDir).mkdirs();
    downloadFile(baseUrl + relativeDir + fileName, relativeDir + fileName);
}

// One row per target directory: [module under trunk, directory below src/main/resources, file names].
// Rows and file names are listed in the order in which they are downloaded.
def trunkResources = [
    ["ctakes-core-res/", "org/apache/ctakes/core/sentdetect/", ["sd-med-model.zip"]],
    ["ctakes-constituency-parser-res/", "org/apache/ctakes/constituency/parser/models/", ["sharpacq-3.1.bin"]],
    ["ctakes-pos-tagger-res/", "org/apache/ctakes/postagger/models/", ["mayo-pos.zip"]],
    ["ctakes-dependency-parser-res/", "org/apache/ctakes/dependency/parser/models/pred/", ["mayo-en-pred-1.3.0.jar"]],
    ["ctakes-dependency-parser-res/", "org/apache/ctakes/dependency/parser/models/role/", ["mayo-en-role-1.3.0.jar"]],
    ["ctakes-dependency-parser-res/", "org/apache/ctakes/dependency/parser/models/srl/", ["mayo-en-srl-1.3.0.jar"]],
    ["ctakes-chunker-res/", "org/apache/ctakes/chunker/models/", ["chunker-model.zip"]],
    // Not fetched (yet): sharpPolarityFrags.txt, sharpUncertaintyFrags.txt
    ["ctakes-assertion-res/", "org/apache/ctakes/assertion/models/", [
        "i2b2.model", "cue.model", "scope.model", "pos.model", "featureFile11b",
        "generic.txt", "history.txt", "polarity.txt", "uncertainty.txt"]],
    ["ctakes-dictionary-lookup-res/", "org/apache/ctakes/dictionary/lookup/", ["LookupDesc_Db.xml"]]
];
for (resource in trunkResources) {
    String baseUrl = ctakesRepoUrl + resource[0] + resourcesRoot;
    String relativeDir = resource[1];
    for (String fileName in resource[2]) {
        fetchInto(baseUrl, relativeDir, fileName);
    }
}

// get some jars that can't get using grapes/@grab
// (tried using net.sf.mastif grapes but dep errors....)
String libDir = "lib/";
def localJars = [
    "jcarafe-core_2.9.1-0.9.8.3.RC4.jar",
    "jcarafe-ext_2.9.1-0.9.8.3.RC4.jar",
    "med-facts-zoner-1.1.jar",
    "med-facts-i2b2-1.2-SNAPSHOT.jar"
];
for (String jarName in localJars) {
    fetchInto(ctakesRepoUrl + "ctakes-assertion/", libDir, jarName);
}

// Put the local jars on the root class loader only now that they exist on disk:
// on a fresh checkout they are not there before the downloads above, and a class
// loader may permanently discard a jar URL it fails to open.
// The URL is derived from the File so that unusual characters in the working
// directory (e.g. '#', '%') are escaped correctly.
// Registration order is reversed to keep the search order this script has always used
// (med-facts-i2b2, med-facts-zoner, jcarafe-ext, jcarafe-core).
for (String jarName in localJars.reverse()) {
    File jar = new File(new File(path, libDir), jarName);
    this.class.classLoader.rootLoader.addURL(jar.toURI().toURL());
}

System.err.println("TODO YET - DEAL WITH GETTING rxnorm_index");
System.err.println("TODO YET - DEAL WITH GETTING OrangeBook");

// The dependency-parser and lemmatizer models come from the 3.1.1 tag rather than trunk.
String depModelsDir = "org/apache/ctakes/dependency/parser/models/";
String modelsURL = "https://svn.apache.org/repos/asf/ctakes/tags/ctakes-3.1.1/ctakes-dependency-parser-res/src/main/resources/org/apache/ctakes/dependency/parser/models/";
String dictionaryJar = "dictionary-1.3.1.jar";
String mayoEnDepJar = "mayo-en-dep-1.3.0.jar";
for (String subDir in ["dependency", "lemmatizer"]) {
    File modelDir = new File(depModelsDir + subDir);
    System.out.println("Creating dirs for " + modelDir.getAbsolutePath());
    modelDir.mkdirs();
}
System.out.println("Downloading resources not available separately from maven central: " + dictionaryJar);
downloadFile(modelsURL + "lemmatizer/" + dictionaryJar, depModelsDir + "lemmatizer/" + dictionaryJar);
downloadFile(modelsURL + "dependency/" + mayoEnDepJar, depModelsDir + "dependency/" + mayoEnDepJar);
// End of section to be replaced/deleted once get resolving to models (jars and zips) to work properly from groovy//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Assemble and run the pipeline: the cTAKES UMLS aggregate followed by an XWriter
// that writes its output below "output-dir".
AggregateBuilder pipelineBuilder = new AggregateBuilder();

// Note, do not include .xml in the descriptor name here
String umlsDescriptorName = "desc/analysis_engine/AggregatePlaintextUMLSProcessor";
pipelineBuilder.add(AnalysisEngineFactory.createAnalysisEngineDescription(umlsDescriptorName));

// Alternative, hand-assembled pipeline (kept for reference, currently disabled):
//pipelineBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
//pipelineBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
//SentenceDetector.class,
//SentenceDetector.SD_MODEL_FILE_PARAM,
//"sd-med-model.zip"));
//pipelineBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
//pipelineBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
//ConstituencyParser.class,
//ConstituencyParser.PARAM_MODELFILE,
//"sharpacq-3.1.bin"));
//pipelineBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));

// Final stage: XWriter, with output files named by CtakesFileNamer.
TypeSystemDescription ctakesTypeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription("org.apache.ctakes.typesystem.types.TypeSystem");
String fileNamerClassName = CtakesFileNamer.class.getName();
pipelineBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
    XWriter.class,
    ctakesTypeSystem,
    XWriter.PARAM_OUTPUT_DIRECTORY_NAME, "output-dir",
    XWriter.PARAM_FILE_NAMER_CLASS_NAME, fileNamerClassName));

SimplePipeline.runPipeline(collectionReader, pipelineBuilder.createAggregate());
// Placeholder annotator: for every CAS it only prints a fixed notice to stdout.
// In this script it is referenced solely from a commented-out pipeline step.
class Writer extends org.uimafit.component.JCasAnnotator_ImplBase {
    @Override
    public void process(JCas jcas) {
        final String notice = "Commented out most of process() for Writer";
        System.out.println(notice);
    }
}
/**
 * Downloads the content at {@code url} into the file {@code filename}, unless
 * that file already exists, in which case nothing is downloaded.
 *
 * The data is first streamed into a sibling "<filename>.part" file and only
 * renamed to the final name once the transfer has completed, so a failed or
 * interrupted download never leaves a truncated file that a later run would
 * mistake for a finished one. Missing parent directories are created.
 *
 * @param url      location to read from (any URL understood by java.net.URL)
 * @param filename target path, absolute or relative to the working directory
 * @throws IOException if the transfer fails or the finished file cannot be
 *                     moved into place
 */
def downloadFile(String url, String filename) {
    System.out.println("Downloading: " + url);
    def file = new File(filename);
    System.out.println("Saving as " + file.getAbsolutePath());
    if(file.exists()) {
        System.out.println("File already exists:" + filename);
        return;
    }

    def parentDir = file.getAbsoluteFile().getParentFile();
    if (parentDir != null) {
        parentDir.mkdirs();
    }

    def partial = new File(file.getPath() + ".part");
    boolean complete = false;
    try {
        // Both streams are closed by the with* helpers, on success and on failure.
        new URL(url).withInputStream { input ->
            partial.withOutputStream { output -> output << input }
        }
        complete = true;
    } finally {
        if (!complete) {
            partial.delete();
        }
    }

    if (!partial.renameTo(file)) {
        partial.delete();
        throw new IOException("Could not move " + partial + " to " + file);
    }
}