#!/usr/bin/env groovy /** ** This assumes that you have installed Groovy and ** that you have the command groovy available in your path. ** On Debian/Ubuntu systems, installing Groovy should be as easy as apt-get install groovy. ** You can download groovy from http://groovy.codehaus.org/ ** The first run may be slow since it needs to download all of the dependencies. ** Usage: $groovy cTAKES-without-resources.groovy [inputDir] ** or enable more verbose status $groovy -Dgroovy.grape.report.downloads=true cTAKES-without-resources.groovy [inputDir] **/ // @GrabResolver from Richard Eckart de Castilho, needed while using OpenNLP pre-1.5.3 version. // To fix issue with jwnl unresolved dependency @GrabResolver(name='opennlp.sf.net', root='http://opennlp.sourceforge.net/maven2') // Tried getting core-res first in hopes it will get added to classpath and that can avoid: //Caused by: java.io.FileNotFoundException: org\apache\ctakes\core\sentdetect\sd-med-model.zip (The system cannot find the path specified) // but it didn't make a difference.... 
@Grapes([ @Grab(group='org.scala-lang', module='scala-library', version='2.9.0'), @Grab(group='org.scala-tools.sbinary', module='sbinary_2.9.0', version='0.4.0'), // @Grab(group='org.apache.ctakes', // module='ctakes-core-res', // version='3.1.1'), @Grab(group='org.apache.ctakes', module='ctakes-clinical-pipeline', version='3.1.1'), //@Grab(group='net.sf.mastif', module='mastif-i2b2', version='1.4'), //@Grab(group='net.sf.mastif', module='mastif-zoner', version='1.4'), //@Grab(group='net.sf.carafe.jcarafe', module='jcarafe-ext_2.9.1', version='0.9.8.3.RC4'), //@Grab(group='net.sf.carafe.jcarafe', module='jcarafe-core_2.9.1', version='0.9.8.3.RC4'), // @Grab(group='org.apache.ctakes', // module='ctakes-dependency-parser-res', // version='3.1.1'), //net.sourceforge.ctakesresources //ctakes-resources-umls2011ab //3.1.1 @Grab(group='net.sourceforge.ctakesresources', module='ctakes-resources-umls2011ab', version='3.1.1') ]) import java.io.File; import org.apache.uima.jcas.JCas; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.collection.CollectionReader; import org.apache.uima.resource.metadata.TypeSystemDescription; import org.cleartk.util.cr.FilesCollectionReader; import org.uimafit.factory.AnalysisEngineFactory; import org.uimafit.factory.AggregateBuilder; import org.uimafit.pipeline.SimplePipeline; import org.uimafit.component.JCasAnnotator_ImplBase; import org.uimafit.component.xwriter.XWriter; import org.uimafit.factory.TypeSystemDescriptionFactory; import org.uimafit.factory.TypePrioritiesFactory; import static org.uimafit.util.JCasUtil.*; import org.apache.ctakes.typesystem.type.syntax.BaseToken; import org.apache.ctakes.typesystem.type.textspan.Segment; import org.apache.ctakes.typesystem.type.textspan.Sentence; import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode; import org.apache.ctakes.core.resource.FileLocator; import org.apache.ctakes.core.ae.SentenceDetector; import 
org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.util.CtakesFileNamer;

// Print the canonical working directory; the extra jars below are looked up relative to it.
String path = null;
File cwd = new File(".");
path = cwd.getCanonicalPath();
System.out.println(path);

// Append the jars expected under <working dir>/lib to the root class loader.
// The URLs are derived with File.toURI() instead of concatenating "file:///" with the
// path, so characters such as ' ', '#' or '%' in the directory name are escaped properly
// and the result is a well-formed file URL on every platform.
//this.class.classLoader.rootLoader.addURL( new URL("file:///C:/lib/my.jar") )
File libDir = new File(path, "lib");
// Script-level scratch variable holding the jar/model file name currently being handled.
String modelName;
List<String> localJars = [
    "med-facts-i2b2-1.2-SNAPSHOT.jar",
    "med-facts-zoner-1.1.jar",
    "jcarafe-ext_2.9.1-0.9.8.3.RC4.jar",
    "jcarafe-core_2.9.1-0.9.8.3.RC4.jar"
];
for (String localJar : localJars) {
    modelName = localJar;
    this.class.classLoader.rootLoader.addURL(new File(libDir, modelName).toURI().toURL());
}

// The input directory is the only (mandatory) command line argument.
if (args.length < 1) {
    System.out.println("Please specify input directory");
    System.exit(1);
}

//scala.ScalaObject o = new scala.ScalaObject();
//System.out.println("HERE! I was able to get an error about abstract interface 'scala.ScalaObject' so it is findable here");

System.out.println("Reading from directory: " + args[0]);
CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(args[0]);

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Start of section to be replaced/deleted once get resolving to models (jars and zips) to work properly from groovy//

//Download Models
//TODO: Separate downloads from URL here is a hack.
//Models should really be automatically downloaded from
//maven central as part of ctakes-*-res projects/artifacts via @grab.
// Base URL of the cTAKES source repository the models are fetched from.
// HTTPS is used because some of the downloaded jars are later placed on the class path.
// NOTE(review): this points at trunk while the pipeline artifacts are pinned to 3.1.1 -
// confirm whether the models should come from the matching tag instead.
String ctakesRepoUrl = "https://svn.apache.org/repos/asf/ctakes/trunk/";

// Fetches one file: creates the local parent directory of relativePath (relative to the
// working directory) and downloads ctakesRepoUrl + remotePrefix + relativePath into it.
def fetchModel = { String remotePrefix, String relativePath ->
    File parentDir = new File(relativePath).getParentFile();
    parentDir.mkdirs();
    downloadFile(ctakesRepoUrl + remotePrefix + relativePath, relativePath);
};

String assertionModelsDir = "org/apache/ctakes/assertion/models/";
String dependencyModelsDir = "org/apache/ctakes/dependency/parser/models/";

// Resource module -> files below that module's src/main/resources directory.
// The local copy keeps the same relative path. Map and list literals keep insertion
// order, so files are fetched in the order listed here.
Map<String, List<String>> resourceModels = [
    "ctakes-core-res": [
        "org/apache/ctakes/core/sentdetect/sd-med-model.zip"
    ],
    "ctakes-constituency-parser-res": [
        "org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin"
    ],
    "ctakes-pos-tagger-res": [
        "org/apache/ctakes/postagger/models/mayo-pos.zip"
    ],
    "ctakes-dependency-parser-res": [
        dependencyModelsDir + "pred/mayo-en-pred-1.3.0.jar",
        dependencyModelsDir + "role/mayo-en-role-1.3.0.jar",
        dependencyModelsDir + "srl/mayo-en-srl-1.3.0.jar"
    ],
    "ctakes-chunker-res": [
        "org/apache/ctakes/chunker/models/chunker-model.zip"
    ],
    "ctakes-assertion-res": [
        assertionModelsDir + "i2b2.model",
        assertionModelsDir + "cue.model",
        assertionModelsDir + "scope.model",
        assertionModelsDir + "pos.model",
        assertionModelsDir + "featureFile11b",
        assertionModelsDir + "generic.txt",
        assertionModelsDir + "history.txt",
        assertionModelsDir + "polarity.txt",
        assertionModelsDir + "uncertainty.txt"
        // sharpPolarityFrags.txt
        // sharpUncertaintyFrags.txt
    ],
    "ctakes-dictionary-lookup-res": [
        "org/apache/ctakes/dictionary/lookup/LookupDesc_Db.xml"
    ]
];
for (Map.Entry<String, List<String>> moduleEntry : resourceModels.entrySet()) {
    for (String modelPath : moduleEntry.getValue()) {
        fetchModel(moduleEntry.getKey() + "/src/main/resources/", modelPath);
    }
}

// get some jars that can't get using grapes/@grab
// They live in the lib/ directory of the ctakes-assertion module and are stored under lib/ locally.
List<String> assertionLibJars = [
    "jcarafe-core_2.9.1-0.9.8.3.RC4.jar",
    "jcarafe-ext_2.9.1-0.9.8.3.RC4.jar",
    "med-facts-zoner-1.1.jar",
    "med-facts-i2b2-1.2-SNAPSHOT.jar"
];
for (String assertionLibJar : assertionLibJars) {
    fetchModel("ctakes-assertion/", "lib/" + assertionLibJar);
}
// tried using net.sf.mastif grapes but dep errors....
// Reminders for resources this script does not fetch yet.
System.err.println("TODO YET - DEAL WITH GETTING rxnorm_index");
System.err.println("TODO YET - DEAL WITH GETTING OrangeBook");

// NOTE: curDir is not referenced by any statement in this section.
File curDir = new File(".");

// Local directories (relative to the working directory) for the dependency parser models.
String parserDependencyDir = "org/apache/ctakes/dependency/parser/models/dependency";
String parserLemmatizerDir = "org/apache/ctakes/dependency/parser/models/lemmatizer";

// Jar file names and the relative paths they are saved under.
String lemmatizerDictionaryJar = "dictionary-1.3.1.jar";
String dependencyModelJar = "mayo-en-dep-1.3.0.jar";
String lemmatizerDictionaryTarget = parserLemmatizerDir + "/" + lemmatizerDictionaryJar;
String dependencyModelTarget = parserDependencyDir + "/" + dependencyModelJar;

// Create both target directories, announcing each absolute path first.
for (String parserDir : [parserDependencyDir, parserLemmatizerDir]) {
    File parserDirFile = new File(parserDir);
    System.out.println("Creating dirs for " + parserDirFile.getAbsolutePath());
    parserDirFile.mkdirs();
}

// Both jars are taken from the ctakes-3.1.1 tag of the dependency parser resources.
String taggedModelsUrl = "https://svn.apache.org/repos/asf/ctakes/tags/ctakes-3.1.1/ctakes-dependency-parser-res/src/main/resources/org/apache/ctakes/dependency/parser/models/";

System.out.println("Downloading resources not available separately from maven central: " + lemmatizerDictionaryJar);
downloadFile(taggedModelsUrl + "lemmatizer/" + lemmatizerDictionaryJar, lemmatizerDictionaryTarget);
downloadFile(taggedModelsUrl + "dependency/" + dependencyModelJar, dependencyModelTarget);

// End of section to be replaced/deleted once get resolving to models (jars and zips) to work properly from groovy//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

//Build
// the pipeline to run
AggregateBuilder aggregateBuilder = new AggregateBuilder();

AnalysisEngineDescription clinicalPipelineWithUmls = AnalysisEngineFactory.createAnalysisEngineDescription(
    "desc/analysis_engine/AggregatePlaintextUMLSProcessor" // Note, do not include .xml in the name here
);
aggregateBuilder.add(clinicalPipelineWithUmls);

//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
//SentenceDetector.class,
//SentenceDetector.SD_MODEL_FILE_PARAM,
//"sd-med-model.zip"));
//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
//ConstituencyParser.class,
//ConstituencyParser.PARAM_MODELFILE,
//"sharpacq-3.1.bin"));
//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));

// Write every processed document through XWriter into "output-dir", with file names
// chosen by CtakesFileNamer.
TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription("org.apache.ctakes.typesystem.types.TypeSystem");
AnalysisEngineDescription xWriter = AnalysisEngineFactory.createPrimitiveDescription(
    XWriter.class,
    typeSystemDescription,
    XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
    "output-dir",
    XWriter.PARAM_FILE_NAMER_CLASS_NAME,
    CtakesFileNamer.class.getName()
);
aggregateBuilder.add(xWriter);

SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());

// Custom writer class used at the end of the pipeline to write results to screen
// (its registration above is commented out, so it is not part of the aggregate).
class Writer extends org.uimafit.component.JCasAnnotator_ImplBase {
    void process(JCas jcas) {
        System.out.println("Commented out most of process() for Writer");
    }
}

/**
 * Downloads a URL to a local file unless that file already exists.
 *
 * The content is first written to a sibling "<filename>.part" file and only renamed to
 * its final name after the transfer finished. An interrupted download therefore never
 * leaves a truncated file that a later run would skip as "already exists". Both the
 * remote stream and the local stream are closed in every case.
 *
 * @param url      location to download from
 * @param filename target path; its parent directory must already exist
 * @throws IOException if the transfer fails or the finished file cannot be renamed
 */
def downloadFile(String url, String filename) {
    System.out.println("Downloading: " + url);
    def file = new File(filename);
    String savingAs = file.getAbsolutePath();
    System.out.println("Saving as " + savingAs);
    if (file.exists()) {
        System.out.println("File already exists:" + filename);
        return;
    }

    def partFile = new File(savingAs + ".part");
    boolean completed = false;
    try {
        // withInputStream/withOutputStream close their streams even when the copy throws.
        new URL(url).withInputStream { remote ->
            partFile.withOutputStream { local -> local << remote }
        }
        completed = partFile.renameTo(file);
        if (!completed) {
            throw new IOException("Could not rename " + partFile + " to " + savingAs);
        }
    } finally {
        // Never keep a partial download around.
        if (!completed) {
            partFile.delete();
        }
    }
}