#!/usr/bin/env groovy
/**
** 	This assumes that you have installed Groovy and 
** 	that you have the command groovy available in your path. 
** 	On Debian/Ubuntu systems, installing Groovy should be as easy as apt-get install groovy.
** 	You can download groovy from http://groovy.codehaus.org/
** 	The first run may be slow since it needs to download all of the dependencies.
**  Usage: $ ./parser.groovy <inputDir>
** 	or, for more verbose download status: $ groovy -Dgroovy.grape.report.downloads=true parser.groovy <inputDir>
**/
@Grab(group='org.apache.ctakes',
      module='ctakes-core',
            version='3.1.0')
@Grab(group='org.apache.ctakes',
      module='ctakes-core-res',
            version='3.1.0')			
@Grab(group='org.apache.ctakes',
      module='ctakes-constituency-parser',
            version='3.1.0')
@Grab(group='org.apache.ctakes',
      module='ctakes-constituency-parser-res',
            version='3.1.0')		
@Grab(group='org.apache.ctakes',
      module='ctakes-clinical-pipeline',
            version='3.1.0')
import java.io.File;
import org.apache.uima.jcas.JCas;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.cleartk.util.cr.FilesCollectionReader;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.pipeline.SimplePipeline;	
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.factory.TypeSystemDescriptionFactory;
import org.uimafit.factory.TypePrioritiesFactory;
import static org.uimafit.util.JCasUtil.*;

import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;

		// Script entry point: reads every file under the directory given as the first
		// command-line argument, runs it through the pipeline assembled below, and lets
		// the Writer class print the resulting parse strings.

		// Validate the arguments BEFORE touching args[0]; otherwise a missing argument
		// fails on the array access and the usage message is never shown.
		if(args.length < 1) {
			System.out.println("Please specify input directory");
			System.exit(1);
		}
		CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(args[0]);
		System.out.println("Reading from directory: " + args[0]);

		//Download Models
		//TODO: Separate downloads from URL here is a hack.
		//Models should really be automatically downloaded from
		//maven central as part of ctakes-*-res projects/artifacts via @Grab.
		//Illustrative purposes until we have all of the *-res artifacts in maven central.
		downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sentdetect/sd-med-model.zip","sd-med-model.zip");
		downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin","sharpacq-3.1.bin");

		//Build the pipeline to run. Engines are added in this order:
		//segment annotator, sentence detector, PTB tokenizer, constituency parser, Writer.
		AggregateBuilder aggregateBuilder = new AggregateBuilder();
		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
		//The sentence detector is pointed at the model file downloaded above.
		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
			SentenceDetector.class,
			SentenceDetector.SD_MODEL_FILE_PARAM,
			"sd-med-model.zip"));
		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
		//The constituency parser is pointed at the parser model downloaded above.
		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
			ConstituencyParser.class,
			ConstituencyParser.PARAM_MODELFILE,
			"sharpacq-3.1.bin"));
		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));
		SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());

// Custom writer class used at the end of the pipeline to write results to screen
class Writer extends org.uimafit.component.JCasAnnotator_ImplBase {
  /**
   * Prints the parse string of every TopTreebankNode selected from the given CAS.
   * Each parse string is written on its own line, followed by a single trailing space.
   *
   * @param jcas the CAS whose TopTreebankNode annotations are printed
   */
  void process(JCas jcas) {
    def topNodes = select(jcas, TopTreebankNode)
    for (node in topNodes) {
      println "${node.treebankParse} "
    }
  }
}

/**
 * Downloads the resource at the given URL into a local file, unless that file
 * already exists.
 *
 * The data is written to {@code filename}, the same path that is checked for
 * existence. Previously the check used {@code filename} but the data was written
 * to the last path segment of the URL, so the two could disagree. For the calls in
 * this script the two names are identical, so the resulting files are unchanged.
 *
 * @param url      location of the resource to fetch
 * @param filename path of the local file to create
 */
def downloadFile(String url, String filename) {
	def target = new File(filename);
	if(target.exists()) {
	  System.out.println("File already exists:" + filename);
	  return;
	}
	// Only announce the download once we know it will actually happen.
	System.out.println("Downloading: " + url);
	def completed = false
	try {
	    // withInputStream / withOutputStream close both streams even when the copy fails.
	    new URL(url).withInputStream { input ->
	        target.withOutputStream { output -> output << input }
	    }
	    completed = true
	} finally {
	    // Remove a partial file so the next run does not treat it as already downloaded.
	    if(!completed) {
	        target.delete()
	    }
	}
}