/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
#!/usr/bin/env groovy
/**
** 	This script assumes that you have installed Groovy and 
** 	that the groovy command is available on your path. 
** 	On Debian/Ubuntu systems, installing Groovy should be as easy as: apt-get install groovy
** 	You can download Groovy from http://groovy.codehaus.org/
** 	The first run may be slow since it needs to download all of the dependencies.
** 	Usage: $ ./parser.groovy [inputDir]
** 	where inputDir contains the files to be parsed.
** 	For more verbose download status, run: $ groovy -Dgroovy.grape.report.downloads=true parser.groovy [inputDir]
**/

      
@Grab(group='org.apache.ctakes',
      module='ctakes-core',
            version='4.0.0')
@Grab(group='org.apache.ctakes',
      module='ctakes-core-res',
            version='4.0.0')			
@Grab(group='org.apache.ctakes',
      module='ctakes-constituency-parser',
            version='4.0.0')
@Grab(group='org.apache.ctakes',
      module='ctakes-constituency-parser-res',
            version='4.0.0')
@Grab(group='org.cleartk',
      module='cleartk-util',
      version='0.9.2')
      
@Grab(group='org.apache.uima',
      module='uimafit-core',
      version='2.2.0')

/*
@Grab(group='org.apache.ctakes',
      module='ctakes-clinical-pipeline',
            version='4.0.0')
*/
          
import java.io.File;
import org.apache.uima.cas.CAS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.cleartk.util.cr.FilesCollectionReader;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.pipeline.SimplePipeline;	
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
import org.apache.uima.fit.factory.TypePrioritiesFactory;
import static org.apache.uima.fit.util.JCasUtil.*;

import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
import org.apache.uima.fit.util.JCasUtil;

		// An input directory is mandatory; without one, report the problem and exit with status 1.
		if(args.length == 0) {
			System.out.println("Please specify input directory");
			System.exit(1);
		}
		String inputDir = args[0];
		System.out.println("Reading from directory: " + inputDir);

		// Reader over the files in the input directory, restricted to the "txt" suffix.
		CollectionReader collectionReader = FilesCollectionReader.getCollectionReaderWithSuffixes(inputDir, CAS.NAME_DEFAULT_SOFA, "txt");

		// Fetch the model files into the working directory.
		// TODO: Downloading from a URL here is a hack.
		// Models should really be downloaded automatically from
		// maven central as part of the ctakes-*-res projects/artifacts via @Grab.
		// Kept for illustrative purposes until all of the *-res artifacts are in maven central.
		String sentenceModelFile = "sd-med-model.zip";
		String parserModelFile = "sharpacq-3.1.bin";
		downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sentdetect/sd-med-model.zip", sentenceModelFile);
		downloadFile("http://svn.apache.org/repos/asf/ctakes/trunk/ctakes-constituency-parser-res/src/main/resources/org/apache/ctakes/constituency/parser/models/sharpacq-3.1.bin", parserModelFile);

		// Assemble the pipeline: segments -> sentences -> tokens -> constituency parse -> console writer.
		AggregateBuilder pipeline = new AggregateBuilder();

		AnalysisEngineDescription segmenter = AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class);
		pipeline.add(segmenter);

		AnalysisEngineDescription sentenceDetector = AnalysisEngineFactory.createPrimitiveDescription(
			SentenceDetector.class,
			SentenceDetector.SD_MODEL_FILE_PARAM,
			sentenceModelFile);
		pipeline.add(sentenceDetector);

		AnalysisEngineDescription tokenizer = AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class);
		pipeline.add(tokenizer);

		AnalysisEngineDescription parser = AnalysisEngineFactory.createPrimitiveDescription(
			ConstituencyParser.class,
			ConstituencyParser.PARAM_MODEL_FILENAME,
			parserModelFile);
		pipeline.add(parser);

		AnalysisEngineDescription resultWriter = AnalysisEngineFactory.createPrimitiveDescription(Writer.class);
		pipeline.add(resultWriter);

		// Run every document from the reader through the aggregate engine.
		SimplePipeline.runPipeline(collectionReader, pipeline.createAggregate());

// Final pipeline step: prints the parse results of each document to the screen.
class Writer extends JCasAnnotator_ImplBase {
  /**
   * Prints the treebank parse string of every TopTreebankNode found in the given CAS,
   * one per line.
   *
   * @param jcas the CAS to read TopTreebankNode annotations from
   */
  @Override
  void process(JCas jcas) {
    def topNodes = JCasUtil.select(jcas, TopTreebankNode.class)
    for (TopTreebankNode topNode in topNodes) {
      println(topNode.treebankParse)
    }
  }
}

/**
 * Downloads the resource at the given URL into a local file, unless that file already exists.
 *
 * The data is first written to a temporary "<filename>.part" file and only renamed to
 * the final name once the copy has completed. A failed or interrupted download therefore
 * never leaves a truncated file behind that a later run would mistake for a complete one.
 *
 * @param url      location of the resource to download
 * @param filename path of the local file to create
 * @throws IOException if the download fails or the finished file cannot be moved into place
 */
def downloadFile(String url, String filename) {
	System.out.println("Downloading: " + url);
	def target = new File(filename);
	if(target.exists()) {
	  System.out.println("File already exists:" + filename);
	  return;
	}
	def partial = new File(filename + ".part");
	try {
		// withInputStream / withOutputStream close both streams even when the copy throws.
		new URL(url).withInputStream { input ->
			partial.withOutputStream { output ->
				output << input
			}
		}
		if(!partial.renameTo(target)) {
			throw new IOException("Could not move " + partial + " to " + target);
		}
	} finally {
		// Removes the leftover partial file after a failure; does nothing after a successful rename.
		partial.delete();
	}
}