Coverage Report - org.apache.any23.servlet.WebResponder
 
Classes in this File Line Coverage Branch Coverage Complexity
WebResponder
0%
0/147
0%
0/56
4.75
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.servlet;
 19  
 
 20  
 import org.apache.any23.Any23;
 21  
 import org.apache.any23.ExtractionReport;
 22  
 import org.apache.any23.extractor.ExtractionException;
 23  
 import org.apache.any23.extractor.ExtractionParameters;
 24  
 import org.apache.any23.filter.IgnoreAccidentalRDFa;
 25  
 import org.apache.any23.source.DocumentSource;
 26  
 import org.apache.any23.validator.SerializationException;
 27  
 import org.apache.any23.validator.ValidationReport;
 28  
 import org.apache.any23.validator.XMLValidationReportSerializer;
 29  
 import org.apache.any23.writer.CompositeTripleHandler;
 30  
 import org.apache.any23.writer.CountingTripleHandler;
 31  
 import org.apache.any23.writer.FormatWriter;
 32  
 import org.apache.any23.writer.ReportingTripleHandler;
 33  
 import org.apache.any23.writer.TripleHandler;
 34  
 import org.apache.any23.writer.WriterRegistry;
 35  
 import sun.security.validator.ValidatorException;
 36  
 
 37  
 import javax.servlet.ServletOutputStream;
 38  
 import javax.servlet.http.HttpServletResponse;
 39  
 import java.io.ByteArrayOutputStream;
 40  
 import java.io.IOException;
 41  
 import java.io.PrintStream;
 42  
 import java.nio.charset.Charset;
 43  
 import java.util.ArrayList;
 44  
 import java.util.List;
 45  
 
 46  
 /**
 47  
  * This class is responsible for building the {@link Servlet}
 48  
  * web response.
 49  
  */
 50  
 class WebResponder {
 51  
 
 52  0
     private static final WriterRegistry writerRegistry = WriterRegistry.getInstance();
 53  
 
 54  
     /**
 55  
      * Library facade.
 56  
      */
 57  
     private final Any23 runner;
 58  
 
 59  
     /**
 60  
      * Servlet for which building the response.
 61  
      */
 62  
     private Servlet any23servlet;
 63  
 
 64  
     /**
 65  
      * Servlet response object.
 66  
      */
 67  
     private HttpServletResponse response;
 68  
 
 69  
     /**
 70  
      * RDF triple writer.
 71  
      */
 72  0
     private TripleHandler rdfWriter = null;
 73  
 
 74  
     /**
 75  
      * Error and statistics reporter.
 76  
      */
 77  0
     private ReportingTripleHandler reporter = null;
 78  
 
 79  
     /**
 80  
      * Type of expected output.
 81  
      */
 82  0
     private String outputMediaType = null;
 83  
 
 84  
     /**
 85  
      * The output stream.
 86  
      */
 87  0
     private ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
 88  
 
 89  0
     public WebResponder(Servlet any23servlet, HttpServletResponse response) {
 90  0
         this.any23servlet = any23servlet;
 91  0
         this.response = response;
 92  0
         this.runner = new Any23();
 93  0
         runner.setHTTPUserAgent("Any23-Servlet");
 94  0
     }
 95  
 
 96  
     protected Any23 getRunner() {
 97  0
         return runner;
 98  
     }
 99  
 
 100  
     public void runExtraction(
 101  
             DocumentSource in,
 102  
             ExtractionParameters eps,
 103  
             String format,
 104  
             boolean report, boolean annotate
 105  
     ) throws IOException {
 106  0
         if (in == null) return;
 107  0
         if (!initRdfWriter(format, report, annotate)) return;
 108  
         final ExtractionReport er;
 109  
         try {
 110  0
             er = runner.extract(eps, in, rdfWriter);
 111  0
             rdfWriter.close();
 112  0
             if (! er.hasMatchingExtractors() ) {
 113  0
                 sendError(
 114  
                         415,
 115  
                         "No suitable extractor found for this media type",
 116  
                         null,
 117  
                         er.getValidationReport(),
 118  
                         report
 119  
                 );
 120  0
                 return;
 121  
             }
 122  0
         } catch (IOException ioe) {
 123  
             // IO Error.
 124  0
             if (ioe.getCause() != null && ValidatorException.class.equals(ioe.getCause().getClass())) {
 125  0
                 final String errMsg = "Could not fetch input, IO Error.";
 126  0
                 any23servlet.log(errMsg, ioe.getCause());
 127  0
                 sendError(502, errMsg, ioe, null, report);
 128  0
                 return;
 129  
             }
 130  0
             any23servlet.log("Could not fetch input", ioe);
 131  0
             sendError(502, "Could not fetch input.", ioe, null, report);
 132  0
             return;
 133  0
         } catch (ExtractionException e) {
 134  
             // Extraction error.
 135  0
             any23servlet.log("Could not parse input", e);
 136  0
             sendError(502, "Could not parse input.", e, null, report);
 137  0
             return;
 138  0
         } catch (Exception e) {
 139  0
             any23servlet.log("Internal error", e);
 140  0
             sendError(500, "Internal error.", e, null, report);
 141  0
             return;
 142  0
         }
 143  
 
 144  
         /* *** No triples found. *** */
 145  0
         any23servlet.log("Extraction complete, " + reporter.getTotalTriples() + " triples");
 146  0
         if (reporter.getTotalTriples() == 0) {
 147  0
             sendError(
 148  
                     501,
 149  
                     "Extraction completed. No triples have been found.",
 150  
                     null,
 151  
                     er.getValidationReport(), report
 152  
             );
 153  0
             return;
 154  
         }
 155  
 
 156  
         // Regular response.
 157  0
         response.setContentType(outputMediaType);
 158  0
         response.setStatus(200);
 159  
         // Set the output encoding equals to the input one.
 160  0
         final String charsetEncoding = er.getEncoding();
 161  0
         if (Charset.isSupported(charsetEncoding)) {
 162  0
             response.setCharacterEncoding(er.getEncoding());
 163  
         } else {
 164  0
             response.setCharacterEncoding("UTF-8");
 165  
         }
 166  
 
 167  0
         final ServletOutputStream sos = response.getOutputStream();
 168  0
         final byte[] data = byteOutStream.toByteArray();
 169  0
         if(report) {
 170  0
             final PrintStream ps = new PrintStream(sos);
 171  
             try {
 172  0
                 printHeader(ps);
 173  0
                 printResponse(reporter, er.getValidationReport(), data, ps);
 174  0
             } catch (Exception e) {
 175  0
                 throw new RuntimeException("An error occurred while serializing the output response.", e);
 176  
             } finally {
 177  0
                 ps.close();
 178  0
             }
 179  0
         } else {
 180  0
             sos.write(data);
 181  
         }
 182  0
     }
 183  
 
 184  
     public void sendError(int code, String msg, boolean report) throws IOException {
 185  0
         sendError(code, msg, null, null, report);
 186  0
     }
 187  
     
 188  
     private void printHeader(PrintStream ps) {
 189  0
         ps.println("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
 190  0
     }
 191  
 
 192  
     private void printResponse(ReportingTripleHandler rth, ValidationReport vr, byte[] data, PrintStream ps) {
 193  0
         ps.println("<response>");
 194  0
         printExtractors(rth, ps);
 195  0
         printReport(null, null, vr, ps);
 196  0
         printData(data, ps);
 197  0
         ps.println("</response>");
 198  0
     }
 199  
 
 200  
     private void printExtractors(ReportingTripleHandler rth, PrintStream ps) {
 201  0
         ps.println("<extractors>");
 202  0
         for (String extractor : rth.getExtractorNames()) {
 203  0
             ps.print("<extractor>");
 204  0
             ps.print(extractor);
 205  0
             ps.println("</extractor>");
 206  
         }
 207  0
         ps.println("</extractors>");
 208  0
     }
 209  
 
 210  
     private void printReport(String msg, Throwable e, ValidationReport vr, PrintStream ps) {
 211  0
         XMLValidationReportSerializer reportSerializer = new XMLValidationReportSerializer();
 212  0
         ps.println("<report>");
 213  0
         ps.printf("<message>%s</message>\n", msg == null ? "" : msg);
 214  0
         ps.println("<error>");
 215  0
         if(e != null) {
 216  0
             ps.println("<![CDATA[");
 217  0
             e.printStackTrace(ps);
 218  0
             ps.println("]]>");
 219  
         }
 220  0
         ps.println("</error>");
 221  
         // ps.println("<![CDATA[");
 222  
         try {
 223  0
             reportSerializer.serialize(vr, ps);
 224  0
         } catch (SerializationException se) {
 225  0
             ps.println("An error occurred while serializing error.");
 226  0
             se.printStackTrace(ps);
 227  0
         }
 228  
         // ps.println("]]>");
 229  0
         ps.println("</report>");
 230  0
     }
 231  
 
 232  
     private void printData(byte[] data, PrintStream ps) {
 233  0
         ps.println("<data>");
 234  0
         ps.println("<![CDATA[");
 235  
         try {
 236  0
             ps.write(data);
 237  0
         } catch (IOException ioe) {
 238  0
             ps.println("An error occurred while serializing data.");
 239  0
             ioe.printStackTrace(ps);
 240  0
         }
 241  0
         ps.println("]]>");
 242  0
         ps.println("</data>");
 243  0
     }
 244  
 
 245  
     private void sendError(int code, String msg, Exception e, ValidationReport vr, boolean report)
 246  
     throws IOException {
 247  0
         response.setStatus(code);
 248  0
         response.setContentType("text/plain");
 249  0
         final PrintStream ps = new PrintStream(response.getOutputStream());
 250  0
         if (report) {
 251  
             try {
 252  0
                 printHeader(ps);
 253  0
                 printReport(msg, e, vr, ps);
 254  
             } finally {
 255  0
                 ps.close();
 256  0
             }
 257  
         } else {
 258  0
             ps.println(msg);
 259  0
             if (e != null) {
 260  0
                 ps.println("================================================================");
 261  0
                 e.printStackTrace(ps);
 262  0
                 ps.println("================================================================");
 263  
             }
 264  
         }
 265  0
     }
 266  
 
 267  
     private boolean initRdfWriter(String format, boolean report, boolean annotate) throws IOException {
 268  0
         final FormatWriter fw = getFormatWriter(format, annotate);
 269  0
         if (fw == null) {
 270  0
             sendError(
 271  
                     400,
 272  
                     "Invalid format '" + format + "', try one of: [rdfxml, turtle, ntriples, nquads, trix, json]",
 273  
                     null,
 274  
                     null,
 275  
                     report
 276  
             );
 277  0
             return false;
 278  
         }
 279  0
         outputMediaType = WriterRegistry.getMimeType( fw.getClass() );
 280  0
         List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
 281  0
         tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
 282  0
         tripleHandlers.add(new CountingTripleHandler());
 283  0
         rdfWriter = new CompositeTripleHandler(tripleHandlers);
 284  0
         reporter = new ReportingTripleHandler(rdfWriter);
 285  0
         rdfWriter = reporter;
 286  0
         return true;
 287  
     }
 288  
 
 289  
     private FormatWriter getFormatWriter(String format, boolean annotate) throws IOException {
 290  
         final String finalFormat;
 291  0
         if ("rdf".equals(format) || "xml".equals(format) || "rdfxml".equals(format)) {
 292  0
             finalFormat = "rdfxml";
 293  0
         } else if ("turtle".equals(format) || "ttl".equals(format)) {
 294  0
             finalFormat = "turtle";
 295  0
         } else if ("n3".equals(format)) {
 296  0
             finalFormat = "turtle";
 297  0
         } else if ("n-triples".equals(format) || "ntriples".equals(format) || "nt".equals(format)) {
 298  0
             finalFormat = "ntriples";
 299  0
         } else if("nquads".equals(format) || "n-quads".equals(format) || "nq".equals(format)) {
 300  0
             finalFormat = "nquads";
 301  0
         } else if("trix".equals(format)) {
 302  0
             finalFormat = "trix";
 303  0
         } else if("json".equals(format)) {
 304  0
             finalFormat = "json";
 305  
         } else {
 306  0
             return null;
 307  
         }
 308  0
         final FormatWriter writer = writerRegistry.getWriterInstanceByIdentifier(finalFormat, byteOutStream);
 309  0
         writer.setAnnotated(annotate);
 310  0
         return writer;
 311  
     }
 312  
 
 313  
 }