Coverage Report - org.apache.any23.servlet.Servlet
 
Classes in this File Line Coverage Branch Coverage Complexity
Servlet
0%
0/119
0%
0/82
5.933
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.servlet;
 19  
 
 20  
 import org.apache.any23.configuration.DefaultConfiguration;
 21  
 import org.apache.any23.extractor.ExtractionParameters;
 22  
 import org.apache.any23.http.HTTPClient;
 23  
 import org.apache.any23.servlet.conneg.Any23Negotiator;
 24  
 import org.apache.any23.servlet.conneg.MediaRangeSpec;
 25  
 import org.apache.any23.source.ByteArrayDocumentSource;
 26  
 import org.apache.any23.source.DocumentSource;
 27  
 import org.apache.any23.source.HTTPDocumentSource;
 28  
 import org.apache.any23.source.StringDocumentSource;
 29  
 
 30  
 import javax.servlet.ServletException;
 31  
 import javax.servlet.http.HttpServlet;
 32  
 import javax.servlet.http.HttpServletRequest;
 33  
 import javax.servlet.http.HttpServletResponse;
 34  
 import java.io.IOException;
 35  
 import java.net.URI;
 36  
 import java.net.URISyntaxException;
 37  
 import java.util.regex.Pattern;
 38  
 
 39  
 import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
 40  
 
 41  
 /**
 42  
  * A <i>Servlet</i> that fetches a client-specified <i>URI</i>,
 43  
  * RDFizes the content, and returns it in a format chosen by the client.
 44  
  *
 45  
  * @author Gabriele Renzi
 46  
  * @author Richard Cyganiak (richard@cyganiak.de)
 47  
  */
 48  0
 public class Servlet extends HttpServlet {
 49  
 
 50  
     public static final String DEFAULT_BASE_URI = "http://any23.org/tmp/";
 51  
 
 52  
     private static final long serialVersionUID = 8207685628715421336L;
 53  
 
 54  
     // RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 55  0
     private final static Pattern schemeRegex =
 56  
             Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:");
 57  
 
 58  
     @Override
 59  
     protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException, ServletException {
 60  0
         final WebResponder responder = new WebResponder(this, resp);
 61  0
         final String format = getFormatFromRequestOrNegotiation(req);
 62  0
         final boolean report = isReport(req);
 63  0
         final boolean annotate = isAnnotated(req);
 64  0
         if (format == null) {
 65  0
             responder.sendError(406, "Client accept header does not include a supported output format", report);
 66  0
             return;
 67  
         }
 68  0
         final String uri = getInputURIFromRequest(req);
 69  0
         if (uri == null) {
 70  0
             responder.sendError(404, "Missing URI in GET request. Try /format/http://example.com/myfile", report);
 71  0
             return;
 72  
         }
 73  0
         final ExtractionParameters eps = getExtractionParameters(req);
 74  0
         responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
 75  0
     }
 76  
 
 77  
     @Override
 78  
     protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws IOException {
 79  0
         final WebResponder responder = new WebResponder(this, resp);
 80  0
         final boolean report = isReport(req);
 81  0
         final boolean annotate = isAnnotated(req);
 82  0
         if (req.getContentType() == null) {
 83  0
             responder.sendError(400, "Invalid POST request, no Content-Type for the message body specified", report);
 84  0
             return;
 85  
         }
 86  0
         final String uri = getInputURIFromRequest(req);
 87  0
         final String format = getFormatFromRequestOrNegotiation(req);
 88  0
         if (format == null) {
 89  0
             responder.sendError(406, "Client accept header does not include a supported output format", report);
 90  0
             return;
 91  
         }
 92  0
         final ExtractionParameters eps = getExtractionParameters(req);
 93  0
         if ("application/x-www-form-urlencoded".equals(getContentTypeHeader(req))) {
 94  0
             if (uri != null) {
 95  0
                 log("Attempting conversion to '" + format + "' from URI <" + uri + ">");
 96  0
                 responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
 97  0
                 return;
 98  
             }
 99  0
             if (req.getParameter("body") == null) {
 100  0
                 responder.sendError(400, "Invalid POST request, parameter 'uri' or 'body' required", report);
 101  0
                 return;
 102  
             }
 103  0
             String type = null;
 104  0
             if (req.getParameter("type") != null && !"".equals(req.getParameter("type"))) {
 105  0
                 type = req.getParameter("type");
 106  
             }
 107  0
             log("Attempting conversion to '" + format + "' from body parameter");
 108  0
             responder.runExtraction(
 109  
                     new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_URI, type),
 110  
                     eps,
 111  
                     format,
 112  
                     report, annotate
 113  
             );
 114  0
             return;
 115  
         }
 116  0
         log("Attempting conversion to '" + format + "' from POST body");
 117  0
         responder.runExtraction(
 118  
                 new ByteArrayDocumentSource(
 119  
                         req.getInputStream(),
 120  
                         Servlet.DEFAULT_BASE_URI,
 121  
                         getContentTypeHeader(req)
 122  
                 ),
 123  
                 eps,
 124  
                 format,
 125  
                 report, annotate
 126  
         );
 127  0
     }
 128  
 
 129  
     private String getFormatFromRequestOrNegotiation(HttpServletRequest request) {
 130  0
         String fromRequest = getFormatFromRequest(request);
 131  0
         if (fromRequest != null && !"".equals(fromRequest) && !"best".equals(fromRequest)) {
 132  0
             return fromRequest;
 133  
         }
 134  0
         MediaRangeSpec result = Any23Negotiator.getNegotiator().getBestMatch(request.getHeader("Accept"));
 135  0
         if (result == null) {
 136  0
             return null;
 137  
         }
 138  0
         if ("text/turtle".equals(result.getMediaType())) {
 139  0
             return "turtle";
 140  
         }
 141  0
         if ("text/rdf+n3".equals(result.getMediaType())) {
 142  0
             return "n3";
 143  
         }
 144  0
         if ("text/rdf+nq".equals(result.getMediaType())) {
 145  0
             return "nq";
 146  
         }
 147  0
         if ("application/rdf+xml".equals(result.getMediaType())) {
 148  0
             return "rdf";
 149  
         }
 150  0
         if ("text/plain".equals(result.getMediaType())) {
 151  0
             return "nt";
 152  
         }
 153  0
         return "turtle";    // shouldn't happen
 154  
     }
 155  
 
 156  
     private String getFormatFromRequest(HttpServletRequest request) {
 157  0
         if (request.getPathInfo() == null) return "best";
 158  0
         String[] args = request.getPathInfo().split("/", 3);
 159  0
         if (args.length < 2 || "".equals(args[1])) {
 160  0
             if (request.getParameter("format") == null) {
 161  0
                 return "best";
 162  
             } else {
 163  0
                 return request.getParameter("format");
 164  
             }
 165  
         }
 166  0
         return args[1];
 167  
     }
 168  
 
 169  
     private String getInputURIFromRequest(HttpServletRequest request) {
 170  0
         if (request.getPathInfo() == null) return null;
 171  0
         String[] args = request.getPathInfo().split("/", 3);
 172  0
         if (args.length < 3) {
 173  0
             if (request.getParameter("uri") != null) {
 174  0
                 return request.getParameter("uri").trim();
 175  
             }
 176  0
             if (request.getParameter("url") != null) {
 177  0
                 return request.getParameter("url").trim();
 178  
             }
 179  0
             return null;
 180  
         }
 181  0
         String uri = args[2];
 182  0
         if (request.getQueryString() != null) {
 183  0
             uri = uri + "?" + request.getQueryString();
 184  
         }
 185  0
         if (!hasScheme(uri)) {
 186  0
             uri = "http://" + uri;
 187  0
         } else if (hasOnlySingleSlashAfterScheme(uri)) {
 188  
             // This is to work around an issue where Tomcat 6.0.18 is
 189  
             // too smart for us. Tomcat normalizes double-slashes in
 190  
             // the path, and thus turns "http://" into "http:/" if it
 191  
             // occurs in the path. So we restore the double slash.
 192  0
             uri = uri.replaceFirst(":/", "://");
 193  
         }
 194  0
         return uri.trim();
 195  
     }
 196  
 
 197  
 
 198  
     private boolean hasScheme(String uri) {
 199  0
         return schemeRegex.matcher(uri).find();
 200  
     }
 201  
 
 202  0
     private final static Pattern schemeAndSingleSlashRegex =
 203  
             Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:/[^/]");
 204  
 
 205  
     private boolean hasOnlySingleSlashAfterScheme(String uri) {
 206  0
         return schemeAndSingleSlashRegex.matcher(uri).find();
 207  
     }
 208  
 
 209  
     private String getContentTypeHeader(HttpServletRequest req) {
 210  0
         if (req.getHeader("Content-Type") == null) return null;
 211  0
         if ("".equals(req.getHeader("Content-Type"))) return null;
 212  0
         String contentType = req.getHeader("Content-Type");
 213  
         // strip off parameters such as ";charset=UTF-8"
 214  0
         int index = contentType.indexOf(";");
 215  0
         if (index == -1) return contentType;
 216  0
         return contentType.substring(0, index);
 217  
     }
 218  
 
 219  
     private DocumentSource createHTTPDocumentSource(WebResponder responder, String uri, boolean report)
 220  
     throws IOException {
 221  
         try {
 222  0
             if (!isValidURI(uri)) {
 223  0
                 throw new URISyntaxException(uri, "@@@");
 224  
             }
 225  0
             return createHTTPDocumentSource(responder.getRunner().getHTTPClient(), uri);
 226  0
         } catch (URISyntaxException ex) {
 227  0
             responder.sendError(400, "Invalid input URI " + uri, report);
 228  0
             return null;
 229  
         }
 230  
     }
 231  
 
 232  
     protected DocumentSource createHTTPDocumentSource(HTTPClient httpClient, String uri)
 233  
             throws IOException, URISyntaxException {
 234  0
         return new HTTPDocumentSource(httpClient, uri);
 235  
     }
 236  
 
 237  
     private boolean isValidURI(String s) {
 238  
         try {
 239  0
             URI uri = new URI(s);
 240  0
             if (!"http".equals(uri.getScheme()) && !"https".equals(uri.getScheme())) {
 241  0
                 return false;
 242  
             }
 243  0
         } catch (URISyntaxException e) {
 244  0
             return false;
 245  0
         }
 246  0
         return true;
 247  
     }
 248  
 
 249  
     private ValidationMode getValidationMode(HttpServletRequest request) {
 250  0
         final String PARAMETER = "validation-mode";
 251  0
         final String validationMode = request.getParameter(PARAMETER);
 252  0
         if(validationMode == null) return ValidationMode.None;
 253  0
         if("none".equalsIgnoreCase(validationMode)) return ValidationMode.None;
 254  0
         if("validate".equalsIgnoreCase(validationMode)) return ValidationMode.Validate;
 255  0
         if("validate-fix".equalsIgnoreCase(validationMode)) return ValidationMode.ValidateAndFix;
 256  0
         throw new IllegalArgumentException(
 257  
                 String.format("Invalid value '%s' for '%s' parameter.", validationMode, PARAMETER)
 258  
         );
 259  
     }
 260  
     
 261  
     private ExtractionParameters getExtractionParameters(HttpServletRequest request) {
 262  0
         final ValidationMode mode = getValidationMode(request);
 263  0
         return new ExtractionParameters(DefaultConfiguration.singleton(), mode);
 264  
     }
 265  
 
 266  
     private boolean isReport(HttpServletRequest request) {
 267  0
         return request.getParameter("report") != null;
 268  
     }
 269  
 
 270  
     private boolean isAnnotated(HttpServletRequest request) {
 271  0
         return request.getParameter("annotate") != null;
 272  
     }
 273  
 
 274  
 }