Coverage Report - org.apache.any23.rdf.RDFUtils
 
Classes in this File Line Coverage Branch Coverage Complexity
RDFUtils
0%
0/93
0%
0/39
2.194
RDFUtils$1
0%
0/1
N/A
2.194
RDFUtils$BufferRDFHandler
0%
0/13
N/A
2.194
RDFUtils$Parser
0%
0/5
N/A
2.194
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.rdf;
 19  
 
 20  
 import org.apache.any23.io.nquads.NQuadsParser;
 21  
 import org.apache.any23.util.MathUtils;
 22  
 import org.openrdf.model.BNode;
 23  
 import org.openrdf.model.Literal;
 24  
 import org.openrdf.model.Resource;
 25  
 import org.openrdf.model.Statement;
 26  
 import org.openrdf.model.URI;
 27  
 import org.openrdf.model.Value;
 28  
 import org.openrdf.model.ValueFactory;
 29  
 import org.openrdf.model.impl.URIImpl;
 30  
 import org.openrdf.model.impl.ValueFactoryImpl;
 31  
 import org.openrdf.model.vocabulary.RDF;
 32  
 import org.openrdf.rio.RDFHandler;
 33  
 import org.openrdf.rio.RDFHandlerException;
 34  
 import org.openrdf.rio.RDFParseException;
 35  
 import org.openrdf.rio.RDFParser;
 36  
 import org.openrdf.rio.ntriples.NTriplesParser;
 37  
 import org.openrdf.rio.rdfxml.RDFXMLParser;
 38  
 import org.openrdf.rio.turtle.TurtleParser;
 39  
 
 40  
 import javax.xml.datatype.DatatypeConfigurationException;
 41  
 import javax.xml.datatype.DatatypeFactory;
 42  
 import javax.xml.datatype.XMLGregorianCalendar;
 43  
 import java.io.ByteArrayInputStream;
 44  
 import java.io.IOException;
 45  
 import java.io.InputStream;
 46  
 import java.net.URISyntaxException;
 47  
 import java.text.ParseException;
 48  
 import java.text.SimpleDateFormat;
 49  
 import java.util.ArrayList;
 50  
 import java.util.Date;
 51  
 import java.util.GregorianCalendar;
 52  
 import java.util.List;
 53  
 
 54  
 /**
 55  
  * Basic class providing a set of utility methods when dealing with <i>RDF</i>.
 56  
  *
 57  
  * @author Davide Palmisano (dpalmisano@gmail.com)
 58  
  * @author Michele Mostarda (mostarda@fbk.eu)
 59  
  */
 60  
 public class RDFUtils {
 61  
 
 62  
     /**
 63  
      * List of supported <i>RDF</i> parsers.
 64  
      */
 65  0
     public enum Parser {
 66  0
         RDFXML,
 67  0
         Turtle,
 68  0
         NTriples,
 69  0
         NQuads
 70  
     }
 71  
 
 72  0
     /** Shared factory used by the URI, literal, blank node and statement helpers of this class. */
     private static final ValueFactory valueFactory = ValueFactoryImpl.getInstance();
 73  
 
 74  
     /**
 75  
      * Fixes typical errors in an absolute URI, such as unescaped spaces.
 76  
      *
 77  
      * @param uri An absolute URI, can have typical syntax errors
 78  
      * @return An absolute URI that is valid against the URI syntax
 79  
      * @throws IllegalArgumentException if URI is not fixable
 80  
      */
 81  
     public static String fixAbsoluteURI(String uri) {
 82  0
         String fixed = fixURIWithException(uri);
 83  0
         if (!fixed.matches("[a-zA-Z0-9]+:/.*")) throw new IllegalArgumentException("not a absolute URI: " + uri);
 84  
         // Add trailing slash if URI has only authority but no path.
 85  0
         if (fixed.matches("https?://[a-zA-Z0-9.-]+(:[0-9+])?")) {
 86  0
             fixed = fixed + "/";
 87  
         }
 88  0
         return fixed;
 89  
     }
 90  
 
 91  
     /**
 92  
      * This method allows to obtain an <a href="http://www.w3.org/TR/xmlschema-2/#date">XML Schema</a> compliant date
 93  
      * providing a textual representation of a date and textual a pattern for parsing it.
 94  
      *
 95  
      * @param dateToBeParsed the String containing the date.
 96  
      * @param format the pattern as descibed in {@link java.text.SimpleDateFormat}
 97  
      * @return a {@link String} representing the date
 98  
      * @throws java.text.ParseException
 99  
      * @throws javax.xml.datatype.DatatypeConfigurationException
 100  
      */
 101  
     public static String getXSDDate(String dateToBeParsed, String format)
 102  
     throws ParseException, DatatypeConfigurationException {
 103  0
         SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
 104  0
         Date date = simpleDateFormat.parse(dateToBeParsed);
 105  0
         GregorianCalendar gc = new GregorianCalendar();
 106  0
         gc.setTime(date);
 107  0
         XMLGregorianCalendar xml = DatatypeFactory.newInstance().newXMLGregorianCalendar(gc);
 108  0
         xml.setTimezone(0);
 109  0
         return xml.toString();
 110  
     }
 111  
 
 112  
     /**
 113  
      * Prints a <code>date</code> to the XSD datetime format.
 114  
      *
 115  
      * @param date date to be printed.
 116  
      * @return the string representation of the input date.
 117  
      */
 118  
     public static String toXSDDateTime(Date date) {
 119  0
         SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
 120  0
         String s = simpleDateFormat.format(date);
 121  0
         StringBuilder sb = new StringBuilder(s);
 122  0
         sb.insert(22, ':');
 123  0
         return sb.toString();
 124  
     }
 125  
 
 126  
     /**
 127  
      * Tries to fix a potentially broken relative or absolute URI.
 128  
      *
 129  
      * <p/>
 130  
      * These appear to be good rules:
 131  
      * Remove whitespace or '\' or '"' in beginning and end
 132  
      * Replace space with %20
 133  
      * Drop the triple if it matches this regex (only protocol): ^[a-zA-Z0-9]+:(//)?$
 134  
      * Drop the triple if it matches this regex: ^javascript:
 135  
      * Truncate ">.*$ from end of lines (Neko didn't quite manage to fix broken markup)
 136  
      * Drop the triple if any of these appear in the URL: <>[]|*{}"<>\
 137  
      * <p/>
 138  
      *
 139  
      * @param unescapedURI uri string to be unescaped.
 140  
      * @return the unescaped string.
 141  
      */
 142  
     public static String fixURIWithException(String unescapedURI) {
 143  0
         if (unescapedURI == null) throw new IllegalArgumentException("URI was null");
 144  
 
 145  
         //    Remove starting and ending whitespace
 146  0
         String escapedURI = unescapedURI.trim();
 147  
 
 148  
         //Replace space with %20
 149  0
         escapedURI = escapedURI.replaceAll(" ", "%20");
 150  
 
 151  
         //strip linebreaks
 152  0
         escapedURI = escapedURI.replaceAll("\n", "");
 153  
 
 154  
         //'Remove starting  "\" or '"'
 155  0
         if (escapedURI.startsWith("\\") || escapedURI.startsWith("\"")) escapedURI = escapedURI.substring(1);
 156  
         //Remove  ending   "\" or '"'
 157  0
         if (escapedURI.endsWith("\\") || escapedURI.endsWith("\""))
 158  0
             escapedURI = escapedURI.substring(0, escapedURI.length() - 1);
 159  
 
 160  
         //Drop the triple if it matches this regex (only protocol): ^[a-zA-Z0-9]+:/?/?$
 161  0
         if (escapedURI.matches("^[a-zA-Z0-9]+:/?/?$"))
 162  0
             throw new IllegalArgumentException("no authority in URI: " + unescapedURI);
 163  
 
 164  
         //Drop the triple if it matches this regex: ^javascript:
 165  0
         if (escapedURI.matches("^javascript:"))
 166  0
             throw new IllegalArgumentException("URI starts with javascript: " + unescapedURI);
 167  
 
 168  
         // stripHTML
 169  
         // escapedURI = escapedURI.replaceAll("\\<.*?\\>", "");
 170  
 
 171  
         //>.*$ from end of lines (Neko didn't quite manage to fix broken markup)
 172  0
         escapedURI = escapedURI.replaceAll(">.*$", "");
 173  
 
 174  
         //Drop the triple if any of these appear in the URL: <>[]|*{}"<>\
 175  0
         if (escapedURI.matches("[<>\\[\\]|\\*\\{\\}\"\\\\]"))
 176  0
             throw new IllegalArgumentException("Invalid character in URI: " + unescapedURI);
 177  
 
 178  0
         return escapedURI;
 179  
     }
 180  
 
 181  
     /**
 182  
      * Creates a {@link URI}.
 183  
      */
 184  
     public static URI uri(String uri) {
 185  0
         return valueFactory.createURI(uri);
 186  
     }
 187  
 
 188  
     /**
 189  
      * Creates a {@link URI}.
 190  
      */
 191  
     public static URI uri(String namespace, String localName) {
 192  0
         return valueFactory.createURI(namespace, localName);
 193  
     }
 194  
 
 195  
     /**
 196  
      * Creates a {@link Literal}.
 197  
      */
 198  
     public static Literal literal(String s) {
 199  0
         return valueFactory.createLiteral(s);
 200  
     }
 201  
 
 202  
     /**
 203  
      * Creates a {@link Literal}.
 204  
      */
 205  
     public static Literal literal(boolean b) {
 206  0
         return valueFactory.createLiteral(b);
 207  
     }
 208  
 
 209  
     /**
 210  
      * Creates a {@link Literal}.
 211  
      */
 212  
     public static Literal literal(byte b) {
 213  0
         return valueFactory.createLiteral(b);
 214  
     }
 215  
 
 216  
     /**
 217  
      * Creates a {@link Literal}.
 218  
      */
 219  
     public static Literal literal(short s) {
 220  0
         return valueFactory.createLiteral(s);
 221  
     }
 222  
 
 223  
     /**
 224  
      * Creates a {@link Literal}.
 225  
      */
 226  
     public static Literal literal(int i) {
 227  0
         return valueFactory.createLiteral(i);
 228  
     }
 229  
 
 230  
     /**
 231  
      * Creates a {@link Literal}.
 232  
      */
 233  
     public static Literal literal(long l) {
 234  0
         return valueFactory.createLiteral(l);
 235  
     }
 236  
 
 237  
     /**
 238  
      * Creates a {@link Literal}.
 239  
      */
 240  
     public static Literal literal(float f) {
 241  0
         return valueFactory.createLiteral(f);
 242  
     }
 243  
 
 244  
     /**
 245  
      * Creates a {@link Literal}.
 246  
      */
 247  
     public static Literal literal(double d) {
 248  0
         return valueFactory.createLiteral(d);
 249  
     }
 250  
 
 251  
     /**
 252  
      * Creates a {@link Literal}.
 253  
      */
 254  
     public static Literal literal(String s, String l) {
 255  0
         return valueFactory.createLiteral(s, l);
 256  
     }
 257  
 
 258  
     /**
 259  
      * Creates a {@link Literal}.
 260  
      */
 261  
     public static Literal literal(String s, URI datatype) {
 262  0
         return valueFactory.createLiteral(s, datatype);
 263  
     }
 264  
 
 265  
     /**
 266  
      * Creates a {@link BNode}.
 267  
      */
 268  
     // TODO: replace this with all occurrences of #getBNode()
 269  
     public static BNode bnode(String id) {
 270  0
         return valueFactory.createBNode(id);
 271  
     }
 272  
 
 273  
     /**
 274  
      * @return a <code>bnode</code> with unique id.
 275  
      */
 276  
     public static BNode bnode() {
 277  0
         return valueFactory.createBNode();
 278  
     }
 279  
 
 280  
     /**
 281  
      * Creates a {@link BNode}.
 282  
      */
 283  
     public static BNode getBNode(String id) {
 284  0
         return valueFactory.createBNode(
 285  
             "node" + MathUtils.md5(id)
 286  
         );
 287  
     }
 288  
 
 289  
     /**
 290  
      * Creates a {@link Statement}.
 291  
      */
 292  
     public static Statement triple(Resource s, URI p, Value o) {
 293  0
         return valueFactory.createStatement(s, p, o);
 294  
     }
 295  
 
 296  
     /**
 297  
      * Creates a {@link Statement}.
 298  
      */
 299  
     public static Statement quad(Resource s, URI p, Value o, Resource g) {
 300  0
         return valueFactory.createStatement(s, p, o, g);
 301  
     }
 302  
 
 303  
     /**
 304  
      * Creates a {@link Value}. If <code>s == 'a'</code> returns
 305  
      * an {@link RDF#TYPE}. If <code> s.matches('[a-z0-9]+:.*')</code>
 306  
      * expands the corresponding prefix using {@link PopularPrefixes}.
 307  
      *
 308  
      * @param s
 309  
      * @return a value instance.
 310  
      */
 311  
     public static Value toRDF(String s) {
 312  0
         if ("a".equals(s)) return RDF.TYPE;
 313  0
         if (s.matches("[a-z0-9]+:.*")) {
 314  0
             return PopularPrefixes.get().expand(s);
 315  
         }
 316  0
         return valueFactory.createLiteral(s);
 317  
     }
 318  
 
 319  
     /**
 320  
      * Creates a statement of type: <code>toRDF(s), toRDF(p), toRDF(o)</code>
 321  
      *
 322  
      * @param s subject.
 323  
      * @param p predicate.
 324  
      * @param o object.
 325  
      * @return a statement instance.
 326  
      */
 327  
     public static Statement toTriple(String s, String p, String o) {
 328  0
         return valueFactory.createStatement((Resource) toRDF(s), (URI) toRDF(p), toRDF(o));
 329  
     }
 330  
 
 331  
     /**
 332  
      * Creates a new {@link RDFParser} instance.
 333  
      *
 334  
      * @param p parser type.
 335  
      * @return parser instance.
 336  
      * @throws IllegalArgumentException if parser is unsupported.
 337  
      */
 338  
     public static RDFParser getRDFParser(Parser p) {
 339  0
         switch (p) {
 340  
             case RDFXML:
 341  0
                 return new RDFXMLParser();
 342  
             case Turtle:
 343  0
                 return new TurtleParser();
 344  
             case NTriples:
 345  0
                 return new NTriplesParser();
 346  
             case NQuads:
 347  0
                 return new NQuadsParser();
 348  
             default:
 349  0
                 throw new IllegalArgumentException();
 350  
         }
 351  
     }
 352  
 
 353  
     /**
 354  
      * Returns a parser type from the given extension.
 355  
      *
 356  
      * @param ext input extension.
 357  
      * @return parser matching the extension.
 358  
      * @throws IllegalArgumentException if no extension matches.
 359  
      */
 360  
     public static Parser getParserFromExtension(String ext) {
 361  0
         if("rdf".equals(ext)) {
 362  0
             return Parser.RDFXML;
 363  
         }
 364  0
         if("ttl".equals(ext)) {
 365  0
             return Parser.Turtle;
 366  
         }
 367  0
         if("nt".equals(ext)) {
 368  0
             return Parser.NTriples;
 369  
         }
 370  0
         if("nq".equals(ext)) {
 371  0
             return Parser.NQuads;
 372  
         }
 373  0
         throw new IllegalArgumentException("Unknown extension : " + ext);
 374  
     }
 375  
 
 376  
     /**
 377  
      * Parses the content of <code>is</code> input stream with the
 378  
      * specified parser <code>p</code> using <code>baseURI</code>.
 379  
      *
 380  
      * @param p parser type.
 381  
      * @param is input stream containing <code>RDF</data>.
 382  
      * @param baseURI base uri.
 383  
      * @return list of statements detected within the input stream.
 384  
      * @throws RDFHandlerException
 385  
      * @throws IOException
 386  
      * @throws RDFParseException
 387  
      */
 388  
     public static Statement[] parseRDF(Parser p, InputStream is, String baseURI)
 389  
     throws RDFHandlerException, IOException, RDFParseException {
 390  0
         final BufferRDFHandler handler = new BufferRDFHandler();
 391  0
         final RDFParser parser = getRDFParser(p);
 392  0
         parser.setVerifyData(true);
 393  0
         parser.setStopAtFirstError(true);
 394  0
         parser.setPreserveBNodeIDs(true);
 395  0
         parser.setRDFHandler(handler);
 396  0
         parser.parse(is, baseURI);
 397  0
         return handler.statements.toArray( new Statement[handler.statements.size()] );
 398  
     }
 399  
 
 400  
     /**
 401  
      * Parses the content of <code>is</code> input stream with the
 402  
      * specified parser <code>p</code> using <code>''</code> as base URI.
 403  
      *
 404  
      * @param p parser type.
 405  
      * @param is input stream containing <code>RDF</data>.
 406  
      * @return list of statements detected within the input stream.
 407  
      * @throws RDFHandlerException
 408  
      * @throws IOException
 409  
      * @throws RDFParseException
 410  
      */
 411  
     public static Statement[] parseRDF(Parser p, InputStream is)
 412  
     throws RDFHandlerException, IOException, RDFParseException {
 413  0
         return parseRDF(p, is, "");
 414  
     }
 415  
 
 416  
     /**
 417  
      * Parses the content of <code>in</code> string with the
 418  
      * specified parser <code>p</code> using <code>''</code> as base URI.
 419  
      *
 420  
      * @param p parser type.
 421  
      * @param in input string containing <code>RDF</data>.
 422  
      * @return list of statements detected within the input string.
 423  
      * @throws RDFHandlerException
 424  
      * @throws IOException
 425  
      * @throws RDFParseException
 426  
      */
 427  
     public static Statement[] parseRDF(Parser p, String in)
 428  
     throws RDFHandlerException, IOException, RDFParseException {
 429  0
         return parseRDF(p, new ByteArrayInputStream(in.getBytes()));
 430  
     }
 431  
 
 432  
     /**
 433  
      * Parses the content of the <code>resource</code> file
 434  
      * guessing the content format from the extension.
 435  
      *
 436  
      * @param resource resource name.
 437  
      * @return the statements declared within the resource file.
 438  
      * @throws java.io.IOException if an error occurs while reading file.
 439  
      * @throws org.openrdf.rio.RDFHandlerException if an error occurs while parsing file.
 440  
      * @throws org.openrdf.rio.RDFParseException if an error occurs while parsing file.
 441  
      */
 442  
     public static Statement[] parseRDF(String resource) throws RDFHandlerException, IOException, RDFParseException {
 443  0
         final int extIndex = resource.lastIndexOf(".");
 444  0
         if(extIndex == -1)
 445  0
             throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource);
 446  0
         final String extension = resource.substring(extIndex + 1);
 447  0
         return parseRDF( getParserFromExtension(extension), RDFUtils.class.getResourceAsStream(resource) );
 448  
     }
 449  
 
 450  
     /**
 451  
      * Checks if <code>href</code> is absolute or not.
 452  
      *
 453  
      * @param href candidate URI.
 454  
      * @return <code>true</code> if <code>href</code> is absolute,
 455  
      *         <code>false</code> otherwise.
 456  
      */
 457  
     public static boolean isAbsoluteURI(String href) {
 458  
         try {
 459  0
             new URIImpl(href.trim());
 460  0
             new java.net.URI(href.trim());
 461  0
             return true;
 462  0
         } catch (IllegalArgumentException e) {
 463  0
             return false;
 464  0
         } catch (URISyntaxException e) {
 465  0
             return false;
 466  
         }
 467  
     }
 468  
 
 469  0
     /** Not meant to be instantiated: this class only exposes static members. */
     private RDFUtils() {}
 470  
 
 471  0
     private static class BufferRDFHandler implements RDFHandler {
 472  
 
 473  0
         private final List<Statement> statements = new ArrayList<Statement>();
 474  
 
 475  0
         private int documents = 0;
 476  0
         private boolean open = false;
 477  
 
 478  
         @Override
 479  
         public void startRDF() throws RDFHandlerException {
 480  0
             documents++;
 481  0
             open = true;
 482  0
         }
 483  
 
 484  
         @Override
 485  
         public void endRDF() throws RDFHandlerException {
 486  0
             open = false;
 487  0
         }
 488  
 
 489  
         @Override
 490  
         public void handleNamespace(String s, String s1) throws RDFHandlerException {
 491  
             // Empty.
 492  0
         }
 493  
 
 494  
         @Override
 495  
         public void handleStatement(Statement statement) throws RDFHandlerException {
 496  0
             statements.add(statement);
 497  0
         }
 498  
 
 499  
         @Override
 500  
         public void handleComment(String s) throws RDFHandlerException {
 501  
             // Empty.
 502  0
         }
 503  
 
 504  
     }
 505  
 
 506  
 }