Coverage Report - org.apache.any23.extractor.rdf.RDFParserFactory
 
Classes in this File Line Coverage Branch Coverage Complexity
RDFParserFactory
0%
0/29
0%
0/6
1.571
RDFParserFactory$1
N/A
N/A
1.571
RDFParserFactory$ExtendedTurtleParser
0%
0/7
N/A
1.571
RDFParserFactory$InternalParseErrorListener
0%
0/21
0%
0/2
1.571
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor.rdf;
 19  
 
 20  
 import org.apache.any23.extractor.ErrorReporter;
 21  
 import org.apache.any23.extractor.ExtractionContext;
 22  
 import org.apache.any23.extractor.ExtractionResult;
 23  
 import org.apache.any23.io.nquads.NQuadsParser;
 24  
 import org.apache.any23.rdf.Any23ValueFactoryWrapper;
 25  
 import org.openrdf.model.impl.ValueFactoryImpl;
 26  
 import org.openrdf.rio.ParseErrorListener;
 27  
 import org.openrdf.rio.RDFHandlerException;
 28  
 import org.openrdf.rio.RDFParseException;
 29  
 import org.openrdf.rio.RDFParser;
 30  
 import org.openrdf.rio.ntriples.NTriplesParser;
 31  
 import org.openrdf.rio.rdfxml.RDFXMLParser;
 32  
 import org.openrdf.rio.trix.TriXParser;
 33  
 import org.openrdf.rio.turtle.TurtleParser;
 34  
 import org.slf4j.Logger;
 35  
 import org.slf4j.LoggerFactory;
 36  
 
 37  
 import java.io.IOException;
 38  
 import java.io.InputStream;
 39  
 import java.io.Reader;
 40  
 
 41  
 /**
 42  
  * This factory provides common logic for creating and correctly configuring
 43  
  * any <i>RDF</i> parser used within the library.
 44  
  *
 45  
  * @author Michele Mostarda (mostarda@fbk.eu)
 46  
  */
 47  0
 public class RDFParserFactory {
 48  
 
 49  0
     private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class);
 50  
 
 51  
     private static RDFParserFactory instance;
 52  
 
 53  
     public static RDFParserFactory getInstance() {
 54  0
         if(instance == null) {
 55  0
             instance = new RDFParserFactory();
 56  
         }
 57  0
         return instance;
 58  
     }
 59  
 
 60  
     /**
 61  
      * Returns a new instance of a configured {@link org.openrdf.rio.turtle.TurtleParser}.
 62  
      *
 63  
      * @param verifyDataType data verification enable if <code>true</code>.
 64  
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
 65  
      * @param extractionContext the extraction context where the parser is used.
 66  
      * @param extractionResult the output extraction result.
 67  
      * @return a new instance of a configured Turtle parser.
 68  
      */
 69  
     public TurtleParser getTurtleParserInstance(
 70  
             final boolean verifyDataType,
 71  
             final boolean stopAtFirstError,
 72  
             final ExtractionContext extractionContext,
 73  
             final ExtractionResult extractionResult
 74  
     ) {
 75  0
         if (extractionResult == null) {
 76  0
             throw new NullPointerException("extractionResult cannot be null.");
 77  
         }
 78  0
         final TurtleParser parser = new ExtendedTurtleParser();
 79  0
         configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
 80  0
         return parser;
 81  
     }
 82  
 
 83  
     /**
 84  
      * Returns a new instance of a configured {@link org.openrdf.rio.rdfxml.RDFXMLParser}.
 85  
      *
 86  
      * @param verifyDataType data verification enable if <code>true</code>.
 87  
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
 88  
      * @param extractionContext the extraction context where the parser is used.
 89  
      * @param extractionResult the output extraction result.
 90  
      * @return a new instance of a configured RDFXML parser.
 91  
      */
 92  
     public RDFXMLParser getRDFXMLParser(
 93  
             final boolean verifyDataType,
 94  
             final boolean stopAtFirstError,
 95  
             final ExtractionContext extractionContext,
 96  
             final ExtractionResult extractionResult
 97  
     ) {
 98  0
         final RDFXMLParser parser = new RDFXMLParser();
 99  0
         configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
 100  0
         return parser;
 101  
     }
 102  
 
 103  
     /**
 104  
      * Returns a new instance of a configured {@link org.openrdf.rio.ntriples.NTriplesParser}.
 105  
      *
 106  
      * @param verifyDataType data verification enable if <code>true</code>.
 107  
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
 108  
      * @param extractionContext the extraction context where the parser is used.
 109  
      * @param extractionResult the output extraction result.
 110  
      * @return a new instance of a configured NTriples parser.
 111  
      */
 112  
     public NTriplesParser getNTriplesParser(
 113  
             final boolean verifyDataType,
 114  
             final boolean stopAtFirstError,
 115  
             final ExtractionContext extractionContext,
 116  
             final ExtractionResult extractionResult
 117  
     ) {
 118  0
         final NTriplesParser parser = new NTriplesParser();
 119  0
         configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
 120  0
         return parser;
 121  
     }
 122  
 
 123  
     /**
 124  
      * Returns a new instance of a configured {@link org.apache.any23.io.nquads.NQuadsParser}.
 125  
      *
 126  
      * @param verifyDataType data verification enable if <code>true</code>.
 127  
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
 128  
      * @param extractionContext the extraction context where the parser is used.
 129  
      * @param extractionResult the output extraction result.
 130  
      * @return a new instance of a configured NQuads parser.
 131  
      */
 132  
     public NQuadsParser getNQuadsParser(
 133  
             final boolean verifyDataType,
 134  
             final boolean stopAtFirstError,
 135  
             final ExtractionContext extractionContext,
 136  
             final ExtractionResult extractionResult
 137  
     ) {
 138  0
         final NQuadsParser parser = new NQuadsParser();
 139  0
         configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
 140  0
         return parser;
 141  
     }
 142  
 
 143  
     /**
 144  
      * Returns a new instance of a configured {@link TriXParser}.
 145  
      *
 146  
      * @param verifyDataType data verification enable if <code>true</code>.
 147  
      * @param stopAtFirstError the parser stops at first error if <code>true</code>.
 148  
      * @param extractionContext the extraction context where the parser is used.
 149  
      * @param extractionResult the output extraction result.
 150  
      * @return a new instance of a configured TriX parser.
 151  
      */
 152  
     public TriXParser getTriXParser(
 153  
             final boolean verifyDataType,
 154  
             final boolean stopAtFirstError,
 155  
             final ExtractionContext extractionContext,
 156  
             final ExtractionResult extractionResult
 157  
     ) {
 158  0
         final TriXParser parser = new TriXParser();
 159  0
         configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
 160  0
         return parser;
 161  
     }
 162  
 
 163  
     /**
 164  
      * Configures the given parser on the specified extraction result
 165  
      * setting the policies for data verification and error handling.
 166  
      *
 167  
      * @param parser the parser to be configured.
 168  
      * @param verifyDataType enables the data verification.
 169  
      * @param stopAtFirstError enables the tolerant error handling.
 170  
      * @param extractionContext the extraction context in which the parser is used.
 171  
      * @param extractionResult the extraction result used to collect the parsed data.
 172  
      */
 173  
     // TODO: what about passing just default language and ErrorReport to configureParser() ?
 174  
     private void configureParser(
 175  
             final RDFParser parser,
 176  
             final boolean verifyDataType,
 177  
             final boolean stopAtFirstError,
 178  
             final ExtractionContext extractionContext,
 179  
             final ExtractionResult extractionResult
 180  
     ) {
 181  0
         parser.setDatatypeHandling(
 182  
             verifyDataType ? RDFParser.DatatypeHandling.VERIFY : RDFParser.DatatypeHandling.IGNORE
 183  
         );
 184  0
         parser.setStopAtFirstError(stopAtFirstError);
 185  0
         parser.setParseErrorListener( new InternalParseErrorListener(extractionResult) );
 186  0
         parser.setValueFactory(
 187  
                 new Any23ValueFactoryWrapper(
 188  
                         ValueFactoryImpl.getInstance(),
 189  
                         extractionResult,
 190  
                         extractionContext.getDefaultLanguage()
 191  
                 )
 192  
         );
 193  0
         parser.setRDFHandler(new RDFHandlerAdapter(extractionResult));
 194  0
     }
 195  
 
 196  
     /**
 197  
      * Internal listener used to trace <i>RDF</i> parse errors.
 198  
      */
 199  
     private class InternalParseErrorListener implements ParseErrorListener {
 200  
 
 201  
         private final ErrorReporter extractionResult;
 202  
 
 203  0
         public InternalParseErrorListener(ErrorReporter er) {
 204  0
             extractionResult = er;
 205  0
         }
 206  
 
 207  
         public void warning(String msg, int lineNo, int colNo) {
 208  
             try {
 209  0
                 extractionResult.notifyError(ExtractionResult.ErrorLevel.WARN, msg, lineNo, colNo);
 210  0
             } catch (Exception e) {
 211  0
                 notifyExceptionInNotification(e);
 212  0
             }
 213  0
         }
 214  
 
 215  
         public void error(String msg, int lineNo, int colNo) {
 216  
             try {
 217  0
                 extractionResult.notifyError(ExtractionResult.ErrorLevel.ERROR, msg, lineNo, colNo);
 218  0
             } catch (Exception e) {
 219  0
                 notifyExceptionInNotification(e);
 220  0
             }
 221  0
         }
 222  
 
 223  
         public void fatalError(String msg, int lineNo, int colNo) {
 224  
             try {
 225  0
                 extractionResult.notifyError(ExtractionResult.ErrorLevel.FATAL, msg, lineNo, colNo);
 226  0
             } catch (Exception e) {
 227  0
                 notifyExceptionInNotification(e);
 228  0
             }
 229  0
         }
 230  
 
 231  
         private void notifyExceptionInNotification(Exception e) {
 232  0
             if (logger != null) {
 233  0
                 logger.error("An exception occurred while notifying an error.", e);
 234  
             }
 235  0
         }
 236  
     }
 237  
 
 238  
     /**
 239  
      * This extended Turtle parser sets the default namespace to the base URI
 240  
      * before the parsing.
 241  
      */
 242  0
     private class ExtendedTurtleParser extends TurtleParser {
 243  
         @Override
 244  
         public void parse(Reader reader, String baseURI)
 245  
         throws IOException, RDFParseException, RDFHandlerException {
 246  0
             setNamespace("", baseURI);
 247  0
             super.parse(reader, baseURI);
 248  0
         }
 249  
 
 250  
         @Override
 251  
         public void parse(InputStream in, String baseURI)
 252  
         throws IOException, RDFParseException, RDFHandlerException {
 253  0
             setNamespace("", baseURI);
 254  0
             super.parse(in, baseURI);
 255  0
         }
 256  
     }
 257  
 }