Coverage Report - org.apache.any23.extractor.ExtractionResultImpl
 
Classes in this File Line Coverage Branch Coverage Complexity
ExtractionResultImpl
0%
0/120
0%
0/66
3.778
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor;
 19  
 
 20  
 import org.apache.any23.extractor.html.MicroformatExtractor;
 21  
 import org.apache.any23.rdf.Prefixes;
 22  
 import org.apache.any23.writer.TripleHandler;
 23  
 import org.apache.any23.writer.TripleHandlerException;
 24  
 import org.openrdf.model.BNode;
 25  
 import org.openrdf.model.Resource;
 26  
 import org.openrdf.model.URI;
 27  
 import org.openrdf.model.Value;
 28  
 
 29  
 import java.io.PrintStream;
 30  
 import java.util.ArrayList;
 31  
 import java.util.Collection;
 32  
 import java.util.Collections;
 33  
 import java.util.HashSet;
 34  
 import java.util.List;
 35  
 import java.util.Set;
 36  
 
 37  
 /**
 38  
  * <p/>
 39  
  * A default implementation of {@link ExtractionResult}; it receives
 40  
  * extraction output from one {@link Extractor} working on one document,
 41  
  * and passes the output on to a {@link TripleHandler}. It deals with
 42  
  * details such as creation of {@link ExtractionContext} objects
 43  
  * and closing any open contexts at the end of extraction.
 44  
  * <p/>
 45  
  * The {@link #close()} method must be invoked after the extractor has
 46  
  * finished processing.
 47  
  * <p/>
 48  
  * There is usually no need to provide additional implementations
 49  
  * of the {@link ExtractionResult} interface.
 50  
  * <p/>
 51  
  *
 52  
  * @see org.apache.any23.writer.TripleHandler
 53  
  * @see ExtractionContext
 54  
  * @author Richard Cyganiak (richard@cyganiak.de)
 55  
  * @author Michele Mostarda (michele.mostarda@gmail.com)
 56  
  */
 57  
 public class ExtractionResultImpl implements TagSoupExtractionResult {
 58  
 
 59  
     // Context of this result; handed to the triple handler on every call made by this class.
     private final ExtractionContext context;
 60  
 
 61  
     // Extractor whose description supplies the prefixes emitted when the context is first opened.
     private final Extractor<?> extractor;
 62  
 
 63  
     // Handler that receives the contexts, namespaces and triples written through this result.
     private final TripleHandler tripleHandler;
 64  
 
 65  0
     // Results created by openSubResult(); they are closed together with this result.
     private final Collection<ExtractionResult> subResults = new ArrayList<ExtractionResult>();
 66  
 
 67  0
     // Unique context IDs already used by this result, kept to reject duplicates in openSubResult().
     private final Set<Object> knownContextIDs = new HashSet<Object>();
 68  
 
 69  0
     // Set by close(); once true, checkOpen() throws IllegalStateException.
     private boolean isClosed = false;
 70  
 
 71  0
     // Set by checkOpen() the first time the context is opened on the handler.
     private boolean isInitialized = false;
 72  
 
 73  
     // Created lazily by notifyError(); null means no error has been notified.
     private List<Error> errors;
 74  
 
 75  
     // Created lazily by addResourceRoot(); null until the first root is added.
     private List<ResourceRoot> resourceRoots;
 76  
 
 77  
     // Created lazily by addPropertyPath(); null until the first path is added.
     private List<PropertyPath> propertyPaths;
 78  
 
 79  
     public ExtractionResultImpl(
 80  
             ExtractionContext context,
 81  
             Extractor<?> extractor,
 82  
             TripleHandler tripleHandler
 83  0
     ) {
 84  0
         if(context == null) {
 85  0
             throw new NullPointerException("context cannot be null.");
 86  
         }
 87  0
         if(extractor == null) {
 88  0
             throw new NullPointerException("extractor cannot be null.");
 89  
         }
 90  0
         if(tripleHandler == null) {
 91  0
             throw new NullPointerException("triple handler cannot be null.");
 92  
         }
 93  
 
 94  0
         this.extractor       = extractor;
 95  0
         this.tripleHandler   = tripleHandler;
 96  0
         this.context         = context;
 97  0
         knownContextIDs.add( context.getUniqueID() );
 98  0
     }
 99  
 
 100  
     public boolean hasErrors() {
 101  0
         return errors != null;
 102  
     }
 103  
 
 104  
     public int getErrorsCount() {
 105  0
         return errors == null ? 0 : errors.size();
 106  
     }
 107  
 
 108  
     public void printErrorsReport(PrintStream ps) {
 109  0
         ps.print(String.format("Context: %s [errors: %d] {\n", context, getErrorsCount()));
 110  0
         if (errors != null) {
 111  0
             for (Error error : errors) {
 112  0
                 ps.print(error.toString());
 113  0
                 ps.print("\n");
 114  
             }
 115  
         }
 116  
         // Printing sub results.
 117  0
         for (ExtractionResult er : subResults) {
 118  0
             er.printErrorsReport(ps);
 119  
         }
 120  0
         ps.print("}\n");
 121  0
     }
 122  
 
 123  
     public Collection<Error> getErrors() {
 124  0
         return errors == null ? Collections.<Error>emptyList() : Collections.unmodifiableList(errors);
 125  
     }
 126  
 
 127  
     public ExtractionResult openSubResult(ExtractionContext context) {
 128  0
         final String contextID = context.getUniqueID();
 129  0
         if (knownContextIDs.contains(contextID)) {
 130  0
             throw new IllegalArgumentException("Duplicate contextID: " + contextID);
 131  
         }
 132  0
         knownContextIDs.add(contextID);
 133  
 
 134  0
         checkOpen();
 135  0
         ExtractionResult result =
 136  
                 new ExtractionResultImpl(context, extractor, tripleHandler);
 137  0
         subResults.add(result);
 138  0
         return result;
 139  
     }
 140  
 
 141  
     public ExtractionContext getExtractionContext() {
 142  0
         return context;
 143  
     }
 144  
 
 145  
     public void writeTriple(Resource s, URI p, Value o, URI g) {
 146  0
         if (s == null || p == null || o == null) return;
 147  
         // Check for misconstructed literals or BNodes, Sesame does not catch this.
 148  0
         if (s.stringValue() == null || p.stringValue() == null || o.stringValue() == null) {
 149  0
             throw new IllegalArgumentException("The statement arguments must be not null.");
 150  
         }
 151  0
         checkOpen();
 152  
         try {
 153  0
             tripleHandler.receiveTriple(s, p, o, g, context);
 154  0
         } catch (TripleHandlerException e) {
 155  0
             throw new RuntimeException(
 156  
                     String.format("Error while receiving triple %s %s %s", s, p, o ),
 157  
                     e
 158  
             );
 159  0
         }
 160  0
     }
 161  
 
 162  
     public void writeTriple(Resource s, URI p, Value o) {
 163  0
         writeTriple(s, p, o, null);
 164  0
     }
 165  
 
 166  
     public void writeNamespace(String prefix, String uri) {
 167  0
         checkOpen();
 168  
         try {
 169  0
             tripleHandler.receiveNamespace(prefix, uri, context);
 170  0
         } catch (TripleHandlerException e) {
 171  0
             throw new RuntimeException(
 172  
                     String.format("Error while writing namespace %s:%s", prefix, uri),
 173  
                     e
 174  
             );
 175  0
         }
 176  0
     }
 177  
 
 178  
     public void notifyError(ErrorLevel level, String msg, int row, int col) {
 179  0
         if(errors == null) {
 180  0
             errors = new ArrayList<Error>();
 181  
         }
 182  0
         errors.add( new Error(level, msg, row, col) );
 183  0
     }
 184  
 
 185  
     public void close() {
 186  0
         if (isClosed) return;
 187  0
         isClosed = true;
 188  0
         for (ExtractionResult subResult : subResults) {
 189  0
             subResult.close();
 190  
         }
 191  0
         if (isInitialized) {
 192  
             try {
 193  0
                 tripleHandler.closeContext(context);
 194  0
             } catch (TripleHandlerException e) {
 195  0
                 throw new RuntimeException("Error while opening context", e);
 196  0
             }
 197  
         }
 198  0
     }
 199  
 
 200  
     private void checkOpen() {
 201  0
         if (!isInitialized) {
 202  0
             isInitialized = true;
 203  
             try {
 204  0
                 tripleHandler.openContext(context);
 205  0
             } catch (TripleHandlerException e) {
 206  0
                 throw new RuntimeException("Error while opening context", e);
 207  0
             }
 208  0
             Prefixes prefixes = extractor.getDescription().getPrefixes();
 209  0
             for (String prefix : prefixes.allPrefixes()) {
 210  
                 try {
 211  0
                     tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceURIFor(prefix), context);
 212  0
                 } catch (TripleHandlerException e) {
 213  0
                     throw new RuntimeException(String.format("Error while writing namespace %s", prefix),
 214  
                             e
 215  
                     );
 216  0
                 }
 217  
             }
 218  
         }
 219  0
         if (isClosed) {
 220  0
             throw new IllegalStateException("Not open: " + context);
 221  
         }
 222  0
     }
 223  
 
 224  
     public void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
 225  0
         if(resourceRoots == null) {
 226  0
             resourceRoots = new ArrayList<ResourceRoot>();
 227  
         }
 228  0
         resourceRoots.add( new ResourceRoot(path, root, extractor) );
 229  0
     }
 230  
 
 231  
     public List<ResourceRoot> getResourceRoots() {
 232  0
         List<ResourceRoot> allRoots = new ArrayList<ResourceRoot>();
 233  0
         if(resourceRoots != null) {
 234  0
             allRoots.addAll( resourceRoots );
 235  
         }
 236  0
         for(ExtractionResult er : subResults) {
 237  0
             ExtractionResultImpl eri = (ExtractionResultImpl) er;
 238  0
             if( eri.resourceRoots != null ) {
 239  0
                 allRoots.addAll( eri.resourceRoots );
 240  
             }
 241  0
         }
 242  0
         return allRoots;
 243  
     }
 244  
 
 245  
     public void addPropertyPath(
 246  
             Class<? extends MicroformatExtractor> extractor,
 247  
             Resource propertySubject,
 248  
             Resource property,
 249  
             BNode object,
 250  
             String[] path
 251  
     ) {
 252  0
         if(propertyPaths == null) {
 253  0
             propertyPaths = new ArrayList<PropertyPath>();
 254  
         }
 255  0
         propertyPaths.add( new PropertyPath(path, propertySubject, property, object, extractor) );
 256  0
     }
 257  
 
 258  
     public List<PropertyPath> getPropertyPaths() {
 259  0
         List<PropertyPath> allPaths = new ArrayList<PropertyPath>();
 260  0
         if(propertyPaths != null) {
 261  0
             allPaths.addAll( propertyPaths );
 262  
         }
 263  0
         for(ExtractionResult er : subResults) {
 264  0
             ExtractionResultImpl eri = (ExtractionResultImpl) er;
 265  0
             if( eri.propertyPaths != null ) {
 266  0
                 allPaths.addAll( eri.propertyPaths );
 267  
             }
 268  0
         }
 269  0
         return allPaths;
 270  
     }
 271  
 
 272  
     @Override
 273  
     public String toString() {
 274  0
         final StringBuilder sb = new StringBuilder();
 275  0
         sb.append(context.toString());
 276  0
         sb.append('\n');
 277  0
         if (errors != null) {
 278  0
             sb.append("Errors {\n");
 279  0
             for (Error error : errors) {
 280  0
                 sb.append('\t');
 281  0
                 sb.append(error.toString());
 282  0
                 sb.append('\n');
 283  
             }
 284  
         }
 285  0
         sb.append("}\n");
 286  0
         return sb.toString();
 287  
     }
 288  
 
 289  
 }