Coverage Report - org.apache.any23.extractor.TagSoupExtractionResult
 
Classes in this File Line Coverage Branch Coverage Complexity
TagSoupExtractionResult
N/A
N/A
1.938
TagSoupExtractionResult$PropertyPath
0%
0/21
0%
0/8
1.938
TagSoupExtractionResult$ResourceRoot
0%
0/15
0%
0/8
1.938
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor;
 19  
 
 20  
 import org.apache.any23.extractor.html.MicroformatExtractor;
 21  
 import org.openrdf.model.BNode;
 22  
 import org.openrdf.model.Resource;
 23  
 
 24  
 import java.util.Arrays;
 25  
 import java.util.List;
 26  
 
 27  
 /**
 28  
  * This interface models a specific {@link ExtractionResult}
 29  
  * able to collect property roots generated by <i>HTML Microformat</i> extractions.
 30  
  *
 31  
  * @author Michele Mostarda (mostarda@fbk.eu)
 32  
  */
 33  
 public interface TagSoupExtractionResult extends ExtractionResult {
 34  
 
 35  
     /**
 36  
      * Adds a root property to the extraction result, specifying also
 37  
      * the <i>path</i> corresponding to the root of data which generated the property
 38  
      * and the extractor responsible for such addition.
 39  
      *
 40  
      * @param path the <i>path</i> from the document root to the local root of the data generating the property.
 41  
      * @param root the property root node.
 42  
      * @param extractor the extractor responsible for such extraction.
 43  
      */
 44  
     void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor);
 45  
 
 46  
     /**
 47  
      * Returns all the collected property roots.
 48  
      *
 49  
      * @return an <b>unmodifiable</b> list of
 50  
      *         {@link TagSoupExtractionResult.ResourceRoot}s.
 51  
      */
 52  
     List<ResourceRoot> getResourceRoots();
 53  
 
 54  
     /**
 55  
      * Adds a property path to the list of the extracted data.
 56  
      *
 57  
      * @param extractor the identifier of the extractor responsible for retrieving such property.
 58  
      * @param propertySubject the subject of the property.
 59  
      * @param property the property URI.
 60  
      * @param object the property object if any, <code>null</code> otherwise.
 61  
      * @param path the path of the <i>HTML</i> node from which the property literal has been extracted.
 62  
      */
 63  
     void addPropertyPath(
 64  
             Class<? extends MicroformatExtractor> extractor,
 65  
             Resource propertySubject,
 66  
             Resource property,
 67  
             BNode object,
 68  
             String[] path
 69  
     );
 70  
 
 71  
     /**
 72  
      * Returns all the collected property paths.
 73  
      *
 74  
      * @return a valid list of property paths.
 75  
      */
 76  
     List<PropertyPath> getPropertyPaths();
 77  
 
 78  
     /**
 79  
      * Defines a property root object.
 80  
      */
 81  
     class ResourceRoot {
 82  
         private String[] path;
 83  
         private Resource root;
 84  
         private Class<? extends MicroformatExtractor>  extractor;
 85  
 
 86  0
         public ResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
 87  0
             if(path == null || path.length == 0) {
 88  0
                 throw new IllegalArgumentException( String.format("Invalid xpath: '%s'.", Arrays.toString(path) ) );
 89  
             }
 90  0
             if(root == null) {
 91  0
                 throw new IllegalArgumentException("Invalid root, cannot be null.");
 92  
             }
 93  0
             if(extractor == null) {
 94  0
                 throw new IllegalArgumentException("Invalid extractor, cannot ne null");
 95  
             }
 96  0
             this.path      = path;
 97  0
             this.root      = root;
 98  0
             this.extractor = extractor;
 99  0
         }
 100  
 
 101  
         public String[] getPath() {
 102  0
             return path;
 103  
         }
 104  
 
 105  
         public Resource getRoot() {
 106  0
             return root;
 107  
         }
 108  
 
 109  
         public Class<? extends MicroformatExtractor> getExtractor() {
 110  0
             return extractor;
 111  
         }
 112  
 
 113  
         @Override
 114  
         public String toString() {
 115  0
             return String.format(
 116  
                     "%s-%s-%s %s",
 117  
                     this.getClass().getCanonicalName(),
 118  
                     Arrays.toString(path), 
 119  
                     root,
 120  
                     extractor
 121  
             );
 122  
         }
 123  
     }
 124  
 
 125  
     /**
 126  
      * Defines a property path object.
 127  
      */
 128  
     class PropertyPath {
 129  
 
 130  
         private Class<? extends MicroformatExtractor>  extractor;
 131  
         private String[] path;
 132  
         private Resource subject;
 133  
         private Resource property;
 134  
         private BNode    object;
 135  
 
 136  0
         public PropertyPath(String[] path, Resource subject, Resource property, BNode object, Class<? extends MicroformatExtractor> extractor) {
 137  0
             if(path == null) {
 138  0
                 throw new NullPointerException("path cannot be null.");
 139  
             }
 140  0
             if(subject == null) {
 141  0
                 throw new NullPointerException("subject cannot be null.");
 142  
             }
 143  0
             if(property == null) {
 144  0
                 throw new NullPointerException("property cannot be null.");
 145  
             }
 146  0
             if(extractor == null) {
 147  0
                 throw new NullPointerException("extractor cannot be null.");
 148  
             }
 149  0
             this.path      = path;
 150  0
             this.subject   = subject;
 151  0
             this.property  = property;
 152  0
             this.object    = object;
 153  0
             this.extractor = extractor;
 154  0
         }
 155  
 
 156  
         public String[] getPath() {
 157  0
             return path;
 158  
         }
 159  
 
 160  
         public Resource getSubject() {
 161  0
             return subject;
 162  
         }
 163  
 
 164  
         public Resource getProperty() {
 165  0
             return property;
 166  
         }
 167  
 
 168  
         public BNode getObject() {
 169  0
             return object;
 170  
         }
 171  
 
 172  
         public Class<? extends MicroformatExtractor> getExtractor() {
 173  0
             return extractor;
 174  
         }
 175  
 
 176  
         @Override
 177  
          public String toString() {
 178  0
             return String.format(
 179  
                     "%s %s - %s - %s -- %s -->",
 180  
                     this.getClass().getCanonicalName(),
 181  
                     Arrays.toString(path),
 182  
                     extractor,
 183  
                     subject,
 184  
                     property
 185  
             );
 186  
         }
 187  
     }
 188  
 
 189  
 }