Coverage Report - org.apache.any23.extractor.html.HReviewExtractor
 
Classes in this File Line Coverage Branch Coverage Complexity
HReviewExtractor
0%
0/56
0%
0/6
1.25
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor.html;
 19  
 
 20  
 import org.apache.any23.extractor.ExtractionException;
 21  
 import org.apache.any23.extractor.ExtractionResult;
 22  
 import org.apache.any23.extractor.ExtractorDescription;
 23  
 import org.apache.any23.extractor.ExtractorFactory;
 24  
 import org.apache.any23.extractor.SimpleExtractorFactory;
 25  
 import org.apache.any23.extractor.TagSoupExtractionResult;
 26  
 import org.apache.any23.rdf.PopularPrefixes;
 27  
 import org.apache.any23.vocab.DCTERMS;
 28  
 import org.apache.any23.vocab.REVIEW;
 29  
 import org.apache.any23.vocab.VCARD;
 30  
 import org.openrdf.model.BNode;
 31  
 import org.openrdf.model.Resource;
 32  
 import org.openrdf.model.vocabulary.RDF;
 33  
 import org.w3c.dom.Node;
 34  
 
 35  
 import java.util.Arrays;
 36  
 import java.util.List;
 37  
 
 38  
 import static org.apache.any23.extractor.html.HTMLDocument.TextField;
 39  
 
 40  
 /**
 41  
  * Extractor for the <a href="http://microformats.org/wiki/hreview">hReview</a>
 42  
  * microformat.
 43  
  *
 44  
  * @author Gabriele Renzi
 45  
  */
 46  0
 public class HReviewExtractor extends EntityBasedMicroformatExtractor {
 47  
 
 48  0
     private static final REVIEW  vREVIEW  = REVIEW.getInstance();
 49  0
     private static final VCARD   vVCARD   = VCARD.getInstance();
 50  0
     private static final DCTERMS vDCTERMS = DCTERMS.getInstance();
 51  
 
 52  0
     public final static ExtractorFactory<HReviewExtractor> factory =
 53  
             SimpleExtractorFactory.create(
 54  
                     "html-mf-hreview",
 55  
                     PopularPrefixes.createSubset("rdf", "vcard", "rev"),
 56  
                     Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
 57  
                     "example-mf-hreview.html",
 58  
                     HReviewExtractor.class
 59  
             );
 60  
 
 61  
     public ExtractorDescription getDescription() {
 62  0
         return factory;
 63  
     }
 64  
 
 65  
     protected String getBaseClassName() {
 66  0
         return "hreview";
 67  
     }
 68  
 
 69  
     @Override
 70  
     protected void resetExtractor() {
 71  
         // Empty.
 72  0
     }
 73  
 
 74  
     protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
 75  0
         BNode rev = getBlankNodeFor(node);
 76  0
         out.writeTriple(rev, RDF.TYPE, vREVIEW.Review);
 77  0
         final HTMLDocument fragment = new HTMLDocument(node);
 78  0
         addRating(fragment, rev);
 79  0
         addSummary(fragment, rev);
 80  0
         addTime(fragment, rev);
 81  0
         addType(fragment, rev);
 82  0
         addDescription(fragment, rev);
 83  0
         addItem(fragment, rev);
 84  0
         addReviewer(fragment, rev);
 85  
 
 86  0
         final TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
 87  0
         tser.addResourceRoot(
 88  
                 DomUtils.getXPathListForNode(node),
 89  
                 rev,
 90  
                 this.getClass()
 91  
         );
 92  
 
 93  0
         return true;
 94  
     }
 95  
 
 96  
     private void addType(HTMLDocument doc, Resource rev) {
 97  0
         TextField value = doc.getSingularTextField("type");
 98  0
         conditionallyAddStringProperty(
 99  
                 value.source(),
 100  
                 rev, vREVIEW.type, value.value()
 101  
         );
 102  0
     }
 103  
 
 104  
     private void addReviewer(HTMLDocument doc, Resource rev) {
 105  0
         List<Node> nodes = doc.findAllByClassName("reviewer");
 106  0
         if (nodes.size() > 0) {
 107  0
             Node node0 = nodes.get(0);
 108  0
             addBNodeProperty(
 109  
                     node0,
 110  
                     rev, vREVIEW.reviewer, getBlankNodeFor(node0)
 111  
             );
 112  
         }
 113  0
     }
 114  
 
 115  
     private void addItem(HTMLDocument root, BNode rev) throws ExtractionException {
 116  0
         List<Node> nodes = root.findAllByClassName("item");
 117  0
         for (Node node : nodes) {
 118  0
             Resource item = findDummy(new HTMLDocument(node));
 119  0
             addBNodeProperty(
 120  
                     node,
 121  
                     item, vREVIEW.hasReview, rev
 122  
             );
 123  0
         }
 124  0
     }
 125  
 
 126  
     private Resource findDummy(HTMLDocument item) throws ExtractionException {
 127  0
         Resource blank = getBlankNodeFor(item.getDocument());
 128  0
         TextField val = item.getSingularTextField("fn");
 129  0
         conditionallyAddStringProperty(
 130  
                 val.source(),
 131  
                 blank, vVCARD.fn, val.value()
 132  
         );
 133  0
         final TextField url = item.getSingularUrlField("url");
 134  0
         conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument().resolveURI(url.value()));
 135  0
         TextField pics[] = item.getPluralUrlField("photo");
 136  0
         for (TextField pic : pics) {
 137  0
             addURIProperty(blank, vVCARD.photo, getHTMLDocument().resolveURI(pic.value()));
 138  
         }
 139  0
         return blank;
 140  
     }
 141  
 
 142  
     private void addRating(HTMLDocument doc, Resource rev) {
 143  0
         HTMLDocument.TextField value = doc.getSingularTextField("rating");
 144  0
         conditionallyAddStringProperty(
 145  
                 value.source(), rev, vREVIEW.rating, value.value()
 146  
         );
 147  0
     }
 148  
 
 149  
     private void addSummary(HTMLDocument doc, Resource rev) {
 150  0
         TextField value = doc.getSingularTextField("summary");
 151  0
         conditionallyAddStringProperty(
 152  
                 value.source(),
 153  
                 rev, vREVIEW.title, value.value()
 154  
         );
 155  0
     }
 156  
 
 157  
     private void addTime(HTMLDocument doc, Resource rev) {
 158  0
         TextField value = doc.getSingularTextField("dtreviewed");
 159  0
         conditionallyAddStringProperty(
 160  
                 value.source(),
 161  
                 rev, vDCTERMS.date, value.value()
 162  
         );
 163  0
     }
 164  
 
 165  
     private void addDescription(HTMLDocument doc, Resource rev) {
 166  0
         TextField value = doc.getSingularTextField("description");
 167  0
         conditionallyAddStringProperty(
 168  
                 value.source(),
 169  
                 rev, vREVIEW.text, value.value()
 170  
         );
 171  0
     }
 172  
 
 173  
 }