Coverage Report - org.apache.any23.extractor.html.HRecipeExtractor
 
Classes in this File Line Coverage Branch Coverage Complexity
HRecipeExtractor
0%
0/75
0%
0/12
1.316
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor.html;
 19  
 
 20  
 import org.apache.any23.extractor.ExtractionException;
 21  
 import org.apache.any23.extractor.ExtractionResult;
 22  
 import org.apache.any23.extractor.ExtractorDescription;
 23  
 import org.apache.any23.extractor.ExtractorFactory;
 24  
 import org.apache.any23.extractor.SimpleExtractorFactory;
 25  
 import org.apache.any23.rdf.PopularPrefixes;
 26  
 import org.apache.any23.vocab.HRECIPE;
 27  
 import org.openrdf.model.BNode;
 28  
 import org.openrdf.model.URI;
 29  
 import org.openrdf.model.vocabulary.RDF;
 30  
 import org.w3c.dom.Node;
 31  
 
 32  
 import java.util.Arrays;
 33  
 
 34  
 /**
 35  
  * Extractor for the <a href="http://microformats.org/wiki/hrecipe">hRecipe</a>
 36  
  * microformat.
 37  
  *
 38  
  * @author Michele Mostarda (mostarda@fbk.eu)
 39  
  */
 40  0
 public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
 41  
 
 42  0
     private static final HRECIPE vHRECIPE = HRECIPE.getInstance();
 43  
 
 44  0
     public final static ExtractorFactory<HRecipeExtractor> factory =
 45  
             SimpleExtractorFactory.create(
 46  
                     "html-mf-hrecipe",
 47  
                     PopularPrefixes.createSubset("rdf", "hrecipe"),
 48  
                     Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
 49  
                     "example-mf-hrecipe.html",
 50  
                     HRecipeExtractor.class
 51  
             );
 52  
 
 53  
 
 54  
     @Override
 55  
     public ExtractorDescription getDescription() {
 56  0
         return factory;
 57  
     }
 58  
 
 59  
     @Override
 60  
     protected String getBaseClassName() {
 61  0
         return "hrecipe";
 62  
     }
 63  
 
 64  
     @Override
 65  
     protected void resetExtractor() {
 66  
         // Empty.
 67  0
     }
 68  
 
 69  
     @Override
 70  
     protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
 71  0
         final BNode recipe = getBlankNodeFor(node);
 72  0
         conditionallyAddResourceProperty(recipe, RDF.TYPE, vHRECIPE.Recipe);
 73  0
         final HTMLDocument fragment = new HTMLDocument(node);
 74  0
         addFN(fragment, recipe);
 75  0
         addIngredients(fragment, recipe);
 76  0
         addYield(fragment, recipe);
 77  0
         addInstructions(fragment, recipe);
 78  0
         addDurations(fragment, recipe);
 79  0
         addPhoto(fragment, recipe);
 80  0
         addSummary(fragment, recipe);
 81  0
         addAuthors(fragment, recipe);
 82  0
         addPublished(fragment, recipe);
 83  0
         addNutritions(fragment, recipe);
 84  0
         addTags(fragment, recipe);
 85  0
         return true;
 86  
     }
 87  
 
 88  
     /**
 89  
      * Maps a field text with a property.
 90  
      *
 91  
      * @param fragment
 92  
      * @param recipe
 93  
      * @param fieldClass
 94  
      * @param property
 95  
      */
 96  
     private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, URI property) {
 97  0
         HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
 98  0
         conditionallyAddStringProperty(
 99  
                 title.source(), recipe, property, title.value()
 100  
         );
 101  0
     }
 102  
 
 103  
     /**
 104  
      * Adds the <code>fn</code> triple.
 105  
      *
 106  
      * @param fragment
 107  
      * @param recipe
 108  
      */
 109  
     private void addFN(HTMLDocument fragment, BNode recipe) {
 110  0
         mapFieldWithProperty(fragment, recipe, "fn", vHRECIPE.fn);
 111  0
     }
 112  
 
 113  
     /**
 114  
      * Adds the <code>ingredient</code> triples.
 115  
      *
 116  
      * @param fragment
 117  
      * @param ingredient
 118  
      * @return
 119  
      */
 120  
     private BNode addIngredient(HTMLDocument fragment,  HTMLDocument.TextField ingredient) {
 121  0
         final BNode ingredientBnode = getBlankNodeFor(ingredient.source());
 122  0
         addURIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient);
 123  0
         conditionallyAddStringProperty(
 124  
                 ingredient.source(),
 125  
                 ingredientBnode,
 126  
                 vHRECIPE.ingredientName,
 127  
                 HTMLDocument.readNodeContent(ingredient.source(), true)
 128  
         );
 129  0
         mapFieldWithProperty(fragment, ingredientBnode, "value", vHRECIPE.ingredientQuantity);
 130  0
         mapFieldWithProperty(fragment, ingredientBnode, "type" , vHRECIPE.ingredientQuantityType);
 131  0
         return ingredientBnode;
 132  
     }
 133  
 
 134  
     /**
 135  
      * Adds the <code>ingredients</code>list triples.
 136  
      *
 137  
      * @param fragment
 138  
      * @param recipe
 139  
      * @return
 140  
      */
 141  
     private void addIngredients(HTMLDocument fragment, BNode recipe) {
 142  0
         final HTMLDocument.TextField[] ingredients = fragment.getPluralTextField("ingredient");
 143  0
         for(HTMLDocument.TextField ingredient : ingredients) {
 144  0
             addBNodeProperty(recipe, vHRECIPE.ingredient, addIngredient(fragment, ingredient));
 145  
         }
 146  0
     }
 147  
 
 148  
     /**
 149  
      * Adds the <code>instruction</code> triples.
 150  
      *
 151  
      * @param fragment
 152  
      * @param recipe
 153  
      */
 154  
     private void addInstructions(HTMLDocument fragment, BNode recipe) {
 155  0
         mapFieldWithProperty(fragment, recipe, "instructions", vHRECIPE.instructions);
 156  
 
 157  0
     }
 158  
 
 159  
     /**
 160  
      * Adds the <code>yield</code> triples.
 161  
      *
 162  
      * @param fragment
 163  
      * @param recipe
 164  
      */
 165  
     private void addYield(HTMLDocument fragment, BNode recipe) {
 166  0
         mapFieldWithProperty(fragment, recipe, "yield", vHRECIPE.yield);
 167  0
     }
 168  
 
 169  
     /**
 170  
      * Adds the <code>duration</code> triples.
 171  
      *
 172  
      * @param fragment
 173  
      * @param duration
 174  
      * @return
 175  
      */
 176  
     //TODO: USE http://microformats.org/wiki/value-class-pattern to read correct date format.
 177  
     private BNode addDuration(HTMLDocument fragment, HTMLDocument.TextField duration) {
 178  0
         final BNode durationBnode = getBlankNodeFor(duration.source());
 179  0
         addURIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration);
 180  0
         conditionallyAddStringProperty(
 181  
                 duration.source(),
 182  
                 durationBnode, vHRECIPE.durationTime, duration.value()
 183  
         );
 184  0
         mapFieldWithProperty(fragment, durationBnode, "value-title", vHRECIPE.durationTitle);
 185  0
         return durationBnode;
 186  
     }
 187  
 
 188  
     /**
 189  
      * Adds the <code>yield</code> triples.
 190  
      *
 191  
      * @param fragment
 192  
      * @param recipe
 193  
      */
 194  
     private void addDurations(HTMLDocument fragment, BNode recipe) {
 195  0
       final HTMLDocument.TextField[] durations = fragment.getPluralTextField("duration");
 196  0
         for(HTMLDocument.TextField duration : durations) {
 197  0
             addBNodeProperty(recipe, vHRECIPE.duration, addDuration(fragment, duration));
 198  
         }
 199  0
     }
 200  
 
 201  
     /**
 202  
      * Adds the <code>photo</code> triples.
 203  
      *
 204  
      * @param fragment
 205  
      * @param recipe
 206  
      * @throws ExtractionException
 207  
      */
 208  
     private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
 209  0
         final HTMLDocument.TextField[] photos = fragment.getPluralUrlField("photo");
 210  0
         for(HTMLDocument.TextField photo : photos) {
 211  0
             addURIProperty(recipe, vHRECIPE.photo, fragment.resolveURI(photo.value()));
 212  
         }
 213  0
     }
 214  
 
 215  
     /**
 216  
      * Adds the <code>summary</code> triples.
 217  
      *
 218  
      * @param fragment
 219  
      * @param recipe
 220  
      */
 221  
     private void addSummary(HTMLDocument fragment, BNode recipe) {
 222  0
         mapFieldWithProperty(fragment, recipe, "summary", vHRECIPE.summary);
 223  0
     }
 224  
 
 225  
     /**
 226  
      * Adds the <code>authors</code> triples.
 227  
      *
 228  
      * @param fragment
 229  
      * @param recipe
 230  
      */
 231  
     private void addAuthors(HTMLDocument fragment, BNode recipe) {
 232  0
         final HTMLDocument.TextField[] authors = fragment.getPluralTextField("author");
 233  0
          for(HTMLDocument.TextField author : authors) {
 234  0
              conditionallyAddStringProperty(
 235  
                     author.source(),
 236  
                     recipe, vHRECIPE.author, author.value()
 237  
               );
 238  
         }
 239  0
     }
 240  
 
 241  
     /**
 242  
      * Adds the <code>published</code> triples.
 243  
      *
 244  
      * @param fragment
 245  
      * @param recipe
 246  
      */
 247  
     //TODO: USE http://microformats.org/wiki/value-class-pattern to read correct date format.
 248  
     private void addPublished(HTMLDocument fragment, BNode recipe) {
 249  0
         mapFieldWithProperty(fragment, recipe, "published", vHRECIPE.published);
 250  0
     }
 251  
 
 252  
     /**
 253  
      * Adds the <code>nutrition</code> triples.
 254  
      *
 255  
      * @param fragment
 256  
      * @param nutrition
 257  
      * @return
 258  
      */
 259  
     private BNode addNutrition(HTMLDocument fragment, HTMLDocument.TextField nutrition) {
 260  0
         final BNode nutritionBnode = getBlankNodeFor(nutrition.source());
 261  0
         addURIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition);
 262  0
         conditionallyAddStringProperty(
 263  
                 nutrition.source(),
 264  
                 nutritionBnode, vHRECIPE.nutritionValue, nutrition.value()
 265  
         );
 266  0
         mapFieldWithProperty(fragment, nutritionBnode, "value", vHRECIPE.nutritionValue);
 267  0
         mapFieldWithProperty(fragment, nutritionBnode, "type" , vHRECIPE.nutritionValueType);
 268  0
         return nutritionBnode;
 269  
     }
 270  
 
 271  
     /**
 272  
      * Adds the <code>nutritions</code> triples.
 273  
      *
 274  
      * @param fragment
 275  
      * @param recipe
 276  
      */
 277  
     private void addNutritions(HTMLDocument fragment, BNode recipe) {
 278  0
         HTMLDocument.TextField[] nutritions = fragment.getPluralTextField("nutrition");
 279  0
         for (HTMLDocument.TextField nutrition : nutritions) {
 280  0
             addBNodeProperty(recipe, vHRECIPE.nutrition, addNutrition(fragment, nutrition));
 281  
         }
 282  0
     }
 283  
 
 284  
     /**
 285  
      * Adds the <code>tags</code> triples.
 286  
      *
 287  
      * @param fragment
 288  
      * @param recipe
 289  
      */
 290  
     private void addTags(HTMLDocument fragment, BNode recipe) {
 291  0
         HTMLDocument.TextField[] tags = fragment.extractRelTagNodes();
 292  0
         for(HTMLDocument.TextField tag : tags) {
 293  0
             conditionallyAddStringProperty(
 294  
                     tag.source(),
 295  
                     recipe, vHRECIPE.tag, tag.value()
 296  
               );
 297  
         }
 298  0
     }
 299  
 
 300  
 }