Coverage Report - org.apache.any23.extractor.xpath.TemplateXPathExtractionRuleImpl
 
Classes in this File Line Coverage Branch Coverage Complexity
TemplateXPathExtractionRuleImpl
0%
0/61
0%
0/32
2.643
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor.xpath;
 19  
 
 20  
 import org.apache.any23.extractor.ExtractionResult;
 21  
 import org.apache.any23.extractor.html.DomUtils;
 22  
 import org.openrdf.model.URI;
 23  
 import org.w3c.dom.Document;
 24  
 
 25  
 import java.util.ArrayList;
 26  
 import java.util.HashMap;
 27  
 import java.util.List;
 28  
 import java.util.Map;
 29  
 import java.util.regex.Pattern;
 30  
 
 31  
 /**
 32  
  * Default implementation of {@link XPathExtractionRule}.
 33  
  *
 34  
  * @author Michele Mostarda (mostarda@fbk.eu)
 35  
  */
 36  
 public class TemplateXPathExtractionRuleImpl implements TemplateXPathExtractionRule {
 37  
 
 38  
     private final String name;
 39  
 
 40  
     private final String uriRegex;
 41  
 
 42  
     private final Pattern uriRegexPattern;
 43  
 
 44  
     private final List<Variable> variables;
 45  
 
 46  
     private final List<QuadTemplate> templates;
 47  
 
 48  0
     public TemplateXPathExtractionRuleImpl(String name, String uriRegex) {
 49  0
         if(name == null) {
 50  0
             throw new NullPointerException("The rule name cannot be null.");
 51  
         }
 52  
 
 53  0
         this.name = name;
 54  0
         this.uriRegex = uriRegex;
 55  
 
 56  
         try {
 57  0
             uriRegexPattern = uriRegex != null ? Pattern.compile(uriRegex) : null;
 58  0
         } catch (Exception e) {
 59  0
             throw new IllegalArgumentException("Invalid value for uriRegex.", e);
 60  0
         }
 61  0
         variables = new ArrayList<Variable>();
 62  0
         templates = new ArrayList<QuadTemplate>();
 63  0
     }
 64  
 
 65  
     /**
 66  
      * @return the regex pattern filtering the template pages.
 67  
      */
 68  
     public String getUriRegex() {
 69  0
         return uriRegex;
 70  
     }
 71  
 
 72  
     public void add(Variable variable) {
 73  0
         checkVariableNameNotDeclared(variable.getName());
 74  0
         variables.add(variable);
 75  0
     }
 76  
 
 77  
     public boolean remove(Variable variable) {
 78  0
         return variables.remove(variable);
 79  
     }
 80  
 
 81  
     public void add(QuadTemplate template) {
 82  0
         checkTemplateVariablesDeclared(template);
 83  0
         templates.add(template);
 84  0
     }
 85  
 
 86  
     public boolean remove(QuadTemplate template) {
 87  0
         return templates.remove(template);
 88  
     }
 89  
 
 90  
     public String getName() {
 91  0
         return name;
 92  
     }
 93  
 
 94  
     public boolean acceptURI(URI uri) {
 95  0
         if(uriRegexPattern == null) {
 96  0
             return true;
 97  
         }
 98  0
         return uriRegexPattern.matcher(uri.stringValue()).find();
 99  
     }
 100  
 
 101  
     public void process(Document in, ExtractionResult er) {
 102  0
         final Map<String,String> varValues = new HashMap<String, String>();
 103  
         String value;
 104  0
         for(Variable variable : variables) {
 105  0
             value = DomUtils.find(in, variable.getxPath().toUpperCase());
 106  0
             varValues.put(variable.getName(), value);
 107  
         }
 108  
 
 109  0
         for(QuadTemplate template : templates) {
 110  0
             template.printOut(er, varValues);
 111  
         }
 112  0
     }
 113  
 
 114  
     private boolean variableNameDeclared(String varName) {
 115  0
         for(Variable variable : variables) {
 116  0
             if(variable.getName().equals(varName)) {
 117  0
                 return true;
 118  
             }
 119  
         }
 120  0
         return false;
 121  
     }
 122  
 
 123  
     private void checkVariableNameDeclared(String varName) {
 124  0
         if (!variableNameDeclared(varName)) {
 125  0
             throw new IllegalArgumentException(
 126  
                     String.format("A variable with name '%s' was not declared.", varName)
 127  
             );
 128  
         }
 129  0
     }
 130  
 
 131  
     private void checkVariableNameNotDeclared(String varName) {
 132  0
         if (variableNameDeclared(varName)) {
 133  0
             throw new IllegalArgumentException(
 134  
                     String.format("A variable with name '%s' is already declared.", varName)
 135  
             );
 136  
         }
 137  0
     }
 138  
 
 139  
     private void checkTemplateVariablesDeclared(QuadTemplate template) {
 140  0
         if( template.getSubject().isVar()   ) checkVariableNameDeclared( template.getSubject().getInternalValue() );
 141  0
         if( template.getPredicate().isVar() ) checkVariableNameDeclared( template.getPredicate().getInternalValue() );
 142  0
         if( template.getObject().isVar()    ) checkVariableNameDeclared( template.getObject().getInternalValue() );
 143  0
         if( template.getGraph() != null && template.getGraph().isVar() ) {
 144  0
             checkVariableNameDeclared( template.getGraph().getInternalValue() );
 145  
         }
 146  0
     }
 147  
 
 148  
     @Override
 149  
     public String toString() {
 150  0
         final StringBuilder sb = new StringBuilder();
 151  0
         sb.append('\n');
 152  0
         sb.append("name: ").append(name).append('\n');
 153  0
         sb.append("pattern: '").append(uriRegex).append("'").append('\n');
 154  
 
 155  0
         sb.append("variables {\n");
 156  0
         for (Variable variable : variables) {
 157  0
             sb.append(variable.getName()).append(":").append(variable.getxPath()).append('\n');
 158  
         }
 159  0
         sb.append("}\n");
 160  
 
 161  0
         sb.append("templates {\n");
 162  0
         for (QuadTemplate template : templates) {
 163  0
             sb.append(template).append('\n');
 164  
         }
 165  0
         sb.append("}\n");
 166  0
         return sb.toString();
 167  
     }
 168  
 }