Coverage Report

Coverage Report - org.apache.any23.extractor.xpath.XPathExtractor

Classes in this File

Line Coverage

Branch Coverage

Complexity

XPathExtractor

0/17

0/4

1.333

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.any23.extractor.xpath;
 
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.Extractor;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.SimpleExtractorFactory;
 import org.openrdf.model.URI;
 import org.w3c.dom.Document;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
 /**
  * Implementation of an {@link org.apache.any23.extractor.Extractor.TagSoupDOMExtractor} able to
  * apply {@link XPathExtractionRule}s and generate <i>quads</i>.
  *
  * @see XPathExtractionRule
  * @author Michele Mostarda (mostarda@fbk.eu)
  */
 public class XPathExtractor implements Extractor.TagSoupDOMExtractor {
 
     public final static String NAME = "html-xpath";
 
     public final static ExtractorFactory<XPathExtractor> factory =
             SimpleExtractorFactory.create(
                     NAME,
                     null,
                     Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
                     null,
                     XPathExtractor.class
             );
 
     private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<XPathExtractionRule>();
 
     public XPathExtractor(List<XPathExtractionRule> rules) {
         xPathExtractionRules.addAll(rules);
     }
 
     public void add(XPathExtractionRule rule) {
         xPathExtractionRules.add(rule);
     }
 
     public void remove(XPathExtractionRule rule) {
         xPathExtractionRules.remove(rule);
     }
 
     public boolean contains(XPathExtractionRule rule) {
         return xPathExtractionRules.contains(rule);
     }
 
     public void run(
             ExtractionParameters extractionParameters,
             ExtractionContext extractionContext,
             Document in,
             ExtractionResult out
     )
     throws IOException, ExtractionException {
         final URI documentURI = extractionContext.getDocumentURI();
         for(XPathExtractionRule rule : xPathExtractionRules) {
             if(rule.acceptURI(documentURI)) {
                 rule.process(in, out);
             }
         }
     }
 
     public ExtractorDescription getDescription() {
         return factory;
     }
 
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		package org.apache.any23.extractor.xpath;
19
20		import org.apache.any23.extractor.ExtractionContext;
21		import org.apache.any23.extractor.ExtractionException;
22		import org.apache.any23.extractor.ExtractionParameters;
23		import org.apache.any23.extractor.ExtractionResult;
24		import org.apache.any23.extractor.Extractor;
25		import org.apache.any23.extractor.ExtractorDescription;
26		import org.apache.any23.extractor.ExtractorFactory;
27		import org.apache.any23.extractor.SimpleExtractorFactory;
28		import org.openrdf.model.URI;
29		import org.w3c.dom.Document;
30
31		import java.io.IOException;
32		import java.util.ArrayList;
33		import java.util.Arrays;
34		import java.util.List;
35
36		/**
37		* Implementation of an {@link org.apache.any23.extractor.Extractor.TagSoupDOMExtractor} able to
38		* apply {@link XPathExtractionRule}s and generate <i>quads</i>.
39		*
40		* @see XPathExtractionRule
41		* @author Michele Mostarda (mostarda@fbk.eu)
42		*/
43	0	public class XPathExtractor implements Extractor.TagSoupDOMExtractor {
44
45		public final static String NAME = "html-xpath";
46
47	0	public final static ExtractorFactory<XPathExtractor> factory =
48		SimpleExtractorFactory.create(
49		NAME,
50		null,
51		Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
52		null,
53		XPathExtractor.class
54		);
55
56	0	private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<XPathExtractionRule>();
57
58	0	public XPathExtractor(List<XPathExtractionRule> rules) {
59	0	xPathExtractionRules.addAll(rules);
60	0	}
61
62		public void add(XPathExtractionRule rule) {
63	0	xPathExtractionRules.add(rule);
64	0	}
65
66		public void remove(XPathExtractionRule rule) {
67	0	xPathExtractionRules.remove(rule);
68	0	}
69
70		public boolean contains(XPathExtractionRule rule) {
71	0	return xPathExtractionRules.contains(rule);
72		}
73
74		public void run(
75		ExtractionParameters extractionParameters,
76		ExtractionContext extractionContext,
77		Document in,
78		ExtractionResult out
79		)
80		throws IOException, ExtractionException {
81	0	final URI documentURI = extractionContext.getDocumentURI();
82	0	for(XPathExtractionRule rule : xPathExtractionRules) {
83	0	if(rule.acceptURI(documentURI)) {
84	0	rule.process(in, out);
85		}
86		}
87	0	}
88
89		public ExtractorDescription getDescription() {
90	0	return factory;
91		}
92
93		}