Coverage Report

Coverage Report - org.apache.any23.extractor.html.LicenseExtractor

Classes in this File

Line coverage: 0/14

Branch coverage: 0/4

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.any23.extractor.html;
 
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.SimpleExtractorFactory;
 import org.apache.any23.rdf.PopularPrefixes;
 import org.apache.any23.vocab.XHTML;
 import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
 import org.openrdf.model.URI;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 
 import java.io.IOException;
 import java.util.Arrays;
 
 /**
  * Extractor for the <a href="http://microformats.org/wiki/rel-license">rel-license</a>
  * microformat.
  * <p/>
  *
  * @author Gabriele Renzi
  * @author Richard Cyganiak
  */
 public class LicenseExtractor implements TagSoupDOMExtractor {
 
     private static final XHTML vXHTML = XHTML.getInstance();
 
     public final static ExtractorFactory<LicenseExtractor> factory =
             SimpleExtractorFactory.create(
                     "html-mf-license",
                     PopularPrefixes.createSubset("xhtml"),
                     Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
                     "example-mf-license.html",
                     LicenseExtractor.class
             );
 
     public void run(
             ExtractionParameters extractionParameters,
             ExtractionContext extractionContext,
             Document in,
             ExtractionResult out
     ) throws IOException, ExtractionException {
         HTMLDocument document = new HTMLDocument(in);
         final URI documentURI = extractionContext.getDocumentURI();
         for (Node node : DomUtils.findAll(in, "//A[@rel='license']/@href")) {
             String link = node.getNodeValue();
             if ("".equals(link)) {
                 out.notifyError(
                         ExtractionResult.ErrorLevel.WARN,
                         String.format(
                                 "Invalid license link detected within document %s.",
                                 documentURI.toString()
                         ),
                         0, 0
                 );
                 continue;
             }
             out.writeTriple(documentURI, vXHTML.license, document.resolveURI(link));
         }
     }
 
     public ExtractorDescription getDescription() {
         return factory;
     }
     
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		package org.apache.any23.extractor.html;
19
20		import org.apache.any23.extractor.ExtractionContext;
21		import org.apache.any23.extractor.ExtractionException;
22		import org.apache.any23.extractor.ExtractionParameters;
23		import org.apache.any23.extractor.ExtractionResult;
24		import org.apache.any23.extractor.ExtractorDescription;
25		import org.apache.any23.extractor.ExtractorFactory;
26		import org.apache.any23.extractor.SimpleExtractorFactory;
27		import org.apache.any23.rdf.PopularPrefixes;
28		import org.apache.any23.vocab.XHTML;
29		import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
30		import org.openrdf.model.URI;
31		import org.w3c.dom.Document;
32		import org.w3c.dom.Node;
33
34		import java.io.IOException;
35		import java.util.Arrays;
36
37		/**
38		* Extractor for the <a href="http://microformats.org/wiki/rel-license">rel-license</a>
39		* microformat.
40		* <p/>
41		*
42		* @author Gabriele Renzi
43		* @author Richard Cyganiak
44		*/
45	0	public class LicenseExtractor implements TagSoupDOMExtractor {
46
47	0	private static final XHTML vXHTML = XHTML.getInstance();
48
49	0	public final static ExtractorFactory<LicenseExtractor> factory =
50		SimpleExtractorFactory.create(
51		"html-mf-license",
52		PopularPrefixes.createSubset("xhtml"),
53		Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
54		"example-mf-license.html",
55		LicenseExtractor.class
56		);
57
58		public void run(
59		ExtractionParameters extractionParameters,
60		ExtractionContext extractionContext,
61		Document in,
62		ExtractionResult out
63		) throws IOException, ExtractionException {
64	0	HTMLDocument document = new HTMLDocument(in);
65	0	final URI documentURI = extractionContext.getDocumentURI();
66	0	for (Node node : DomUtils.findAll(in, "//A[@rel='license']/@href")) {
67	0	String link = node.getNodeValue();
68	0	if ("".equals(link)) {
69	0	out.notifyError(
70		ExtractionResult.ErrorLevel.WARN,
71		String.format(
72		"Invalid license link detected within document %s.",
73		documentURI.toString()
74		),
75		0, 0
76		);
77	0	continue;
78		}
79	0	out.writeTriple(documentURI, vXHTML.license, document.resolveURI(link));
80	0	}
81	0	}
82
83		public ExtractorDescription getDescription() {
84	0	return factory;
85		}
86
87		}