Coverage Report

Coverage Report - org.apache.any23.extractor.rdfa.RDFa11Extractor

Classes in this File

Line Coverage

Branch Coverage

Complexity

RDFa11Extractor

0/21

N/A

1.25

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.any23.extractor.rdfa;
 
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.Extractor;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.SimpleExtractorFactory;
 import org.w3c.dom.Document;
 
 import java.io.IOException;
 import java.net.URL;
 import java.util.Arrays;
 
 /**
  * {@link org.apache.any23.extractor.Extractor} implementation for
  * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.1</a> specification.
  *
  * @author Michele Mostarda (mostarda@fbk.eu)
  */
 public class RDFa11Extractor implements Extractor.TagSoupDOMExtractor {
 
     public final static String NAME = "html-rdfa11";
 
     public final static ExtractorFactory<RDFa11Extractor> factory =
             SimpleExtractorFactory.create(
                     NAME,
                     null,
                     Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"),
                     "example-rdfa11.html",
                     RDFa11Extractor.class
             );
 
     private final RDFa11Parser parser;
 
      private boolean verifyDataType;
 
      private boolean stopAtFirstError;
 
      /**
       * Constructor, allows to specify the validation and error handling policies.
       *
       * @param verifyDataType if <code>true</code> the data types will be verified,
       *         if <code>false</code> will be ignored.
       * @param stopAtFirstError if <code>true</code> the parser will stop at first parsing error,
       *        if <code>false</code> will ignore non blocking errors.
       */
      public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
          this.parser = new RDFa11Parser();
          this.verifyDataType   = verifyDataType;
          this.stopAtFirstError = stopAtFirstError;
      }
 
      /**
       * Default constructor, with no verification of data types and not stop at first error.
       */
      public RDFa11Extractor() {
          this(false, false);
      }
 
      public boolean isVerifyDataType() {
          return verifyDataType;
      }
 
      public void setVerifyDataType(boolean verifyDataType) {
          this.verifyDataType = verifyDataType;
      }
 
      public boolean isStopAtFirstError() {
          return stopAtFirstError;
      }
 
      public void setStopAtFirstError(boolean stopAtFirstError) {
          this.stopAtFirstError = stopAtFirstError;
      }
 
      public void run(
              ExtractionParameters extractionParameters,
              ExtractionContext extractionContext,
              Document in,
              ExtractionResult out
      ) throws IOException, ExtractionException {
          try {
              parser.processDocument( new URL(extractionContext.getDocumentURI().toString() ), in, out );
          } catch (RDFa11ParserException rpe) {
              throw new ExtractionException("Error while performing extraction.", rpe);
          }
      }
 
      /**
       * @return the {@link org.apache.any23.extractor.ExtractorDescription} of this extractor
       */
      public ExtractorDescription getDescription() {
          return factory;
      }
 
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		package org.apache.any23.extractor.rdfa;
19
20		import org.apache.any23.extractor.ExtractionContext;
21		import org.apache.any23.extractor.ExtractionException;
22		import org.apache.any23.extractor.ExtractionParameters;
23		import org.apache.any23.extractor.ExtractionResult;
24		import org.apache.any23.extractor.Extractor;
25		import org.apache.any23.extractor.ExtractorDescription;
26		import org.apache.any23.extractor.ExtractorFactory;
27		import org.apache.any23.extractor.SimpleExtractorFactory;
28		import org.w3c.dom.Document;
29
30		import java.io.IOException;
31		import java.net.URL;
32		import java.util.Arrays;
33
34		/**
35		* {@link org.apache.any23.extractor.Extractor} implementation for
36		* <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.1</a> specification.
37		*
38		* @author Michele Mostarda (mostarda@fbk.eu)
39		*/
40	0	public class RDFa11Extractor implements Extractor.TagSoupDOMExtractor {
41
42		public final static String NAME = "html-rdfa11";
43
44	0	public final static ExtractorFactory<RDFa11Extractor> factory =
45		SimpleExtractorFactory.create(
46		NAME,
47		null,
48		Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"),
49		"example-rdfa11.html",
50		RDFa11Extractor.class
51		);
52
53		private final RDFa11Parser parser;
54
55		private boolean verifyDataType;
56
57		private boolean stopAtFirstError;
58
59		/**
60		* Constructor, allows to specify the validation and error handling policies.
61		*
62		* @param verifyDataType if <code>true</code> the data types will be verified,
63		* if <code>false</code> will be ignored.
64		* @param stopAtFirstError if <code>true</code> the parser will stop at first parsing error,
65		* if <code>false</code> will ignore non blocking errors.
66		*/
67	0	public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
68	0	this.parser = new RDFa11Parser();
69	0	this.verifyDataType = verifyDataType;
70	0	this.stopAtFirstError = stopAtFirstError;
71	0	}
72
73		/**
74		* Default constructor, with no verification of data types and not stop at first error.
75		*/
76		public RDFa11Extractor() {
77	0	this(false, false);
78	0	}
79
80		public boolean isVerifyDataType() {
81	0	return verifyDataType;
82		}
83
84		public void setVerifyDataType(boolean verifyDataType) {
85	0	this.verifyDataType = verifyDataType;
86	0	}
87
88		public boolean isStopAtFirstError() {
89	0	return stopAtFirstError;
90		}
91
92		public void setStopAtFirstError(boolean stopAtFirstError) {
93	0	this.stopAtFirstError = stopAtFirstError;
94	0	}
95
96		public void run(
97		ExtractionParameters extractionParameters,
98		ExtractionContext extractionContext,
99		Document in,
100		ExtractionResult out
101		) throws IOException, ExtractionException {
102		try {
103	0	parser.processDocument( new URL(extractionContext.getDocumentURI().toString() ), in, out );
104	0	} catch (RDFa11ParserException rpe) {
105	0	throw new ExtractionException("Error while performing extraction.", rpe);
106	0	}
107	0	}
108
109		/**
110		* @return the {@link org.apache.any23.extractor.ExtractorDescription} of this extractor
111		*/
112		public ExtractorDescription getDescription() {
113	0	return factory;
114		}
115
116		}