Coverage Report

Coverage Report - org.apache.any23.extractor.ExtractionResultImpl

Classes in this File

Line Coverage

Branch Coverage

Complexity

ExtractionResultImpl

0/120

0/66

3.778

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.any23.extractor;
 
 import org.apache.any23.extractor.html.MicroformatExtractor;
 import org.apache.any23.rdf.Prefixes;
 import org.apache.any23.writer.TripleHandler;
 import org.apache.any23.writer.TripleHandlerException;
 import org.openrdf.model.BNode;
 import org.openrdf.model.Resource;
 import org.openrdf.model.URI;
 import org.openrdf.model.Value;
 
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
 /**
  * <p/>
  * A default implementation of {@link ExtractionResult}; it receives
  * extraction output from one {@link Extractor} working on one document,
  * and passes the output on to a {@link TripleHandler}. It deals with
  * details such as creation of {@link ExtractionContext} objects
  * and closing any open contexts at the end of extraction.
  * <p/>
  * The {@link #close()} method must be invoked after the extractor has
  * finished processing.
  * <p/>
  * There is usually no need to provide additional implementations
  * of the ExtractionWriter interface.
  * <p/>
  *
  * @see org.apache.any23.writer.TripleHandler
  * @see ExtractionContext
  * @author Richard Cyganiak (richard@cyganiak.de)
  * @author Michele Mostarda (michele.mostarda@gmail.com)
  */
 public class ExtractionResultImpl implements TagSoupExtractionResult {
 
     private final ExtractionContext context;
 
     private final Extractor<?> extractor;
 
     private final TripleHandler tripleHandler;
 
     private final Collection<ExtractionResult> subResults = new ArrayList<ExtractionResult>();
 
     private final Set<Object> knownContextIDs = new HashSet<Object>();
 
     private boolean isClosed = false;
 
     private boolean isInitialized = false;
 
     private List<Error> errors;
 
     private List<ResourceRoot> resourceRoots;
 
     private List<PropertyPath> propertyPaths;
 
     public ExtractionResultImpl(
             ExtractionContext context,
             Extractor<?> extractor,
             TripleHandler tripleHandler
     ) {
         if(context == null) {
             throw new NullPointerException("context cannot be null.");
         }
         if(extractor == null) {
             throw new NullPointerException("extractor cannot be null.");
         }
         if(tripleHandler == null) {
             throw new NullPointerException("triple handler cannot be null.");
         }
 
         this.extractor       = extractor;
         this.tripleHandler   = tripleHandler;
         this.context         = context;
         knownContextIDs.add( context.getUniqueID() );
     }
 
     public boolean hasErrors() {
         return errors != null;
     }
 
     public int getErrorsCount() {
         return errors == null ? 0 : errors.size();
     }
 
     public void printErrorsReport(PrintStream ps) {
         ps.print(String.format("Context: %s [errors: %d] {\n", context, getErrorsCount()));
         if (errors != null) {
             for (Error error : errors) {
                 ps.print(error.toString());
                 ps.print("\n");
             }
         }
         // Printing sub results.
         for (ExtractionResult er : subResults) {
             er.printErrorsReport(ps);
         }
         ps.print("}\n");
     }
 
     public Collection<Error> getErrors() {
         return errors == null ? Collections.<Error>emptyList() : Collections.unmodifiableList(errors);
     }
 
     public ExtractionResult openSubResult(ExtractionContext context) {
         final String contextID = context.getUniqueID();
         if (knownContextIDs.contains(contextID)) {
             throw new IllegalArgumentException("Duplicate contextID: " + contextID);
         }
         knownContextIDs.add(contextID);
 
         checkOpen();
         ExtractionResult result =
                 new ExtractionResultImpl(context, extractor, tripleHandler);
         subResults.add(result);
         return result;
     }
 
     public ExtractionContext getExtractionContext() {
         return context;
     }
 
     public void writeTriple(Resource s, URI p, Value o, URI g) {
         if (s == null || p == null || o == null) return;
         // Check for misconstructed literals or BNodes, Sesame does not catch this.
         if (s.stringValue() == null || p.stringValue() == null || o.stringValue() == null) {
             throw new IllegalArgumentException("The statement arguments must be not null.");
         }
         checkOpen();
         try {
             tripleHandler.receiveTriple(s, p, o, g, context);
         } catch (TripleHandlerException e) {
             throw new RuntimeException(
                     String.format("Error while receiving triple %s %s %s", s, p, o ),
                     e
             );
         }
     }
 
     public void writeTriple(Resource s, URI p, Value o) {
         writeTriple(s, p, o, null);
     }
 
     public void writeNamespace(String prefix, String uri) {
         checkOpen();
         try {
             tripleHandler.receiveNamespace(prefix, uri, context);
         } catch (TripleHandlerException e) {
             throw new RuntimeException(
                     String.format("Error while writing namespace %s:%s", prefix, uri),
                     e
             );
         }
     }
 
     public void notifyError(ErrorLevel level, String msg, int row, int col) {
         if(errors == null) {
             errors = new ArrayList<Error>();
         }
         errors.add( new Error(level, msg, row, col) );
     }
 
     public void close() {
         if (isClosed) return;
         isClosed = true;
         for (ExtractionResult subResult : subResults) {
             subResult.close();
         }
         if (isInitialized) {
             try {
                 tripleHandler.closeContext(context);
             } catch (TripleHandlerException e) {
                 throw new RuntimeException("Error while opening context", e);
             }
         }
     }
 
     private void checkOpen() {
         if (!isInitialized) {
             isInitialized = true;
             try {
                 tripleHandler.openContext(context);
             } catch (TripleHandlerException e) {
                 throw new RuntimeException("Error while opening context", e);
             }
             Prefixes prefixes = extractor.getDescription().getPrefixes();
             for (String prefix : prefixes.allPrefixes()) {
                 try {
                     tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceURIFor(prefix), context);
                 } catch (TripleHandlerException e) {
                     throw new RuntimeException(String.format("Error while writing namespace %s", prefix),
                             e
                     );
                 }
             }
         }
         if (isClosed) {
             throw new IllegalStateException("Not open: " + context);
         }
     }
 
     public void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
         if(resourceRoots == null) {
             resourceRoots = new ArrayList<ResourceRoot>();
         }
         resourceRoots.add( new ResourceRoot(path, root, extractor) );
     }
 
     public List<ResourceRoot> getResourceRoots() {
         List<ResourceRoot> allRoots = new ArrayList<ResourceRoot>();
         if(resourceRoots != null) {
             allRoots.addAll( resourceRoots );
         }
         for(ExtractionResult er : subResults) {
             ExtractionResultImpl eri = (ExtractionResultImpl) er;
             if( eri.resourceRoots != null ) {
                 allRoots.addAll( eri.resourceRoots );
             }
         }
         return allRoots;
     }
 
     public void addPropertyPath(
             Class<? extends MicroformatExtractor> extractor,
             Resource propertySubject,
             Resource property,
             BNode object,
             String[] path
     ) {
         if(propertyPaths == null) {
             propertyPaths = new ArrayList<PropertyPath>();
         }
         propertyPaths.add( new PropertyPath(path, propertySubject, property, object, extractor) );
     }
 
     public List<PropertyPath> getPropertyPaths() {
         List<PropertyPath> allPaths = new ArrayList<PropertyPath>();
         if(propertyPaths != null) {
             allPaths.addAll( propertyPaths );
         }
         for(ExtractionResult er : subResults) {
             ExtractionResultImpl eri = (ExtractionResultImpl) er;
             if( eri.propertyPaths != null ) {
                 allPaths.addAll( eri.propertyPaths );
             }
         }
         return allPaths;
     }
 
     @Override
     public String toString() {
         final StringBuilder sb = new StringBuilder();
         sb.append(context.toString());
         sb.append('\n');
         if (errors != null) {
             sb.append("Errors {\n");
             for (Error error : errors) {
                 sb.append('\t');
                 sb.append(error.toString());
                 sb.append('\n');
             }
         }
         sb.append("}\n");
         return sb.toString();
     }
 
 }

1		/*
2		* Licensed to the Apache Software Foundation (ASF) under one or more
3		* contributor license agreements. See the NOTICE file distributed with
4		* this work for additional information regarding copyright ownership.
5		* The ASF licenses this file to You under the Apache License, Version 2.0
6		* (the "License"); you may not use this file except in compliance with
7		* the License. You may obtain a copy of the License at
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*/
17
18		package org.apache.any23.extractor;
19
20		import org.apache.any23.extractor.html.MicroformatExtractor;
21		import org.apache.any23.rdf.Prefixes;
22		import org.apache.any23.writer.TripleHandler;
23		import org.apache.any23.writer.TripleHandlerException;
24		import org.openrdf.model.BNode;
25		import org.openrdf.model.Resource;
26		import org.openrdf.model.URI;
27		import org.openrdf.model.Value;
28
29		import java.io.PrintStream;
30		import java.util.ArrayList;
31		import java.util.Collection;
32		import java.util.Collections;
33		import java.util.HashSet;
34		import java.util.List;
35		import java.util.Set;
36
37		/**
38		* <p/>
39		* A default implementation of {@link ExtractionResult}; it receives
40		* extraction output from one {@link Extractor} working on one document,
41		* and passes the output on to a {@link TripleHandler}. It deals with
42		* details such as creation of {@link ExtractionContext} objects
43		* and closing any open contexts at the end of extraction.
44		* <p/>
45		* The {@link #close()} method must be invoked after the extractor has
46		* finished processing.
47		* <p/>
48		* There is usually no need to provide additional implementations
49		* of the ExtractionWriter interface.
50		* <p/>
51		*
52		* @see org.apache.any23.writer.TripleHandler
53		* @see ExtractionContext
54		* @author Richard Cyganiak (richard@cyganiak.de)
55		* @author Michele Mostarda (michele.mostarda@gmail.com)
56		*/
57		public class ExtractionResultImpl implements TagSoupExtractionResult {
58
59		private final ExtractionContext context;
60
61		private final Extractor<?> extractor;
62
63		private final TripleHandler tripleHandler;
64
65	0	private final Collection<ExtractionResult> subResults = new ArrayList<ExtractionResult>();
66
67	0	private final Set<Object> knownContextIDs = new HashSet<Object>();
68
69	0	private boolean isClosed = false;
70
71	0	private boolean isInitialized = false;
72
73		private List<Error> errors;
74
75		private List<ResourceRoot> resourceRoots;
76
77		private List<PropertyPath> propertyPaths;
78
79		public ExtractionResultImpl(
80		ExtractionContext context,
81		Extractor<?> extractor,
82		TripleHandler tripleHandler
83	0	) {
84	0	if(context == null) {
85	0	throw new NullPointerException("context cannot be null.");
86		}
87	0	if(extractor == null) {
88	0	throw new NullPointerException("extractor cannot be null.");
89		}
90	0	if(tripleHandler == null) {
91	0	throw new NullPointerException("triple handler cannot be null.");
92		}
93
94	0	this.extractor = extractor;
95	0	this.tripleHandler = tripleHandler;
96	0	this.context = context;
97	0	knownContextIDs.add( context.getUniqueID() );
98	0	}
99
100		public boolean hasErrors() {
101	0	return errors != null;
102		}
103
104		public int getErrorsCount() {
105	0	return errors == null ? 0 : errors.size();
106		}
107
108		public void printErrorsReport(PrintStream ps) {
109	0	ps.print(String.format("Context: %s [errors: %d] {\n", context, getErrorsCount()));
110	0	if (errors != null) {
111	0	for (Error error : errors) {
112	0	ps.print(error.toString());
113	0	ps.print("\n");
114		}
115		}
116		// Printing sub results.
117	0	for (ExtractionResult er : subResults) {
118	0	er.printErrorsReport(ps);
119		}
120	0	ps.print("}\n");
121	0	}
122
123		public Collection<Error> getErrors() {
124	0	return errors == null ? Collections.<Error>emptyList() : Collections.unmodifiableList(errors);
125		}
126
127		public ExtractionResult openSubResult(ExtractionContext context) {
128	0	final String contextID = context.getUniqueID();
129	0	if (knownContextIDs.contains(contextID)) {
130	0	throw new IllegalArgumentException("Duplicate contextID: " + contextID);
131		}
132	0	knownContextIDs.add(contextID);
133
134	0	checkOpen();
135	0	ExtractionResult result =
136		new ExtractionResultImpl(context, extractor, tripleHandler);
137	0	subResults.add(result);
138	0	return result;
139		}
140
141		public ExtractionContext getExtractionContext() {
142	0	return context;
143		}
144
145		public void writeTriple(Resource s, URI p, Value o, URI g) {
146	0	if (s == null || p == null || o == null) return;
147		// Check for misconstructed literals or BNodes, Sesame does not catch this.
148	0	if (s.stringValue() == null || p.stringValue() == null || o.stringValue() == null) {
149	0	throw new IllegalArgumentException("The statement arguments must be not null.");
150		}
151	0	checkOpen();
152		try {
153	0	tripleHandler.receiveTriple(s, p, o, g, context);
154	0	} catch (TripleHandlerException e) {
155	0	throw new RuntimeException(
156		String.format("Error while receiving triple %s %s %s", s, p, o ),
157		e
158		);
159	0	}
160	0	}
161
162		public void writeTriple(Resource s, URI p, Value o) {
163	0	writeTriple(s, p, o, null);
164	0	}
165
166		public void writeNamespace(String prefix, String uri) {
167	0	checkOpen();
168		try {
169	0	tripleHandler.receiveNamespace(prefix, uri, context);
170	0	} catch (TripleHandlerException e) {
171	0	throw new RuntimeException(
172		String.format("Error while writing namespace %s:%s", prefix, uri),
173		e
174		);
175	0	}
176	0	}
177
178		public void notifyError(ErrorLevel level, String msg, int row, int col) {
179	0	if(errors == null) {
180	0	errors = new ArrayList<Error>();
181		}
182	0	errors.add( new Error(level, msg, row, col) );
183	0	}
184
185		public void close() {
186	0	if (isClosed) return;
187	0	isClosed = true;
188	0	for (ExtractionResult subResult : subResults) {
189	0	subResult.close();
190		}
191	0	if (isInitialized) {
192		try {
193	0	tripleHandler.closeContext(context);
194	0	} catch (TripleHandlerException e) {
195	0	throw new RuntimeException("Error while opening context", e);
196	0	}
197		}
198	0	}
199
200		private void checkOpen() {
201	0	if (!isInitialized) {
202	0	isInitialized = true;
203		try {
204	0	tripleHandler.openContext(context);
205	0	} catch (TripleHandlerException e) {
206	0	throw new RuntimeException("Error while opening context", e);
207	0	}
208	0	Prefixes prefixes = extractor.getDescription().getPrefixes();
209	0	for (String prefix : prefixes.allPrefixes()) {
210		try {
211	0	tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceURIFor(prefix), context);
212	0	} catch (TripleHandlerException e) {
213	0	throw new RuntimeException(String.format("Error while writing namespace %s", prefix),
214		e
215		);
216	0	}
217		}
218		}
219	0	if (isClosed) {
220	0	throw new IllegalStateException("Not open: " + context);
221		}
222	0	}
223
224		public void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) {
225	0	if(resourceRoots == null) {
226	0	resourceRoots = new ArrayList<ResourceRoot>();
227		}
228	0	resourceRoots.add( new ResourceRoot(path, root, extractor) );
229	0	}
230
231		public List<ResourceRoot> getResourceRoots() {
232	0	List<ResourceRoot> allRoots = new ArrayList<ResourceRoot>();
233	0	if(resourceRoots != null) {
234	0	allRoots.addAll( resourceRoots );
235		}
236	0	for(ExtractionResult er : subResults) {
237	0	ExtractionResultImpl eri = (ExtractionResultImpl) er;
238	0	if( eri.resourceRoots != null ) {
239	0	allRoots.addAll( eri.resourceRoots );
240		}
241	0	}
242	0	return allRoots;
243		}
244
245		public void addPropertyPath(
246		Class<? extends MicroformatExtractor> extractor,
247		Resource propertySubject,
248		Resource property,
249		BNode object,
250		String[] path
251		) {
252	0	if(propertyPaths == null) {
253	0	propertyPaths = new ArrayList<PropertyPath>();
254		}
255	0	propertyPaths.add( new PropertyPath(path, propertySubject, property, object, extractor) );
256	0	}
257
258		public List<PropertyPath> getPropertyPaths() {
259	0	List<PropertyPath> allPaths = new ArrayList<PropertyPath>();
260	0	if(propertyPaths != null) {
261	0	allPaths.addAll( propertyPaths );
262		}
263	0	for(ExtractionResult er : subResults) {
264	0	ExtractionResultImpl eri = (ExtractionResultImpl) er;
265	0	if( eri.propertyPaths != null ) {
266	0	allPaths.addAll( eri.propertyPaths );
267		}
268	0	}
269	0	return allPaths;
270		}
271
272		@Override
273		public String toString() {
274	0	final StringBuilder sb = new StringBuilder();
275	0	sb.append(context.toString());
276	0	sb.append('\n');
277	0	if (errors != null) {
278	0	sb.append("Errors {\n");
279	0	for (Error error : errors) {
280	0	sb.append('\t');
281	0	sb.append(error.toString());
282	0	sb.append('\n');
283		}
284		}
285	0	sb.append("}\n");
286	0	return sb.toString();
287		}
288
289		}