1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.extractor; |
19 | |
|
20 | |
import org.apache.any23.extractor.html.MicroformatExtractor; |
21 | |
import org.apache.any23.rdf.Prefixes; |
22 | |
import org.apache.any23.writer.TripleHandler; |
23 | |
import org.apache.any23.writer.TripleHandlerException; |
24 | |
import org.openrdf.model.BNode; |
25 | |
import org.openrdf.model.Resource; |
26 | |
import org.openrdf.model.URI; |
27 | |
import org.openrdf.model.Value; |
28 | |
|
29 | |
import java.io.PrintStream; |
30 | |
import java.util.ArrayList; |
31 | |
import java.util.Collection; |
32 | |
import java.util.Collections; |
33 | |
import java.util.HashSet; |
34 | |
import java.util.List; |
35 | |
import java.util.Set; |
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
public class ExtractionResultImpl implements TagSoupExtractionResult { |
58 | |
|
59 | |
private final ExtractionContext context; |
60 | |
|
61 | |
private final Extractor<?> extractor; |
62 | |
|
63 | |
private final TripleHandler tripleHandler; |
64 | |
|
65 | 0 | private final Collection<ExtractionResult> subResults = new ArrayList<ExtractionResult>(); |
66 | |
|
67 | 0 | private final Set<Object> knownContextIDs = new HashSet<Object>(); |
68 | |
|
69 | 0 | private boolean isClosed = false; |
70 | |
|
71 | 0 | private boolean isInitialized = false; |
72 | |
|
73 | |
private List<Error> errors; |
74 | |
|
75 | |
private List<ResourceRoot> resourceRoots; |
76 | |
|
77 | |
private List<PropertyPath> propertyPaths; |
78 | |
|
79 | |
public ExtractionResultImpl( |
80 | |
ExtractionContext context, |
81 | |
Extractor<?> extractor, |
82 | |
TripleHandler tripleHandler |
83 | 0 | ) { |
84 | 0 | if(context == null) { |
85 | 0 | throw new NullPointerException("context cannot be null."); |
86 | |
} |
87 | 0 | if(extractor == null) { |
88 | 0 | throw new NullPointerException("extractor cannot be null."); |
89 | |
} |
90 | 0 | if(tripleHandler == null) { |
91 | 0 | throw new NullPointerException("triple handler cannot be null."); |
92 | |
} |
93 | |
|
94 | 0 | this.extractor = extractor; |
95 | 0 | this.tripleHandler = tripleHandler; |
96 | 0 | this.context = context; |
97 | 0 | knownContextIDs.add( context.getUniqueID() ); |
98 | 0 | } |
99 | |
|
100 | |
public boolean hasErrors() { |
101 | 0 | return errors != null; |
102 | |
} |
103 | |
|
104 | |
public int getErrorsCount() { |
105 | 0 | return errors == null ? 0 : errors.size(); |
106 | |
} |
107 | |
|
108 | |
public void printErrorsReport(PrintStream ps) { |
109 | 0 | ps.print(String.format("Context: %s [errors: %d] {\n", context, getErrorsCount())); |
110 | 0 | if (errors != null) { |
111 | 0 | for (Error error : errors) { |
112 | 0 | ps.print(error.toString()); |
113 | 0 | ps.print("\n"); |
114 | |
} |
115 | |
} |
116 | |
|
117 | 0 | for (ExtractionResult er : subResults) { |
118 | 0 | er.printErrorsReport(ps); |
119 | |
} |
120 | 0 | ps.print("}\n"); |
121 | 0 | } |
122 | |
|
123 | |
public Collection<Error> getErrors() { |
124 | 0 | return errors == null ? Collections.<Error>emptyList() : Collections.unmodifiableList(errors); |
125 | |
} |
126 | |
|
127 | |
public ExtractionResult openSubResult(ExtractionContext context) { |
128 | 0 | final String contextID = context.getUniqueID(); |
129 | 0 | if (knownContextIDs.contains(contextID)) { |
130 | 0 | throw new IllegalArgumentException("Duplicate contextID: " + contextID); |
131 | |
} |
132 | 0 | knownContextIDs.add(contextID); |
133 | |
|
134 | 0 | checkOpen(); |
135 | 0 | ExtractionResult result = |
136 | |
new ExtractionResultImpl(context, extractor, tripleHandler); |
137 | 0 | subResults.add(result); |
138 | 0 | return result; |
139 | |
} |
140 | |
|
141 | |
public ExtractionContext getExtractionContext() { |
142 | 0 | return context; |
143 | |
} |
144 | |
|
145 | |
public void writeTriple(Resource s, URI p, Value o, URI g) { |
146 | 0 | if (s == null || p == null || o == null) return; |
147 | |
|
148 | 0 | if (s.stringValue() == null || p.stringValue() == null || o.stringValue() == null) { |
149 | 0 | throw new IllegalArgumentException("The statement arguments must be not null."); |
150 | |
} |
151 | 0 | checkOpen(); |
152 | |
try { |
153 | 0 | tripleHandler.receiveTriple(s, p, o, g, context); |
154 | 0 | } catch (TripleHandlerException e) { |
155 | 0 | throw new RuntimeException( |
156 | |
String.format("Error while receiving triple %s %s %s", s, p, o ), |
157 | |
e |
158 | |
); |
159 | 0 | } |
160 | 0 | } |
161 | |
|
162 | |
public void writeTriple(Resource s, URI p, Value o) { |
163 | 0 | writeTriple(s, p, o, null); |
164 | 0 | } |
165 | |
|
166 | |
public void writeNamespace(String prefix, String uri) { |
167 | 0 | checkOpen(); |
168 | |
try { |
169 | 0 | tripleHandler.receiveNamespace(prefix, uri, context); |
170 | 0 | } catch (TripleHandlerException e) { |
171 | 0 | throw new RuntimeException( |
172 | |
String.format("Error while writing namespace %s:%s", prefix, uri), |
173 | |
e |
174 | |
); |
175 | 0 | } |
176 | 0 | } |
177 | |
|
178 | |
public void notifyError(ErrorLevel level, String msg, int row, int col) { |
179 | 0 | if(errors == null) { |
180 | 0 | errors = new ArrayList<Error>(); |
181 | |
} |
182 | 0 | errors.add( new Error(level, msg, row, col) ); |
183 | 0 | } |
184 | |
|
185 | |
public void close() { |
186 | 0 | if (isClosed) return; |
187 | 0 | isClosed = true; |
188 | 0 | for (ExtractionResult subResult : subResults) { |
189 | 0 | subResult.close(); |
190 | |
} |
191 | 0 | if (isInitialized) { |
192 | |
try { |
193 | 0 | tripleHandler.closeContext(context); |
194 | 0 | } catch (TripleHandlerException e) { |
195 | 0 | throw new RuntimeException("Error while opening context", e); |
196 | 0 | } |
197 | |
} |
198 | 0 | } |
199 | |
|
200 | |
private void checkOpen() { |
201 | 0 | if (!isInitialized) { |
202 | 0 | isInitialized = true; |
203 | |
try { |
204 | 0 | tripleHandler.openContext(context); |
205 | 0 | } catch (TripleHandlerException e) { |
206 | 0 | throw new RuntimeException("Error while opening context", e); |
207 | 0 | } |
208 | 0 | Prefixes prefixes = extractor.getDescription().getPrefixes(); |
209 | 0 | for (String prefix : prefixes.allPrefixes()) { |
210 | |
try { |
211 | 0 | tripleHandler.receiveNamespace(prefix, prefixes.getNamespaceURIFor(prefix), context); |
212 | 0 | } catch (TripleHandlerException e) { |
213 | 0 | throw new RuntimeException(String.format("Error while writing namespace %s", prefix), |
214 | |
e |
215 | |
); |
216 | 0 | } |
217 | |
} |
218 | |
} |
219 | 0 | if (isClosed) { |
220 | 0 | throw new IllegalStateException("Not open: " + context); |
221 | |
} |
222 | 0 | } |
223 | |
|
224 | |
public void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) { |
225 | 0 | if(resourceRoots == null) { |
226 | 0 | resourceRoots = new ArrayList<ResourceRoot>(); |
227 | |
} |
228 | 0 | resourceRoots.add( new ResourceRoot(path, root, extractor) ); |
229 | 0 | } |
230 | |
|
231 | |
public List<ResourceRoot> getResourceRoots() { |
232 | 0 | List<ResourceRoot> allRoots = new ArrayList<ResourceRoot>(); |
233 | 0 | if(resourceRoots != null) { |
234 | 0 | allRoots.addAll( resourceRoots ); |
235 | |
} |
236 | 0 | for(ExtractionResult er : subResults) { |
237 | 0 | ExtractionResultImpl eri = (ExtractionResultImpl) er; |
238 | 0 | if( eri.resourceRoots != null ) { |
239 | 0 | allRoots.addAll( eri.resourceRoots ); |
240 | |
} |
241 | 0 | } |
242 | 0 | return allRoots; |
243 | |
} |
244 | |
|
245 | |
public void addPropertyPath( |
246 | |
Class<? extends MicroformatExtractor> extractor, |
247 | |
Resource propertySubject, |
248 | |
Resource property, |
249 | |
BNode object, |
250 | |
String[] path |
251 | |
) { |
252 | 0 | if(propertyPaths == null) { |
253 | 0 | propertyPaths = new ArrayList<PropertyPath>(); |
254 | |
} |
255 | 0 | propertyPaths.add( new PropertyPath(path, propertySubject, property, object, extractor) ); |
256 | 0 | } |
257 | |
|
258 | |
public List<PropertyPath> getPropertyPaths() { |
259 | 0 | List<PropertyPath> allPaths = new ArrayList<PropertyPath>(); |
260 | 0 | if(propertyPaths != null) { |
261 | 0 | allPaths.addAll( propertyPaths ); |
262 | |
} |
263 | 0 | for(ExtractionResult er : subResults) { |
264 | 0 | ExtractionResultImpl eri = (ExtractionResultImpl) er; |
265 | 0 | if( eri.propertyPaths != null ) { |
266 | 0 | allPaths.addAll( eri.propertyPaths ); |
267 | |
} |
268 | 0 | } |
269 | 0 | return allPaths; |
270 | |
} |
271 | |
|
272 | |
@Override |
273 | |
public String toString() { |
274 | 0 | final StringBuilder sb = new StringBuilder(); |
275 | 0 | sb.append(context.toString()); |
276 | 0 | sb.append('\n'); |
277 | 0 | if (errors != null) { |
278 | 0 | sb.append("Errors {\n"); |
279 | 0 | for (Error error : errors) { |
280 | 0 | sb.append('\t'); |
281 | 0 | sb.append(error.toString()); |
282 | 0 | sb.append('\n'); |
283 | |
} |
284 | |
} |
285 | 0 | sb.append("}\n"); |
286 | 0 | return sb.toString(); |
287 | |
} |
288 | |
|
289 | |
} |