1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.rdf; |
19 | |
|
20 | |
import org.apache.any23.io.nquads.NQuadsParser; |
21 | |
import org.apache.any23.util.MathUtils; |
22 | |
import org.openrdf.model.BNode; |
23 | |
import org.openrdf.model.Literal; |
24 | |
import org.openrdf.model.Resource; |
25 | |
import org.openrdf.model.Statement; |
26 | |
import org.openrdf.model.URI; |
27 | |
import org.openrdf.model.Value; |
28 | |
import org.openrdf.model.ValueFactory; |
29 | |
import org.openrdf.model.impl.URIImpl; |
30 | |
import org.openrdf.model.impl.ValueFactoryImpl; |
31 | |
import org.openrdf.model.vocabulary.RDF; |
32 | |
import org.openrdf.rio.RDFHandler; |
33 | |
import org.openrdf.rio.RDFHandlerException; |
34 | |
import org.openrdf.rio.RDFParseException; |
35 | |
import org.openrdf.rio.RDFParser; |
36 | |
import org.openrdf.rio.ntriples.NTriplesParser; |
37 | |
import org.openrdf.rio.rdfxml.RDFXMLParser; |
38 | |
import org.openrdf.rio.turtle.TurtleParser; |
39 | |
|
40 | |
import javax.xml.datatype.DatatypeConfigurationException; |
41 | |
import javax.xml.datatype.DatatypeFactory; |
42 | |
import javax.xml.datatype.XMLGregorianCalendar; |
43 | |
import java.io.ByteArrayInputStream; |
44 | |
import java.io.IOException; |
45 | |
import java.io.InputStream; |
46 | |
import java.net.URISyntaxException; |
47 | |
import java.text.ParseException; |
48 | |
import java.text.SimpleDateFormat; |
49 | |
import java.util.ArrayList; |
50 | |
import java.util.Date; |
51 | |
import java.util.GregorianCalendar; |
52 | |
import java.util.List; |
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
public class RDFUtils { |
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | 0 | public enum Parser { |
66 | 0 | RDFXML, |
67 | 0 | Turtle, |
68 | 0 | NTriples, |
69 | 0 | NQuads |
70 | |
} |
71 | |
|
72 | 0 | private static final ValueFactory valueFactory = ValueFactoryImpl.getInstance(); |
73 | |
|
74 | |
|
75 | |
|
76 | |
|
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
public static String fixAbsoluteURI(String uri) { |
82 | 0 | String fixed = fixURIWithException(uri); |
83 | 0 | if (!fixed.matches("[a-zA-Z0-9]+:/.*")) throw new IllegalArgumentException("not a absolute URI: " + uri); |
84 | |
|
85 | 0 | if (fixed.matches("https?://[a-zA-Z0-9.-]+(:[0-9+])?")) { |
86 | 0 | fixed = fixed + "/"; |
87 | |
} |
88 | 0 | return fixed; |
89 | |
} |
90 | |
|
91 | |
|
92 | |
|
93 | |
|
94 | |
|
95 | |
|
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
public static String getXSDDate(String dateToBeParsed, String format) |
102 | |
throws ParseException, DatatypeConfigurationException { |
103 | 0 | SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format); |
104 | 0 | Date date = simpleDateFormat.parse(dateToBeParsed); |
105 | 0 | GregorianCalendar gc = new GregorianCalendar(); |
106 | 0 | gc.setTime(date); |
107 | 0 | XMLGregorianCalendar xml = DatatypeFactory.newInstance().newXMLGregorianCalendar(gc); |
108 | 0 | xml.setTimezone(0); |
109 | 0 | return xml.toString(); |
110 | |
} |
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
public static String toXSDDateTime(Date date) { |
119 | 0 | SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); |
120 | 0 | String s = simpleDateFormat.format(date); |
121 | 0 | StringBuilder sb = new StringBuilder(s); |
122 | 0 | sb.insert(22, ':'); |
123 | 0 | return sb.toString(); |
124 | |
} |
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | |
|
138 | |
|
139 | |
|
140 | |
|
141 | |
|
142 | |
public static String fixURIWithException(String unescapedURI) { |
143 | 0 | if (unescapedURI == null) throw new IllegalArgumentException("URI was null"); |
144 | |
|
145 | |
|
146 | 0 | String escapedURI = unescapedURI.trim(); |
147 | |
|
148 | |
|
149 | 0 | escapedURI = escapedURI.replaceAll(" ", "%20"); |
150 | |
|
151 | |
|
152 | 0 | escapedURI = escapedURI.replaceAll("\n", ""); |
153 | |
|
154 | |
|
155 | 0 | if (escapedURI.startsWith("\\") || escapedURI.startsWith("\"")) escapedURI = escapedURI.substring(1); |
156 | |
|
157 | 0 | if (escapedURI.endsWith("\\") || escapedURI.endsWith("\"")) |
158 | 0 | escapedURI = escapedURI.substring(0, escapedURI.length() - 1); |
159 | |
|
160 | |
|
161 | 0 | if (escapedURI.matches("^[a-zA-Z0-9]+:/?/?$")) |
162 | 0 | throw new IllegalArgumentException("no authority in URI: " + unescapedURI); |
163 | |
|
164 | |
|
165 | 0 | if (escapedURI.matches("^javascript:")) |
166 | 0 | throw new IllegalArgumentException("URI starts with javascript: " + unescapedURI); |
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | 0 | escapedURI = escapedURI.replaceAll(">.*$", ""); |
173 | |
|
174 | |
|
175 | 0 | if (escapedURI.matches("[<>\\[\\]|\\*\\{\\}\"\\\\]")) |
176 | 0 | throw new IllegalArgumentException("Invalid character in URI: " + unescapedURI); |
177 | |
|
178 | 0 | return escapedURI; |
179 | |
} |
180 | |
|
181 | |
|
182 | |
|
183 | |
|
184 | |
public static URI uri(String uri) { |
185 | 0 | return valueFactory.createURI(uri); |
186 | |
} |
187 | |
|
188 | |
|
189 | |
|
190 | |
|
191 | |
public static URI uri(String namespace, String localName) { |
192 | 0 | return valueFactory.createURI(namespace, localName); |
193 | |
} |
194 | |
|
195 | |
|
196 | |
|
197 | |
|
198 | |
public static Literal literal(String s) { |
199 | 0 | return valueFactory.createLiteral(s); |
200 | |
} |
201 | |
|
202 | |
|
203 | |
|
204 | |
|
205 | |
public static Literal literal(boolean b) { |
206 | 0 | return valueFactory.createLiteral(b); |
207 | |
} |
208 | |
|
209 | |
|
210 | |
|
211 | |
|
212 | |
public static Literal literal(byte b) { |
213 | 0 | return valueFactory.createLiteral(b); |
214 | |
} |
215 | |
|
216 | |
|
217 | |
|
218 | |
|
219 | |
public static Literal literal(short s) { |
220 | 0 | return valueFactory.createLiteral(s); |
221 | |
} |
222 | |
|
223 | |
|
224 | |
|
225 | |
|
226 | |
public static Literal literal(int i) { |
227 | 0 | return valueFactory.createLiteral(i); |
228 | |
} |
229 | |
|
230 | |
|
231 | |
|
232 | |
|
233 | |
public static Literal literal(long l) { |
234 | 0 | return valueFactory.createLiteral(l); |
235 | |
} |
236 | |
|
237 | |
|
238 | |
|
239 | |
|
240 | |
public static Literal literal(float f) { |
241 | 0 | return valueFactory.createLiteral(f); |
242 | |
} |
243 | |
|
244 | |
|
245 | |
|
246 | |
|
247 | |
public static Literal literal(double d) { |
248 | 0 | return valueFactory.createLiteral(d); |
249 | |
} |
250 | |
|
251 | |
|
252 | |
|
253 | |
|
254 | |
public static Literal literal(String s, String l) { |
255 | 0 | return valueFactory.createLiteral(s, l); |
256 | |
} |
257 | |
|
258 | |
|
259 | |
|
260 | |
|
261 | |
public static Literal literal(String s, URI datatype) { |
262 | 0 | return valueFactory.createLiteral(s, datatype); |
263 | |
} |
264 | |
|
265 | |
|
266 | |
|
267 | |
|
268 | |
|
269 | |
public static BNode bnode(String id) { |
270 | 0 | return valueFactory.createBNode(id); |
271 | |
} |
272 | |
|
273 | |
|
274 | |
|
275 | |
|
276 | |
public static BNode bnode() { |
277 | 0 | return valueFactory.createBNode(); |
278 | |
} |
279 | |
|
280 | |
|
281 | |
|
282 | |
|
283 | |
public static BNode getBNode(String id) { |
284 | 0 | return valueFactory.createBNode( |
285 | |
"node" + MathUtils.md5(id) |
286 | |
); |
287 | |
} |
288 | |
|
289 | |
|
290 | |
|
291 | |
|
292 | |
public static Statement triple(Resource s, URI p, Value o) { |
293 | 0 | return valueFactory.createStatement(s, p, o); |
294 | |
} |
295 | |
|
296 | |
|
297 | |
|
298 | |
|
299 | |
public static Statement quad(Resource s, URI p, Value o, Resource g) { |
300 | 0 | return valueFactory.createStatement(s, p, o, g); |
301 | |
} |
302 | |
|
303 | |
|
304 | |
|
305 | |
|
306 | |
|
307 | |
|
308 | |
|
309 | |
|
310 | |
|
311 | |
public static Value toRDF(String s) { |
312 | 0 | if ("a".equals(s)) return RDF.TYPE; |
313 | 0 | if (s.matches("[a-z0-9]+:.*")) { |
314 | 0 | return PopularPrefixes.get().expand(s); |
315 | |
} |
316 | 0 | return valueFactory.createLiteral(s); |
317 | |
} |
318 | |
|
319 | |
|
320 | |
|
321 | |
|
322 | |
|
323 | |
|
324 | |
|
325 | |
|
326 | |
|
327 | |
public static Statement toTriple(String s, String p, String o) { |
328 | 0 | return valueFactory.createStatement((Resource) toRDF(s), (URI) toRDF(p), toRDF(o)); |
329 | |
} |
330 | |
|
331 | |
|
332 | |
|
333 | |
|
334 | |
|
335 | |
|
336 | |
|
337 | |
|
338 | |
public static RDFParser getRDFParser(Parser p) { |
339 | 0 | switch (p) { |
340 | |
case RDFXML: |
341 | 0 | return new RDFXMLParser(); |
342 | |
case Turtle: |
343 | 0 | return new TurtleParser(); |
344 | |
case NTriples: |
345 | 0 | return new NTriplesParser(); |
346 | |
case NQuads: |
347 | 0 | return new NQuadsParser(); |
348 | |
default: |
349 | 0 | throw new IllegalArgumentException(); |
350 | |
} |
351 | |
} |
352 | |
|
353 | |
|
354 | |
|
355 | |
|
356 | |
|
357 | |
|
358 | |
|
359 | |
|
360 | |
public static Parser getParserFromExtension(String ext) { |
361 | 0 | if("rdf".equals(ext)) { |
362 | 0 | return Parser.RDFXML; |
363 | |
} |
364 | 0 | if("ttl".equals(ext)) { |
365 | 0 | return Parser.Turtle; |
366 | |
} |
367 | 0 | if("nt".equals(ext)) { |
368 | 0 | return Parser.NTriples; |
369 | |
} |
370 | 0 | if("nq".equals(ext)) { |
371 | 0 | return Parser.NQuads; |
372 | |
} |
373 | 0 | throw new IllegalArgumentException("Unknown extension : " + ext); |
374 | |
} |
375 | |
|
376 | |
|
377 | |
|
378 | |
|
379 | |
|
380 | |
|
381 | |
|
382 | |
|
383 | |
|
384 | |
|
385 | |
|
386 | |
|
387 | |
|
388 | |
public static Statement[] parseRDF(Parser p, InputStream is, String baseURI) |
389 | |
throws RDFHandlerException, IOException, RDFParseException { |
390 | 0 | final BufferRDFHandler handler = new BufferRDFHandler(); |
391 | 0 | final RDFParser parser = getRDFParser(p); |
392 | 0 | parser.setVerifyData(true); |
393 | 0 | parser.setStopAtFirstError(true); |
394 | 0 | parser.setPreserveBNodeIDs(true); |
395 | 0 | parser.setRDFHandler(handler); |
396 | 0 | parser.parse(is, baseURI); |
397 | 0 | return handler.statements.toArray( new Statement[handler.statements.size()] ); |
398 | |
} |
399 | |
|
400 | |
|
401 | |
|
402 | |
|
403 | |
|
404 | |
|
405 | |
|
406 | |
|
407 | |
|
408 | |
|
409 | |
|
410 | |
|
411 | |
public static Statement[] parseRDF(Parser p, InputStream is) |
412 | |
throws RDFHandlerException, IOException, RDFParseException { |
413 | 0 | return parseRDF(p, is, ""); |
414 | |
} |
415 | |
|
416 | |
|
417 | |
|
418 | |
|
419 | |
|
420 | |
|
421 | |
|
422 | |
|
423 | |
|
424 | |
|
425 | |
|
426 | |
|
427 | |
public static Statement[] parseRDF(Parser p, String in) |
428 | |
throws RDFHandlerException, IOException, RDFParseException { |
429 | 0 | return parseRDF(p, new ByteArrayInputStream(in.getBytes())); |
430 | |
} |
431 | |
|
432 | |
|
433 | |
|
434 | |
|
435 | |
|
436 | |
|
437 | |
|
438 | |
|
439 | |
|
440 | |
|
441 | |
|
442 | |
public static Statement[] parseRDF(String resource) throws RDFHandlerException, IOException, RDFParseException { |
443 | 0 | final int extIndex = resource.lastIndexOf("."); |
444 | 0 | if(extIndex == -1) |
445 | 0 | throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource); |
446 | 0 | final String extension = resource.substring(extIndex + 1); |
447 | 0 | return parseRDF( getParserFromExtension(extension), RDFUtils.class.getResourceAsStream(resource) ); |
448 | |
} |
449 | |
|
450 | |
|
451 | |
|
452 | |
|
453 | |
|
454 | |
|
455 | |
|
456 | |
|
457 | |
public static boolean isAbsoluteURI(String href) { |
458 | |
try { |
459 | 0 | new URIImpl(href.trim()); |
460 | 0 | new java.net.URI(href.trim()); |
461 | 0 | return true; |
462 | 0 | } catch (IllegalArgumentException e) { |
463 | 0 | return false; |
464 | 0 | } catch (URISyntaxException e) { |
465 | 0 | return false; |
466 | |
} |
467 | |
} |
468 | |
|
/** Private constructor: this class only exposes static members and is not meant to be instantiated. */
private RDFUtils() {
    // Intentionally empty.
}
470 | |
|
471 | 0 | private static class BufferRDFHandler implements RDFHandler { |
472 | |
|
473 | 0 | private final List<Statement> statements = new ArrayList<Statement>(); |
474 | |
|
475 | 0 | private int documents = 0; |
476 | 0 | private boolean open = false; |
477 | |
|
478 | |
@Override |
479 | |
public void startRDF() throws RDFHandlerException { |
480 | 0 | documents++; |
481 | 0 | open = true; |
482 | 0 | } |
483 | |
|
484 | |
@Override |
485 | |
public void endRDF() throws RDFHandlerException { |
486 | 0 | open = false; |
487 | 0 | } |
488 | |
|
489 | |
@Override |
490 | |
public void handleNamespace(String s, String s1) throws RDFHandlerException { |
491 | |
|
492 | 0 | } |
493 | |
|
494 | |
@Override |
495 | |
public void handleStatement(Statement statement) throws RDFHandlerException { |
496 | 0 | statements.add(statement); |
497 | 0 | } |
498 | |
|
499 | |
@Override |
500 | |
public void handleComment(String s) throws RDFHandlerException { |
501 | |
|
502 | 0 | } |
503 | |
|
504 | |
} |
505 | |
|
506 | |
} |