1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.extractor; |
19 | |
|
20 | |
import org.apache.any23.source.MemCopyFactory; |
21 | |
import org.apache.any23.source.StringDocumentSource; |
22 | |
import org.apache.any23.writer.TripleHandler; |
23 | |
import org.apache.any23.writer.TripleHandlerException; |
24 | |
import org.apache.any23.writer.TurtleWriter; |
25 | |
|
26 | |
import java.io.ByteArrayOutputStream; |
27 | |
import java.io.IOException; |
28 | |
import java.io.InputStream; |
29 | |
|
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
public class ExampleInputOutput { |
39 | |
|
40 | |
private final ExtractorFactory<?> factory; |
41 | |
|
42 | |
public ExampleInputOutput(String extractorName) { |
43 | 0 | this(ExtractorRegistry.getInstance().getFactory(extractorName)); |
44 | 0 | } |
45 | |
|
46 | 0 | public ExampleInputOutput(ExtractorFactory<?> factory) { |
47 | 0 | this.factory = factory; |
48 | 0 | } |
49 | |
|
50 | |
public String getExampleInput() throws IOException { |
51 | 0 | if (factory.getExampleInput() == null) { |
52 | 0 | return null; |
53 | |
} |
54 | 0 | if (isBlindExtractor()) { |
55 | 0 | return null; |
56 | |
} |
57 | 0 | InputStream in = factory.createExtractor().getClass().getResourceAsStream( |
58 | |
factory.getExampleInput()); |
59 | 0 | if (in == null) { |
60 | 0 | throw new IllegalArgumentException( |
61 | |
"Example input resource not found for extractor " + |
62 | |
factory.getExtractorName() + ": " + |
63 | |
factory.getExampleInput()); |
64 | |
} |
65 | 0 | return new String(MemCopyFactory.toByteArray(in), "utf-8"); |
66 | |
} |
67 | |
|
68 | |
public String getExampleURI() { |
69 | 0 | if (factory.getExampleInput() == null) { |
70 | 0 | return null; |
71 | |
} |
72 | 0 | if (isBlindExtractor()) { |
73 | 0 | return factory.getExampleInput(); |
74 | |
} |
75 | 0 | return "http://example.com/"; |
76 | |
} |
77 | |
|
78 | |
public String getExampleOutput() throws IOException, ExtractionException { |
79 | 0 | if (factory.getExampleInput() == null) { |
80 | 0 | return null; |
81 | |
} |
82 | 0 | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
83 | 0 | TripleHandler writer = new TurtleWriter(out); |
84 | 0 | new SingleDocumentExtraction( |
85 | |
new StringDocumentSource(getExampleInput(), getExampleURI()), |
86 | |
factory, |
87 | |
writer).run(); |
88 | |
try { |
89 | 0 | writer.close(); |
90 | 0 | } catch (TripleHandlerException e) { |
91 | 0 | throw new ExtractionException("Error while closing the triple handler", e); |
92 | 0 | } |
93 | 0 | return out.toString("utf-8"); |
94 | |
} |
95 | |
|
96 | |
private boolean isBlindExtractor() { |
97 | 0 | return factory.createExtractor() instanceof Extractor.BlindExtractor; |
98 | |
} |
99 | |
|
100 | |
} |