1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.extractor.rdf; |
19 | |
|
20 | |
import org.apache.any23.extractor.ErrorReporter; |
21 | |
import org.apache.any23.extractor.ExtractionContext; |
22 | |
import org.apache.any23.extractor.ExtractionResult; |
23 | |
import org.apache.any23.io.nquads.NQuadsParser; |
24 | |
import org.apache.any23.rdf.Any23ValueFactoryWrapper; |
25 | |
import org.openrdf.model.impl.ValueFactoryImpl; |
26 | |
import org.openrdf.rio.ParseErrorListener; |
27 | |
import org.openrdf.rio.RDFHandlerException; |
28 | |
import org.openrdf.rio.RDFParseException; |
29 | |
import org.openrdf.rio.RDFParser; |
30 | |
import org.openrdf.rio.ntriples.NTriplesParser; |
31 | |
import org.openrdf.rio.rdfxml.RDFXMLParser; |
32 | |
import org.openrdf.rio.trix.TriXParser; |
33 | |
import org.openrdf.rio.turtle.TurtleParser; |
34 | |
import org.slf4j.Logger; |
35 | |
import org.slf4j.LoggerFactory; |
36 | |
|
37 | |
import java.io.IOException; |
38 | |
import java.io.InputStream; |
39 | |
import java.io.Reader; |
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | 0 | public class RDFParserFactory { |
48 | |
|
49 | 0 | private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class); |
50 | |
|
51 | |
private static RDFParserFactory instance; |
52 | |
|
53 | |
public static RDFParserFactory getInstance() { |
54 | 0 | if(instance == null) { |
55 | 0 | instance = new RDFParserFactory(); |
56 | |
} |
57 | 0 | return instance; |
58 | |
} |
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
public TurtleParser getTurtleParserInstance( |
70 | |
final boolean verifyDataType, |
71 | |
final boolean stopAtFirstError, |
72 | |
final ExtractionContext extractionContext, |
73 | |
final ExtractionResult extractionResult |
74 | |
) { |
75 | 0 | if (extractionResult == null) { |
76 | 0 | throw new NullPointerException("extractionResult cannot be null."); |
77 | |
} |
78 | 0 | final TurtleParser parser = new ExtendedTurtleParser(); |
79 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
80 | 0 | return parser; |
81 | |
} |
82 | |
|
83 | |
|
84 | |
|
85 | |
|
86 | |
|
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | |
public RDFXMLParser getRDFXMLParser( |
93 | |
final boolean verifyDataType, |
94 | |
final boolean stopAtFirstError, |
95 | |
final ExtractionContext extractionContext, |
96 | |
final ExtractionResult extractionResult |
97 | |
) { |
98 | 0 | final RDFXMLParser parser = new RDFXMLParser(); |
99 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
100 | 0 | return parser; |
101 | |
} |
102 | |
|
103 | |
|
104 | |
|
105 | |
|
106 | |
|
107 | |
|
108 | |
|
109 | |
|
110 | |
|
111 | |
|
112 | |
public NTriplesParser getNTriplesParser( |
113 | |
final boolean verifyDataType, |
114 | |
final boolean stopAtFirstError, |
115 | |
final ExtractionContext extractionContext, |
116 | |
final ExtractionResult extractionResult |
117 | |
) { |
118 | 0 | final NTriplesParser parser = new NTriplesParser(); |
119 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
120 | 0 | return parser; |
121 | |
} |
122 | |
|
123 | |
|
124 | |
|
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
public NQuadsParser getNQuadsParser( |
133 | |
final boolean verifyDataType, |
134 | |
final boolean stopAtFirstError, |
135 | |
final ExtractionContext extractionContext, |
136 | |
final ExtractionResult extractionResult |
137 | |
) { |
138 | 0 | final NQuadsParser parser = new NQuadsParser(); |
139 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
140 | 0 | return parser; |
141 | |
} |
142 | |
|
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
public TriXParser getTriXParser( |
153 | |
final boolean verifyDataType, |
154 | |
final boolean stopAtFirstError, |
155 | |
final ExtractionContext extractionContext, |
156 | |
final ExtractionResult extractionResult |
157 | |
) { |
158 | 0 | final TriXParser parser = new TriXParser(); |
159 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
160 | 0 | return parser; |
161 | |
} |
162 | |
|
163 | |
|
164 | |
|
165 | |
|
166 | |
|
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | |
|
173 | |
|
174 | |
private void configureParser( |
175 | |
final RDFParser parser, |
176 | |
final boolean verifyDataType, |
177 | |
final boolean stopAtFirstError, |
178 | |
final ExtractionContext extractionContext, |
179 | |
final ExtractionResult extractionResult |
180 | |
) { |
181 | 0 | parser.setDatatypeHandling( |
182 | |
verifyDataType ? RDFParser.DatatypeHandling.VERIFY : RDFParser.DatatypeHandling.IGNORE |
183 | |
); |
184 | 0 | parser.setStopAtFirstError(stopAtFirstError); |
185 | 0 | parser.setParseErrorListener( new InternalParseErrorListener(extractionResult) ); |
186 | 0 | parser.setValueFactory( |
187 | |
new Any23ValueFactoryWrapper( |
188 | |
ValueFactoryImpl.getInstance(), |
189 | |
extractionResult, |
190 | |
extractionContext.getDefaultLanguage() |
191 | |
) |
192 | |
); |
193 | 0 | parser.setRDFHandler(new RDFHandlerAdapter(extractionResult)); |
194 | 0 | } |
195 | |
|
196 | |
|
197 | |
|
198 | |
|
199 | |
private class InternalParseErrorListener implements ParseErrorListener { |
200 | |
|
201 | |
private final ErrorReporter extractionResult; |
202 | |
|
203 | 0 | public InternalParseErrorListener(ErrorReporter er) { |
204 | 0 | extractionResult = er; |
205 | 0 | } |
206 | |
|
207 | |
public void warning(String msg, int lineNo, int colNo) { |
208 | |
try { |
209 | 0 | extractionResult.notifyError(ExtractionResult.ErrorLevel.WARN, msg, lineNo, colNo); |
210 | 0 | } catch (Exception e) { |
211 | 0 | notifyExceptionInNotification(e); |
212 | 0 | } |
213 | 0 | } |
214 | |
|
215 | |
public void error(String msg, int lineNo, int colNo) { |
216 | |
try { |
217 | 0 | extractionResult.notifyError(ExtractionResult.ErrorLevel.ERROR, msg, lineNo, colNo); |
218 | 0 | } catch (Exception e) { |
219 | 0 | notifyExceptionInNotification(e); |
220 | 0 | } |
221 | 0 | } |
222 | |
|
223 | |
public void fatalError(String msg, int lineNo, int colNo) { |
224 | |
try { |
225 | 0 | extractionResult.notifyError(ExtractionResult.ErrorLevel.FATAL, msg, lineNo, colNo); |
226 | 0 | } catch (Exception e) { |
227 | 0 | notifyExceptionInNotification(e); |
228 | 0 | } |
229 | 0 | } |
230 | |
|
231 | |
private void notifyExceptionInNotification(Exception e) { |
232 | 0 | if (logger != null) { |
233 | 0 | logger.error("An exception occurred while notifying an error.", e); |
234 | |
} |
235 | 0 | } |
236 | |
} |
237 | |
|
238 | |
|
239 | |
|
240 | |
|
241 | |
|
242 | 0 | private class ExtendedTurtleParser extends TurtleParser { |
243 | |
@Override |
244 | |
public void parse(Reader reader, String baseURI) |
245 | |
throws IOException, RDFParseException, RDFHandlerException { |
246 | 0 | setNamespace("", baseURI); |
247 | 0 | super.parse(reader, baseURI); |
248 | 0 | } |
249 | |
|
250 | |
@Override |
251 | |
public void parse(InputStream in, String baseURI) |
252 | |
throws IOException, RDFParseException, RDFHandlerException { |
253 | 0 | setNamespace("", baseURI); |
254 | 0 | super.parse(in, baseURI); |
255 | 0 | } |
256 | |
} |
257 | |
} |