1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
|
22 | |
|
23 | |
|
24 | |
package org.apache.commons.xmlio.in; |
25 | |
|
26 | |
import java.io.*; |
27 | |
import java.util.*; |
28 | |
import java.net.*; |
29 | |
|
30 | |
import org.xml.sax.*; |
31 | |
import org.xml.sax.helpers.*; |
32 | |
import javax.xml.parsers.*; |
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
|
70 | |
|
71 | 0 | public class SimpleImporter { |
72 | |
|
73 | |
|
74 | 0 | private boolean trimContent = true; |
75 | 0 | private boolean makeCopy = false; |
76 | 0 | private boolean zeroLengthIsNull = true; |
77 | 0 | private boolean includeLeadingCDataIntoStartElementCallback = true; |
78 | 0 | private boolean fullDebug = false; |
79 | 0 | private boolean useQName = true; |
80 | 0 | private boolean buildComplexPath = false; |
81 | |
|
82 | |
protected SAXParserFactory factory; |
83 | |
|
84 | 0 | protected List callbackHandlerList = new ArrayList(); |
85 | |
|
86 | |
|
87 | 0 | protected StringBuffer currentMixedPCData = null; |
88 | 0 | protected boolean foundMixedPCData = false; |
89 | |
|
90 | |
|
91 | 0 | protected StringBuffer firstPCData = null; |
92 | 0 | protected boolean isFirstPCData = true; |
93 | |
|
94 | |
|
95 | 0 | protected ParseElement currentElement = null; |
96 | |
|
97 | 0 | protected PathStack parseStack = new PathStack(); |
98 | |
|
99 | 0 | protected String debugBuffer = null; |
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | 0 | public SimpleImporter() { |
105 | 0 | factory = SAXParserFactory.newInstance(); |
106 | 0 | } |
107 | |
|
108 | |
|
109 | |
public boolean getFoundMixedPCData() { |
110 | 0 | return foundMixedPCData; |
111 | |
} |
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
public boolean getUseQName() { |
117 | 0 | return useQName; |
118 | |
} |
119 | |
|
120 | |
|
121 | |
|
122 | |
|
123 | |
public void setUseQName(boolean useQName) { |
124 | 0 | this.useQName = useQName; |
125 | 0 | } |
126 | |
|
127 | |
|
128 | |
|
129 | |
|
130 | |
public boolean getBuildComplexPath() { |
131 | 0 | return buildComplexPath; |
132 | |
} |
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | |
public void setBuildComplexPath(boolean buildComplexPath) { |
138 | 0 | this.buildComplexPath = buildComplexPath; |
139 | 0 | } |
140 | |
|
141 | |
|
142 | |
|
143 | |
|
144 | |
|
145 | |
public void setFullDebugMode(boolean fullDebug) { |
146 | 0 | this.fullDebug = fullDebug; |
147 | 0 | } |
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
public boolean getFullDebugMode() { |
153 | 0 | return fullDebug; |
154 | |
} |
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
|
160 | |
public String getParsedStreamForDebug() { |
161 | 0 | if (!getFullDebugMode()) { |
162 | 0 | return null; |
163 | |
} else { |
164 | 0 | return debugBuffer; |
165 | |
} |
166 | |
} |
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | |
|
173 | |
|
174 | |
|
175 | |
|
176 | |
|
177 | |
|
178 | |
public boolean getIncludeLeadingCDataIntoStartElementCallback() { |
179 | 0 | return includeLeadingCDataIntoStartElementCallback; |
180 | |
} |
181 | |
|
182 | |
|
183 | |
|
184 | |
|
185 | |
public void setIncludeLeadingCDataIntoStartElementCallback(boolean includeLeadingCDataIntoStartElementCallback) { |
186 | 0 | this.includeLeadingCDataIntoStartElementCallback = includeLeadingCDataIntoStartElementCallback; |
187 | 0 | } |
188 | |
|
189 | |
|
190 | |
|
191 | |
|
192 | |
public boolean getTrimContent() { |
193 | 0 | return trimContent; |
194 | |
} |
195 | |
|
196 | |
|
197 | |
|
198 | |
|
199 | |
|
200 | |
public void setTrimContent(boolean trimContent) { |
201 | 0 | this.trimContent = trimContent; |
202 | 0 | } |
203 | |
|
204 | |
|
205 | |
|
206 | |
|
207 | |
public boolean getZeroLengthIsNull() { |
208 | 0 | return zeroLengthIsNull; |
209 | |
} |
210 | |
|
211 | |
|
212 | |
|
213 | |
|
214 | |
public void setZeroLengthIsNull(boolean zeroLengthIsNull) { |
215 | 0 | this.zeroLengthIsNull = zeroLengthIsNull; |
216 | 0 | } |
217 | |
|
218 | |
|
219 | |
|
220 | |
|
221 | |
|
222 | |
|
223 | |
public boolean getMakeCopy() { |
224 | 0 | return makeCopy; |
225 | |
} |
226 | |
|
227 | |
|
228 | |
public void setMakeCopy(boolean makeCopy) { |
229 | 0 | this.makeCopy = makeCopy; |
230 | 0 | } |
231 | |
|
232 | |
|
233 | |
|
234 | |
|
235 | |
|
236 | |
public void addSimpleImportHandler(SimpleImportHandler callbackHandler) { |
237 | 0 | synchronized (callbackHandlerList) { |
238 | 0 | if (!callbackHandlerList.contains(callbackHandler)) { |
239 | 0 | callbackHandlerList.add(callbackHandler); |
240 | |
} |
241 | 0 | } |
242 | 0 | } |
243 | |
|
244 | |
|
245 | |
|
246 | |
|
247 | |
|
248 | |
public void removeSimpleImportHandler(SimpleImportHandler callbackHandler) { |
249 | 0 | synchronized (callbackHandlerList) { |
250 | 0 | callbackHandlerList.remove(callbackHandler); |
251 | 0 | } |
252 | 0 | } |
253 | |
|
254 | |
|
255 | |
|
256 | |
|
257 | |
|
258 | |
|
259 | |
|
260 | |
|
261 | |
public synchronized void parseUrlOrFile(String urlOrFileName) |
262 | |
throws ParserConfigurationException, SAXException, IOException, SimpleImporterException { |
263 | 0 | Throwable urlException = null; |
264 | 0 | Throwable fileException = null; |
265 | 0 | InputStream in = null; |
266 | |
try { |
267 | 0 | URL url = new URL(urlOrFileName); |
268 | 0 | URLConnection urlConnection = url.openConnection(); |
269 | 0 | in = urlConnection.getInputStream(); |
270 | 0 | } catch (MalformedURLException mue) { |
271 | 0 | urlException = mue; |
272 | 0 | } catch (IOException ioe) { |
273 | 0 | urlException = ioe; |
274 | 0 | } |
275 | |
|
276 | |
try { |
277 | 0 | in = new FileInputStream(urlOrFileName); |
278 | 0 | } catch (IOException ioe) { |
279 | 0 | fileException = ioe; |
280 | 0 | } |
281 | |
|
282 | 0 | if (in != null) { |
283 | 0 | parse(new InputSource(new BufferedInputStream(in))); |
284 | |
} else { |
285 | 0 | throw new SimpleImporterException( |
286 | |
"Could not parse " |
287 | |
+ urlOrFileName |
288 | |
+ ", is neither URL (" |
289 | |
+ urlException.getMessage() |
290 | |
+ ") nor file (" |
291 | |
+ fileException.getMessage() |
292 | |
+ ")."); |
293 | |
} |
294 | 0 | } |
295 | |
|
296 | |
|
297 | |
|
298 | |
|
299 | |
|
300 | |
|
301 | |
public synchronized void parse(InputSource is) throws ParserConfigurationException, SAXException, IOException { |
302 | 0 | firstPCData = null; |
303 | 0 | currentElement = null; |
304 | 0 | factory.setNamespaceAware(!useQName || buildComplexPath); |
305 | 0 | SAXParser parser = factory.newSAXParser(); |
306 | 0 | if (getFullDebugMode()) { |
307 | 0 | InputSource preReadIn = bufferParserStream(is); |
308 | 0 | parser.parse(preReadIn, new SAXHandler()); |
309 | 0 | } else { |
310 | 0 | parser.parse(is, new SAXHandler()); |
311 | |
} |
312 | 0 | } |
313 | |
|
314 | |
private InputSource bufferParserStream(InputSource is) throws IOException { |
315 | 0 | StringBuffer buf = new StringBuffer(); |
316 | |
Reader reader; |
317 | |
BufferedReader bufferedReader; |
318 | 0 | if (is.getCharacterStream() != null) { |
319 | 0 | reader = is.getCharacterStream(); |
320 | |
} else { |
321 | 0 | String encoding = is.getEncoding(); |
322 | 0 | if (encoding != null) { |
323 | 0 | reader = new InputStreamReader(is.getByteStream(), encoding); |
324 | |
} else { |
325 | 0 | reader = new InputStreamReader(is.getByteStream()); |
326 | |
} |
327 | |
} |
328 | 0 | if (reader instanceof BufferedReader) { |
329 | 0 | bufferedReader = (BufferedReader) reader; |
330 | |
} else { |
331 | 0 | bufferedReader = new BufferedReader(reader); |
332 | |
} |
333 | |
|
334 | |
while (true) { |
335 | 0 | String line = bufferedReader.readLine(); |
336 | 0 | if (line == null) { |
337 | 0 | break; |
338 | |
} else { |
339 | 0 | buf.append(line).append('\n'); |
340 | |
} |
341 | 0 | } |
342 | 0 | debugBuffer = buf.toString(); |
343 | 0 | return new InputSource(new StringReader(debugBuffer)); |
344 | |
} |
345 | |
|
346 | |
|
347 | |
private void callBackStartElementWhenReady() { |
348 | 0 | if (currentElement != null) { |
349 | 0 | String content = getFirstPCData(); |
350 | |
SimplePath path; |
351 | 0 | if (buildComplexPath) { |
352 | 0 | path = |
353 | |
new SimplePath( |
354 | |
currentElement.path, |
355 | |
(Item[]) currentElement.pathList.toArray(new Item[currentElement.pathList.size()])); |
356 | |
} else { |
357 | 0 | path = new SimplePath(currentElement.path); |
358 | |
|
359 | |
} |
360 | |
|
361 | 0 | synchronized (callbackHandlerList) { |
362 | 0 | for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) { |
363 | 0 | SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next(); |
364 | 0 | if (getMakeCopy()) { |
365 | |
|
366 | 0 | callbackHandler.startElement( |
367 | |
new SimplePath(path), |
368 | |
currentElement.name, |
369 | |
new AttributesImpl(currentElement.attributes), |
370 | |
content); |
371 | |
} else { |
372 | 0 | callbackHandler.startElement(path, currentElement.name, currentElement.attributes, content); |
373 | |
} |
374 | 0 | } |
375 | 0 | } |
376 | |
|
377 | 0 | firstPCData = null; |
378 | 0 | currentElement = null; |
379 | |
} |
380 | 0 | } |
381 | |
|
382 | |
private void sendCharacters(String text) { |
383 | 0 | if (text == null) |
384 | 0 | return; |
385 | |
|
386 | 0 | if (isFirstPCData) { |
387 | 0 | if (includeLeadingCDataIntoStartElementCallback) { |
388 | 0 | addToFirstPCData(text); |
389 | |
} else { |
390 | 0 | sendCData(text); |
391 | |
} |
392 | |
} else { |
393 | 0 | foundMixedPCData = true; |
394 | 0 | sendCData(text); |
395 | |
} |
396 | 0 | } |
397 | |
|
398 | |
private void callBackCDATAWhenReady() { |
399 | 0 | callBackStartElementWhenReady(); |
400 | 0 | if (currentMixedPCData == null) { |
401 | 0 | return; |
402 | |
} |
403 | 0 | String text = currentMixedPCData.toString(); |
404 | 0 | text = trimPCData(text); |
405 | 0 | if (text == null) { |
406 | 0 | return; |
407 | |
} |
408 | |
|
409 | |
SimplePath path; |
410 | 0 | if (buildComplexPath) { |
411 | 0 | path = |
412 | |
new SimplePath( |
413 | |
parseStack.getPath(), |
414 | |
(Item[]) parseStack.getPathList().toArray(new Item[parseStack.getPathList().size()])); |
415 | |
} else { |
416 | 0 | path = new SimplePath(parseStack.getPath()); |
417 | |
|
418 | |
} |
419 | |
|
420 | 0 | synchronized (callbackHandlerList) { |
421 | 0 | for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) { |
422 | 0 | SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next(); |
423 | 0 | if (getMakeCopy()) { |
424 | |
|
425 | 0 | callbackHandler.cData(new SimplePath(path), text); |
426 | |
} else { |
427 | 0 | callbackHandler.cData(path, text); |
428 | |
} |
429 | 0 | } |
430 | 0 | } |
431 | 0 | currentMixedPCData = null; |
432 | 0 | } |
433 | |
|
434 | |
|
435 | |
private void sendCData(String text) { |
436 | |
|
437 | |
|
438 | 0 | if (currentMixedPCData == null) { |
439 | 0 | currentMixedPCData = new StringBuffer(text.length()); |
440 | |
} |
441 | 0 | currentMixedPCData.append(text); |
442 | 0 | } |
443 | |
|
444 | |
private void addToFirstPCData(String text) { |
445 | 0 | if (firstPCData == null) { |
446 | 0 | firstPCData = new StringBuffer(text.length()); |
447 | |
} |
448 | 0 | firstPCData.append(text); |
449 | 0 | } |
450 | |
|
451 | |
private String getFirstPCData() { |
452 | 0 | if (firstPCData == null) { |
453 | 0 | return null; |
454 | |
} else { |
455 | 0 | String text = firstPCData.toString(); |
456 | 0 | return trimPCData(text); |
457 | |
} |
458 | |
} |
459 | |
|
460 | |
|
461 | |
private String trimPCData(String pcData) { |
462 | 0 | if (pcData == null) { |
463 | 0 | return null; |
464 | |
} else { |
465 | 0 | if (getTrimContent()) { |
466 | 0 | pcData = pcData.trim(); |
467 | |
} |
468 | 0 | if (pcData.length() == 0 && getZeroLengthIsNull()) { |
469 | 0 | return null; |
470 | |
} else { |
471 | 0 | return pcData; |
472 | |
} |
473 | |
} |
474 | |
} |
475 | |
|
476 | |
|
477 | |
private final static class ParseElement { |
478 | |
public String name, path; |
479 | |
public List pathList; |
480 | |
public AttributesImpl attributes; |
481 | |
|
482 | 0 | public ParseElement(String name, String path, List pathList, AttributesImpl attributes) { |
483 | 0 | this.name = name; |
484 | 0 | this.path = path; |
485 | 0 | this.attributes = attributes; |
486 | 0 | this.pathList = pathList; |
487 | 0 | } |
488 | |
} |
489 | |
|
490 | 0 | private final class SAXHandler extends DefaultHandler { |
491 | |
public void startDocument() { |
492 | 0 | synchronized (callbackHandlerList) { |
493 | 0 | for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) { |
494 | 0 | SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next(); |
495 | 0 | callbackHandler.startDocument(); |
496 | 0 | } |
497 | 0 | } |
498 | 0 | } |
499 | |
|
500 | |
public void endDocument() { |
501 | |
|
502 | 0 | callBackStartElementWhenReady(); |
503 | 0 | callBackCDATAWhenReady(); |
504 | 0 | synchronized (callbackHandlerList) { |
505 | 0 | for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) { |
506 | 0 | SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next(); |
507 | 0 | callbackHandler.endDocument(); |
508 | 0 | } |
509 | 0 | } |
510 | 0 | } |
511 | |
|
512 | |
public void characters(char ch[], int start, int length) { |
513 | 0 | if (length < 1) |
514 | 0 | return; |
515 | 0 | String text = new String(ch, start, length); |
516 | 0 | sendCharacters(text); |
517 | 0 | } |
518 | |
|
519 | |
public void endElement(String namespaceURI, String localName, String qName) { |
520 | |
|
521 | |
|
522 | 0 | callBackStartElementWhenReady(); |
523 | 0 | callBackCDATAWhenReady(); |
524 | |
String name; |
525 | 0 | if (!useQName || qName == null || qName.length() == 0) { |
526 | 0 | name = localName; |
527 | |
} else { |
528 | 0 | name = qName; |
529 | |
} |
530 | |
|
531 | |
SimplePath path; |
532 | 0 | if (buildComplexPath) { |
533 | 0 | path = |
534 | |
new SimplePath( |
535 | |
parseStack.getPath(), |
536 | |
(Item[]) parseStack.getPathList().toArray(new Item[parseStack.getPathList().size()])); |
537 | |
} else { |
538 | 0 | path = new SimplePath(parseStack.getPath()); |
539 | |
|
540 | |
} |
541 | |
|
542 | 0 | synchronized (callbackHandlerList) { |
543 | 0 | for (Iterator it = callbackHandlerList.iterator(); it.hasNext();) { |
544 | 0 | SimpleImportHandler callbackHandler = (SimpleImportHandler) it.next(); |
545 | 0 | if (getMakeCopy()) { |
546 | |
|
547 | 0 | callbackHandler.endElement(new SimplePath(path), name); |
548 | |
} else { |
549 | 0 | callbackHandler.endElement(path, name); |
550 | |
} |
551 | 0 | } |
552 | 0 | } |
553 | |
|
554 | |
|
555 | 0 | if (parseStack.empty()) { |
556 | 0 | throw new SimpleImporterException("Umatchted end tag: " + name); |
557 | |
} else { |
558 | 0 | Object top = parseStack.peek(); |
559 | |
String topName; |
560 | 0 | if (buildComplexPath) { |
561 | 0 | topName = ((Item)top).getName(); |
562 | |
} else { |
563 | 0 | topName = (String)top; |
564 | |
} |
565 | 0 | if (!name.equals(topName)) { |
566 | 0 | throw new SimpleImporterException( |
567 | |
"End tag " + name + " does not match start tag " + top); |
568 | |
} else { |
569 | 0 | parseStack.pop(); |
570 | |
} |
571 | |
} |
572 | |
|
573 | 0 | isFirstPCData = false; |
574 | 0 | } |
575 | |
|
576 | |
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { |
577 | |
|
578 | |
|
579 | 0 | callBackStartElementWhenReady(); |
580 | 0 | callBackCDATAWhenReady(); |
581 | |
String name; |
582 | 0 | if (!useQName || qName == null || qName.length() == 0) { |
583 | 0 | name = localName; |
584 | |
} else { |
585 | 0 | name = qName; |
586 | |
} |
587 | 0 | parseStack.push(namespaceURI, name); |
588 | |
|
589 | |
|
590 | 0 | AttributesImpl attributesCopy = new AttributesImpl(atts); |
591 | 0 | currentElement = new ParseElement(name, parseStack.getPath(), parseStack.getPathList(), attributesCopy); |
592 | |
|
593 | |
|
594 | |
|
595 | |
|
596 | 0 | isFirstPCData = true; |
597 | 0 | } |
598 | |
} |
599 | |
|
600 | |
|
601 | |
private final class PathStack { |
602 | |
|
603 | |
private List pathStack; |
604 | |
|
605 | 0 | public PathStack(int initialCapacity) { |
606 | 0 | pathStack = new ArrayList(initialCapacity); |
607 | 0 | } |
608 | |
|
609 | 0 | public PathStack() { |
610 | 0 | pathStack = new ArrayList(); |
611 | 0 | } |
612 | |
|
613 | |
public String getPath() { |
614 | 0 | StringBuffer path = new StringBuffer(100); |
615 | |
|
616 | 0 | path.append('/'); |
617 | 0 | for (Iterator it = pathStack.iterator(); it.hasNext();) { |
618 | 0 | Object element = it.next(); |
619 | |
String pathElement; |
620 | 0 | if (buildComplexPath) { |
621 | 0 | pathElement = ((Item) element).getName(); |
622 | |
} else { |
623 | 0 | pathElement = (String) element; |
624 | |
} |
625 | 0 | path.append(pathElement).append('/'); |
626 | 0 | } |
627 | 0 | return path.toString(); |
628 | |
} |
629 | |
|
630 | |
public List getPathList() { |
631 | 0 | return pathStack; |
632 | |
} |
633 | |
|
634 | |
public String toString() { |
635 | 0 | return getPath(); |
636 | |
} |
637 | |
|
638 | |
public void push(String namespaceURI, String name) { |
639 | 0 | if (buildComplexPath) { |
640 | 0 | pathStack.add(new Item(name, namespaceURI)); |
641 | |
} else { |
642 | 0 | pathStack.add(name); |
643 | |
} |
644 | 0 | } |
645 | |
|
646 | |
public int size() { |
647 | 0 | return pathStack.size(); |
648 | |
} |
649 | |
|
650 | |
public boolean empty() { |
651 | 0 | return (pathStack.size() <= 0); |
652 | |
} |
653 | |
|
654 | |
public Object peek() { |
655 | 0 | int size = pathStack.size(); |
656 | 0 | if (size > 0) { |
657 | 0 | return pathStack.get(size - 1); |
658 | |
} else { |
659 | 0 | return null; |
660 | |
} |
661 | |
} |
662 | |
|
663 | |
public Object pop() { |
664 | 0 | int size = pathStack.size(); |
665 | 0 | if (size > 0) { |
666 | 0 | Object o = pathStack.get(size - 1); |
667 | 0 | pathStack.remove(size - 1); |
668 | 0 | return o; |
669 | |
} else { |
670 | 0 | return null; |
671 | |
} |
672 | |
} |
673 | |
|
674 | |
} |
675 | |
} |