1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedReader;
23 import java.io.ByteArrayInputStream;
24 import java.io.File;
25 import java.io.FileOutputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.OutputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31 import java.net.URL;
32 import java.util.Hashtable;
33 import java.util.LinkedHashMap;
34 import java.util.Locale;
35 import java.util.Map;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39 import org.apache.http.HttpEntity;
40 import org.apache.http.HttpResponse;
41 import org.apache.http.HttpStatus;
42 import org.apache.http.client.ClientProtocolException;
43 import org.apache.http.client.HttpRequestRetryHandler;
44 import org.apache.http.client.methods.HttpGet;
45 import org.apache.http.impl.client.DefaultHttpClient;
46 import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47 import org.apache.http.util.EntityUtils;
48
49 import org.apache.maven.doxia.macro.MacroExecutionException;
50 import org.apache.maven.doxia.markup.XmlMarkup;
51 import org.apache.maven.doxia.sink.Sink;
52 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
53 import org.apache.maven.doxia.util.HtmlTools;
54 import org.apache.maven.doxia.util.XmlValidator;
55
56 import org.codehaus.plexus.util.FileUtils;
57 import org.codehaus.plexus.util.IOUtil;
58 import org.codehaus.plexus.util.StringUtils;
59 import org.codehaus.plexus.util.xml.pull.MXParser;
60 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62
63 import org.xml.sax.EntityResolver;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.SAXException;
66
67
68
69
70
71
72
73
74 public abstract class AbstractXmlParser
75 extends AbstractParser
76 implements XmlMarkup
77 {
78
79
80
81
82
83
84 private static final Pattern PATTERN_ENTITY_1 =
85 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86
87
88
89
90
91
92
93 private static final Pattern PATTERN_ENTITY_2 =
94 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95
96 private boolean ignorableWhitespace;
97
98 private boolean collapsibleWhitespace;
99
100 private boolean trimmableWhitespace;
101
102 private Map<String, String> entities;
103
104 private boolean validate = false;
105
106
107 public void parse( Reader source, Sink sink )
108 throws ParseException
109 {
110 init();
111
112 Reader src = source;
113
114
115 if ( isValidate() )
116 {
117 String content;
118 try
119 {
120 content = IOUtil.toString( new BufferedReader( src ) );
121 }
122 catch ( IOException e )
123 {
124 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
125 }
126
127 new XmlValidator( getLog() ).validate( content );
128
129 src = new StringReader( content );
130 }
131
132
133 try
134 {
135 XmlPullParser parser = new MXParser();
136
137 parser.setInput( src );
138
139
140
141 initXmlParser( parser );
142
143 sink.enableLogging( getLog() );
144
145 parseXml( parser, sink );
146 }
147 catch ( XmlPullParserException ex )
148 {
149 throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
150 ex.getColumnNumber() );
151 }
152 catch ( MacroExecutionException ex )
153 {
154 throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
155 }
156
157 setSecondParsing( false );
158 init();
159 }
160
161
162
163
164
165
166
167 protected void initXmlParser( XmlPullParser parser )
168 throws XmlPullParserException
169 {
170
171 }
172
173
174
175
176
177
178 @Override
179 public void parse( String string, Sink sink )
180 throws ParseException
181 {
182 super.parse( string, sink );
183 }
184
185
186 @Override
187 public final int getType()
188 {
189 return XML_TYPE;
190 }
191
192
193
194
195
196
197
198
199 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
200 {
201 int count = parser.getAttributeCount();
202
203 if ( count < 0 )
204 {
205 return null;
206 }
207
208 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
209
210 for ( int i = 0; i < count; i++ )
211 {
212 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
213 }
214
215 return atts;
216 }
217
218
219
220
221
222
223
224
225
226 private void parseXml( XmlPullParser parser, Sink sink )
227 throws XmlPullParserException, MacroExecutionException
228 {
229 int eventType = parser.getEventType();
230
231 while ( eventType != XmlPullParser.END_DOCUMENT )
232 {
233 if ( eventType == XmlPullParser.START_TAG )
234 {
235 handleStartTag( parser, sink );
236 }
237 else if ( eventType == XmlPullParser.END_TAG )
238 {
239 handleEndTag( parser, sink );
240 }
241 else if ( eventType == XmlPullParser.TEXT )
242 {
243 String text = getText( parser );
244
245 if ( isIgnorableWhitespace() )
246 {
247 if ( text.trim().length() != 0 )
248 {
249 handleText( parser, sink );
250 }
251 }
252 else
253 {
254 handleText( parser, sink );
255 }
256 }
257 else if ( eventType == XmlPullParser.CDSECT )
258 {
259 handleCdsect( parser, sink );
260 }
261 else if ( eventType == XmlPullParser.COMMENT )
262 {
263 handleComment( parser, sink );
264 }
265 else if ( eventType == XmlPullParser.ENTITY_REF )
266 {
267 handleEntity( parser, sink );
268 }
269 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
270 {
271
272 }
273 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
274 {
275
276 }
277 else if ( eventType == XmlPullParser.DOCDECL )
278 {
279 addLocalEntities( parser, parser.getText() );
280
281 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
282 {
283 addDTDEntities( parser, new String( res ) );
284 }
285 }
286
287 try
288 {
289 eventType = parser.nextToken();
290 }
291 catch ( IOException io )
292 {
293 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
294 }
295 }
296 }
297
298
299
300
301
302
303
304
305
306 protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
307 throws XmlPullParserException, MacroExecutionException;
308
309
310
311
312
313
314
315
316
317 protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
318 throws XmlPullParserException, MacroExecutionException;
319
320
321
322
323
324
325
326
327
328
329
330 protected void handleText( XmlPullParser parser, Sink sink )
331 throws XmlPullParserException
332 {
333 String text = getText( parser );
334
335
336
337
338
339 if ( StringUtils.isNotEmpty( text ) )
340 {
341 sink.text( text );
342 }
343 }
344
345
346
347
348
349
350
351
352
353
354
355 protected void handleCdsect( XmlPullParser parser, Sink sink )
356 throws XmlPullParserException
357 {
358 sink.text( getText( parser ) );
359 }
360
361
362
363
364
365
366
367
368
369
370
371 protected void handleComment( XmlPullParser parser, Sink sink )
372 throws XmlPullParserException
373 {
374 if ( isEmitComments() )
375 {
376 sink.comment( getText( parser ) );
377 }
378 }
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394 protected void handleEntity( XmlPullParser parser, Sink sink )
395 throws XmlPullParserException
396 {
397 String text = getText( parser );
398
399 String name = parser.getName();
400
401 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
402 {
403 sink.nonBreakingSpace();
404 }
405 else
406 {
407 String unescaped = HtmlTools.unescapeHTML( text );
408
409 sink.text( unescaped );
410 }
411 }
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
429 {
430 Object[] required = new Object[] { Integer.valueOf( type ) };
431
432 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
433
434 sink.unknown( parser.getName(), required, attribs );
435 }
436
437
438
439
440
441
442
443
444 protected boolean isIgnorableWhitespace()
445 {
446 return ignorableWhitespace;
447 }
448
449
450
451
452
453
454
455
456
457
458 protected void setIgnorableWhitespace( boolean ignorable )
459 {
460 this.ignorableWhitespace = ignorable;
461 }
462
463
464
465
466
467
468
469
470 protected boolean isCollapsibleWhitespace()
471 {
472 return collapsibleWhitespace;
473 }
474
475
476
477
478
479
480
481
482
483
484 protected void setCollapsibleWhitespace( boolean collapsible )
485 {
486 this.collapsibleWhitespace = collapsible;
487 }
488
489
490
491
492
493
494
495
496 protected boolean isTrimmableWhitespace()
497 {
498 return trimmableWhitespace;
499 }
500
501
502
503
504
505
506
507
508
509
510 protected void setTrimmableWhitespace( boolean trimmable )
511 {
512 this.trimmableWhitespace = trimmable;
513 }
514
515
516
517
518
519
520
521
522
523
524
525 protected String getText( XmlPullParser parser )
526 {
527 String text = parser.getText();
528
529 if ( isTrimmableWhitespace() )
530 {
531 text = text.trim();
532 }
533
534 if ( isCollapsibleWhitespace() )
535 {
536 StringBuilder newText = new StringBuilder();
537 String[] elts = StringUtils.split( text, " \r\n" );
538 for ( int i = 0; i < elts.length; i++ )
539 {
540 newText.append( elts[i] );
541 if ( ( i + 1 ) < elts.length )
542 {
543 newText.append( " " );
544 }
545 }
546 text = newText.toString();
547 }
548
549 return text;
550 }
551
552
553
554
555
556
557
558
559
560
561
562
563
564 protected Map<String, String> getLocalEntities()
565 {
566 if ( entities == null )
567 {
568 entities = new LinkedHashMap<String, String>();
569 }
570
571 return entities;
572 }
573
574
575
576
577
578
579
580 public boolean isValidate()
581 {
582 return validate;
583 }
584
585
586
587
588
589
590
591
592 public void setValidate( boolean validate )
593 {
594 this.validate = validate;
595 }
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
613 throws XmlPullParserException
614 {
615 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
616 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
617 {
618 return;
619 }
620
621 parser.defineEntityReplacementText( entityName, entityValue );
622 getLocalEntities().put( entityName, entityValue );
623 }
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638 private void addLocalEntities( XmlPullParser parser, String text )
639 throws XmlPullParserException
640 {
641 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
642 if ( entitiesCount > 0 )
643 {
644
645 int start = text.indexOf( '[' );
646 int end = text.lastIndexOf( ']' );
647 if ( start != -1 && end != -1 )
648 {
649 addDTDEntities( parser, text.substring( start + 1, end ) );
650 }
651 }
652 }
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669 private void addDTDEntities( XmlPullParser parser, String text )
670 throws XmlPullParserException
671 {
672 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
673 if ( entitiesCount > 0 )
674 {
675 final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
676 BufferedReader reader = new BufferedReader( new StringReader( txt ) );
677 String line;
678 String tmpLine = "";
679 try
680 {
681 Matcher matcher;
682 while ( ( line = reader.readLine() ) != null )
683 {
684 tmpLine += "\n" + line;
685 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
686 if ( matcher.find() && matcher.groupCount() == 7 )
687 {
688 String entityName = matcher.group( 2 );
689 String entityValue = matcher.group( 5 );
690
691 addEntity( parser, entityName, entityValue );
692 tmpLine = "";
693 }
694 else
695 {
696 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
697 if ( matcher.find() && matcher.groupCount() == 8 )
698 {
699 String entityName = matcher.group( 2 );
700 String entityValue = matcher.group( 5 );
701
702 addEntity( parser, entityName, entityValue );
703 tmpLine = "";
704 }
705 }
706 }
707 }
708 catch ( IOException e )
709 {
710
711 }
712 finally
713 {
714 IOUtil.close( reader );
715 }
716 }
717 }
718
719
720
721
722
723 public static class CachedFileEntityResolver
724 implements EntityResolver
725 {
726
727 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
728
729
730 public InputSource resolveEntity( String publicId, String systemId )
731 throws SAXException, IOException
732 {
733 byte[] res = ENTITY_CACHE.get( systemId );
734
735 if ( res == null )
736 {
737 String systemName = FileUtils.getFile( systemId ).getName();
738 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
739
740 if ( !temp.exists() )
741 {
742
743 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
744 {
745
746
747 String resource = "/" + systemName;
748 URL url = getClass().getResource( resource );
749 if ( url != null )
750 {
751 res = toByteArray( url );
752 }
753 else
754 {
755 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
756 + " because '" + resource + "' is not available of the classpath." );
757 }
758 }
759 else
760 {
761 res = toByteArray( new URL( systemId ) );
762 }
763
764
765 copy( res, temp );
766 }
767 else
768 {
769
770 res = toByteArray( temp.toURI().toURL() );
771 }
772
773 ENTITY_CACHE.put( systemId, res );
774 }
775
776 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
777 is.setPublicId( publicId );
778 is.setSystemId( systemId );
779
780 return is;
781 }
782
783
784
785
786
787
788
789
790
791
792
793 private static byte[] toByteArray( URL url )
794 throws SAXException
795 {
796 if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
797 {
798 InputStream is = null;
799 try
800 {
801 is = url.openStream();
802 if ( is == null )
803 {
804 throw new SAXException( "Cannot open stream from the url: " + url.toString() );
805 }
806 return IOUtil.toByteArray( is );
807 }
808 catch ( IOException e )
809 {
810 throw new SAXException( "IOException: " + e.getMessage(), e );
811 }
812 finally
813 {
814 IOUtil.close( is );
815 }
816 }
817
818
819 DefaultHttpClient client = new DefaultHttpClient();
820 HttpGet method = new HttpGet( url.toString() );
821
822
823 method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
824
825 HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
826 client.setHttpRequestRetryHandler( retryHandler );
827
828 HttpEntity entity = null;
829 try
830 {
831 HttpResponse response = client.execute( method );
832 int statusCode = response.getStatusLine().getStatusCode();
833 if ( statusCode != HttpStatus.SC_OK )
834 {
835 throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
836 + statusCode + ", which is not allowed. The server gave this reason for the failure '"
837 + response.getStatusLine().getReasonPhrase() + "'." );
838 }
839
840 entity = response.getEntity();
841 return EntityUtils.toByteArray( entity );
842 }
843 catch ( ClientProtocolException e )
844 {
845 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
846 }
847 catch ( IOException e )
848 {
849 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
850 }
851 finally
852 {
853 if ( entity != null )
854 {
855 try
856 {
857 entity.consumeContent();
858 }
859 catch ( IOException e )
860 {
861
862 }
863 }
864 }
865 }
866
867
868
869
870
871
872
873
874
875 private void copy( byte[] res, File f )
876 throws SAXException
877 {
878 if ( f.isDirectory() )
879 {
880 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
881 }
882
883 OutputStream os = null;
884 try
885 {
886 os = new FileOutputStream( f );
887 IOUtil.copy( res, os );
888 }
889 catch ( IOException e )
890 {
891 throw new SAXException( "IOException: " + e.getMessage(), e );
892 }
893 finally
894 {
895 IOUtil.close( os );
896 }
897 }
898 }
899 }