1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;

import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
63
64
65
66
67
68
69
70
71 public abstract class AbstractXmlParser
72 extends AbstractParser
73 implements XmlMarkup
74 {
75
76
77
78
79
80
81 private static final Pattern PATTERN_ENTITY_1 =
82 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
83
84
85
86
87
88
89
90 private static final Pattern PATTERN_ENTITY_2 =
91 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
92
93 private boolean ignorableWhitespace;
94
95 private boolean collapsibleWhitespace;
96
97 private boolean trimmableWhitespace;
98
99 private Map<String, String> entities;
100
101 private boolean validate = false;
102
103
104 public void parse( Reader source, Sink sink )
105 throws ParseException
106 {
107 init();
108
109 Reader src = source;
110
111
112 if ( isValidate() )
113 {
114 String content;
115 try
116 {
117 content = IOUtil.toString( new BufferedReader( src ) );
118 }
119 catch ( IOException e )
120 {
121 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
122 }
123
124 new XmlValidator( getLog() ).validate( content );
125
126 src = new StringReader( content );
127 }
128
129
130 try
131 {
132 XmlPullParser parser = new MXParser();
133
134 parser.setInput( src );
135
136
137
138 initXmlParser( parser );
139
140 sink.enableLogging( getLog() );
141
142 parseXml( parser, sink );
143 }
144 catch ( XmlPullParserException ex )
145 {
146 throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
147 ex.getColumnNumber() );
148 }
149 catch ( MacroExecutionException ex )
150 {
151 throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
152 }
153
154 setSecondParsing( false );
155 init();
156 }
157
158
159
160
161
162
163
164 protected void initXmlParser( XmlPullParser parser )
165 throws XmlPullParserException
166 {
167
168 }
169
170
171
172
173
174
175 @Override
176 public void parse( String string, Sink sink )
177 throws ParseException
178 {
179 super.parse( string, sink );
180 }
181
182
183 @Override
184 public final int getType()
185 {
186 return XML_TYPE;
187 }
188
189
190
191
192
193
194
195
196 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
197 {
198 int count = parser.getAttributeCount();
199
200 if ( count < 0 )
201 {
202 return null;
203 }
204
205 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
206
207 for ( int i = 0; i < count; i++ )
208 {
209 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
210 }
211
212 return atts;
213 }
214
215
216
217
218
219
220
221
222
223 private void parseXml( XmlPullParser parser, Sink sink )
224 throws XmlPullParserException, MacroExecutionException
225 {
226 int eventType = parser.getEventType();
227
228 while ( eventType != XmlPullParser.END_DOCUMENT )
229 {
230 if ( eventType == XmlPullParser.START_TAG )
231 {
232 handleStartTag( parser, sink );
233 }
234 else if ( eventType == XmlPullParser.END_TAG )
235 {
236 handleEndTag( parser, sink );
237 }
238 else if ( eventType == XmlPullParser.TEXT )
239 {
240 String text = getText( parser );
241
242 if ( isIgnorableWhitespace() )
243 {
244 if ( text.trim().length() != 0 )
245 {
246 handleText( parser, sink );
247 }
248 }
249 else
250 {
251 handleText( parser, sink );
252 }
253 }
254 else if ( eventType == XmlPullParser.CDSECT )
255 {
256 handleCdsect( parser, sink );
257 }
258 else if ( eventType == XmlPullParser.COMMENT )
259 {
260 handleComment( parser, sink );
261 }
262 else if ( eventType == XmlPullParser.ENTITY_REF )
263 {
264 handleEntity( parser, sink );
265 }
266 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
267 {
268
269 }
270 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
271 {
272
273 }
274 else if ( eventType == XmlPullParser.DOCDECL )
275 {
276 addLocalEntities( parser, parser.getText() );
277
278 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
279 {
280 addDTDEntities( parser, new String( res ) );
281 }
282 }
283
284 try
285 {
286 eventType = parser.nextToken();
287 }
288 catch ( IOException io )
289 {
290 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
291 }
292 }
293 }
294
295
296
297
298
299
300
301
302
/**
 * Handles a start tag. Called by the parsing loop for every {@code START_TAG} event.
 *
 * @param parser the parser, positioned on the start tag.
 * @param sink the sink receiving the events.
 * @throws XmlPullParserException if the implementation hits a parsing problem.
 * @throws MacroExecutionException if the implementation fails to execute a macro.
 */
protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
    throws XmlPullParserException, MacroExecutionException;
305
306
307
308
309
310
311
312
313
/**
 * Handles an end tag. Called by the parsing loop for every {@code END_TAG} event.
 *
 * @param parser the parser, positioned on the end tag.
 * @param sink the sink receiving the events.
 * @throws XmlPullParserException if the implementation hits a parsing problem.
 * @throws MacroExecutionException if the implementation fails to execute a macro.
 */
protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
    throws XmlPullParserException, MacroExecutionException;
316
317
318
319
320
321
322
323
324
325
326
327 protected void handleText( XmlPullParser parser, Sink sink )
328 throws XmlPullParserException
329 {
330 String text = getText( parser );
331
332
333
334
335
336 if ( StringUtils.isNotEmpty( text ) )
337 {
338 sink.text( text );
339 }
340 }
341
342
343
344
345
346
347
348
349
350
351
352 protected void handleCdsect( XmlPullParser parser, Sink sink )
353 throws XmlPullParserException
354 {
355 sink.text( getText( parser ) );
356 }
357
358
359
360
361
362
363
364
365
366
367
368 protected void handleComment( XmlPullParser parser, Sink sink )
369 throws XmlPullParserException
370 {
371 if ( isEmitComments() )
372 {
373 sink.comment( getText( parser ) );
374 }
375 }
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391 protected void handleEntity( XmlPullParser parser, Sink sink )
392 throws XmlPullParserException
393 {
394 String text = getText( parser );
395
396 String name = parser.getName();
397
398 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
399 {
400 sink.nonBreakingSpace();
401 }
402 else
403 {
404 String unescaped = HtmlTools.unescapeHTML( text );
405
406 sink.text( unescaped );
407 }
408 }
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
426 {
427 Object[] required = new Object[] { type };
428
429 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
430
431 sink.unknown( parser.getName(), required, attribs );
432 }
433
434
435
436
437
438
439
440
441 protected boolean isIgnorableWhitespace()
442 {
443 return ignorableWhitespace;
444 }
445
446
447
448
449
450
451
452
453
454
455 protected void setIgnorableWhitespace( boolean ignorable )
456 {
457 this.ignorableWhitespace = ignorable;
458 }
459
460
461
462
463
464
465
466
467 protected boolean isCollapsibleWhitespace()
468 {
469 return collapsibleWhitespace;
470 }
471
472
473
474
475
476
477
478
479
480
481 protected void setCollapsibleWhitespace( boolean collapsible )
482 {
483 this.collapsibleWhitespace = collapsible;
484 }
485
486
487
488
489
490
491
492
493 protected boolean isTrimmableWhitespace()
494 {
495 return trimmableWhitespace;
496 }
497
498
499
500
501
502
503
504
505
506
507 protected void setTrimmableWhitespace( boolean trimmable )
508 {
509 this.trimmableWhitespace = trimmable;
510 }
511
512
513
514
515
516
517
518
519
520
521
522 protected String getText( XmlPullParser parser )
523 {
524 String text = parser.getText();
525
526 if ( isTrimmableWhitespace() )
527 {
528 text = text.trim();
529 }
530
531 if ( isCollapsibleWhitespace() )
532 {
533 StringBuilder newText = new StringBuilder();
534 String[] elts = StringUtils.split( text, " \r\n" );
535 for ( int i = 0; i < elts.length; i++ )
536 {
537 newText.append( elts[i] );
538 if ( ( i + 1 ) < elts.length )
539 {
540 newText.append( " " );
541 }
542 }
543 text = newText.toString();
544 }
545
546 return text;
547 }
548
549
550
551
552
553
554
555
556
557
558
559
560
561 protected Map<String, String> getLocalEntities()
562 {
563 if ( entities == null )
564 {
565 entities = new LinkedHashMap<>();
566 }
567
568 return entities;
569 }
570
571
572
573
574
575
576
577 public boolean isValidate()
578 {
579 return validate;
580 }
581
582
583
584
585
586
587
588
589 public void setValidate( boolean validate )
590 {
591 this.validate = validate;
592 }
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
610 throws XmlPullParserException
611 {
612 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
613 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
614 {
615 return;
616 }
617
618 parser.defineEntityReplacementText( entityName, entityValue );
619 getLocalEntities().put( entityName, entityValue );
620 }
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635 private void addLocalEntities( XmlPullParser parser, String text )
636 throws XmlPullParserException
637 {
638 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
639 if ( entitiesCount > 0 )
640 {
641
642 int start = text.indexOf( '[' );
643 int end = text.lastIndexOf( ']' );
644 if ( start != -1 && end != -1 )
645 {
646 addDTDEntities( parser, text.substring( start + 1, end ) );
647 }
648 }
649 }
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666 private void addDTDEntities( XmlPullParser parser, String text )
667 throws XmlPullParserException
668 {
669 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
670 if ( entitiesCount > 0 )
671 {
672 final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
673 try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
674 {
675 String line;
676 String tmpLine = "";
677 Matcher matcher;
678 while ( ( line = reader.readLine() ) != null )
679 {
680 tmpLine += "\n" + line;
681 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
682 if ( matcher.find() && matcher.groupCount() == 7 )
683 {
684 String entityName = matcher.group( 2 );
685 String entityValue = matcher.group( 5 );
686
687 addEntity( parser, entityName, entityValue );
688 tmpLine = "";
689 }
690 else
691 {
692 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
693 if ( matcher.find() && matcher.groupCount() == 8 )
694 {
695 String entityName = matcher.group( 2 );
696 String entityValue = matcher.group( 5 );
697
698 addEntity( parser, entityName, entityValue );
699 tmpLine = "";
700 }
701 }
702 }
703 }
704 catch ( IOException e )
705 {
706
707 }
708 }
709 }
710
711
712
713
714
715 public static class CachedFileEntityResolver
716 implements EntityResolver
717 {
718
719 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
720
721
722 public InputSource resolveEntity( String publicId, String systemId )
723 throws SAXException, IOException
724 {
725 byte[] res = ENTITY_CACHE.get( systemId );
726
727 if ( res == null )
728 {
729 String systemName = FileUtils.getFile( systemId ).getName();
730 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
731
732 if ( !temp.exists() )
733 {
734
735 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
736 {
737
738
739 String resource = "/" + systemName;
740 URL url = getClass().getResource( resource );
741 if ( url != null )
742 {
743 res = toByteArray( url );
744 }
745 else
746 {
747 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
748 + " because '" + resource + "' is not available of the classpath." );
749 }
750 }
751 else
752 {
753 res = toByteArray( new URL( systemId ) );
754 }
755
756
757 copy( res, temp );
758 }
759 else
760 {
761
762 res = toByteArray( temp.toURI().toURL() );
763 }
764
765 ENTITY_CACHE.put( systemId, res );
766 }
767
768 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
769 is.setPublicId( publicId );
770 is.setSystemId( systemId );
771
772 return is;
773 }
774
775
776
777
778
779
780
781
782
783
784
785 private static byte[] toByteArray( URL url )
786 throws SAXException
787 {
788 if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
789 {
790 InputStream is = null;
791 try
792 {
793 is = url.openStream();
794 if ( is == null )
795 {
796 throw new SAXException( "Cannot open stream from the url: " + url.toString() );
797 }
798 return IOUtil.toByteArray( is );
799 }
800 catch ( IOException e )
801 {
802 throw new SAXException( "IOException: " + e.getMessage(), e );
803 }
804 finally
805 {
806 IOUtil.close( is );
807 }
808 }
809
810
811 HttpClientBuilder httpClientBuilder = HttpClientBuilder.create()
812 .useSystemProperties()
813 .setRetryHandler( new DefaultHttpRequestRetryHandler( 3, false ) )
814
815
816 .setUserAgent( "Apache-Doxia/" + doxiaVersion() );
817
818 try ( CloseableHttpResponse response = httpClientBuilder.build().execute( new HttpGet( url.toString() ) ) )
819 {
820 int statusCode = response.getStatusLine().getStatusCode();
821 if ( statusCode != HttpStatus.SC_OK )
822 {
823 throw new IOException(
824 "The status code when accessing the URL '" + url.toString() + "' was " + statusCode
825 + ", which is not allowed. The server gave this reason for the failure '"
826 + response.getStatusLine().getReasonPhrase() + "'." );
827 }
828
829 return EntityUtils.toByteArray( response.getEntity() );
830 }
831 catch ( ClientProtocolException e )
832 {
833 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
834 }
835 catch ( IOException e )
836 {
837 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
838 }
839 }
840
841
842
843
844
845
846
847
848
849 private void copy( byte[] res, File f )
850 throws SAXException
851 {
852 if ( f.isDirectory() )
853 {
854 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
855 }
856
857 OutputStream os = null;
858 try
859 {
860 os = new FileOutputStream( f );
861 IOUtil.copy( res, os );
862 }
863 catch ( IOException e )
864 {
865 throw new SAXException( "IOException: " + e.getMessage(), e );
866 }
867 finally
868 {
869 IOUtil.close( os );
870 }
871 }
872 }
873 }