1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;

import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
63
64
65
66
67
68
69
70 public abstract class AbstractXmlParser
71 extends AbstractParser
72 implements XmlMarkup
73 {
74
75
76
77
78
79
80 private static final Pattern PATTERN_ENTITY_1 =
81 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
82
83
84
85
86
87
88
89 private static final Pattern PATTERN_ENTITY_2 =
90 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
91
92 private boolean ignorableWhitespace;
93
94 private boolean collapsibleWhitespace;
95
96 private boolean trimmableWhitespace;
97
98 private Map<String, String> entities;
99
100 private boolean validate = false;
101
102
103 public void parse( Reader source, Sink sink, String reference )
104 throws ParseException
105 {
106 init();
107
108 Reader src = source;
109
110
111 if ( isValidate() )
112 {
113 String content;
114 try
115 {
116 content = IOUtil.toString( new BufferedReader( src ) );
117 }
118 catch ( IOException e )
119 {
120 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
121 }
122
123 new XmlValidator( getLog() ).validate( content );
124
125 src = new StringReader( content );
126 }
127
128
129 try
130 {
131 XmlPullParser parser = new MXParser();
132
133 parser.setInput( src );
134
135
136
137 initXmlParser( parser );
138
139 sink.enableLogging( getLog() );
140
141 parseXml( parser, sink );
142 }
143 catch ( XmlPullParserException ex )
144 {
145 throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
146 ex.getColumnNumber() );
147 }
148 catch ( MacroExecutionException ex )
149 {
150 throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
151 }
152
153 setSecondParsing( false );
154 init();
155 }
156
157
158
159
160
161
162
163 protected void initXmlParser( XmlPullParser parser )
164 throws XmlPullParserException
165 {
166
167 }
168
169
170 @Override
171 public final int getType()
172 {
173 return XML_TYPE;
174 }
175
176
177
178
179
180
181
182
183 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
184 {
185 int count = parser.getAttributeCount();
186
187 if ( count < 0 )
188 {
189 return null;
190 }
191
192 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
193
194 for ( int i = 0; i < count; i++ )
195 {
196 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
197 }
198
199 return atts;
200 }
201
202
203
204
205
206
207
208
209
210 private void parseXml( XmlPullParser parser, Sink sink )
211 throws XmlPullParserException, MacroExecutionException
212 {
213 int eventType = parser.getEventType();
214
215 while ( eventType != XmlPullParser.END_DOCUMENT )
216 {
217 if ( eventType == XmlPullParser.START_TAG )
218 {
219 handleStartTag( parser, sink );
220 }
221 else if ( eventType == XmlPullParser.END_TAG )
222 {
223 handleEndTag( parser, sink );
224 }
225 else if ( eventType == XmlPullParser.TEXT )
226 {
227 String text = getText( parser );
228
229 if ( isIgnorableWhitespace() )
230 {
231 if ( text.trim().length() != 0 )
232 {
233 handleText( parser, sink );
234 }
235 }
236 else
237 {
238 handleText( parser, sink );
239 }
240 }
241 else if ( eventType == XmlPullParser.CDSECT )
242 {
243 handleCdsect( parser, sink );
244 }
245 else if ( eventType == XmlPullParser.COMMENT )
246 {
247 handleComment( parser, sink );
248 }
249 else if ( eventType == XmlPullParser.ENTITY_REF )
250 {
251 handleEntity( parser, sink );
252 }
253 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
254 {
255
256 }
257 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
258 {
259
260 }
261 else if ( eventType == XmlPullParser.DOCDECL )
262 {
263 addLocalEntities( parser, parser.getText() );
264
265 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
266 {
267 addDTDEntities( parser, new String( res ) );
268 }
269 }
270
271 try
272 {
273 eventType = parser.nextToken();
274 }
275 catch ( IOException io )
276 {
277 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
278 }
279 }
280 }
281
282
283
284
285
286
287
288
289
290 protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
291 throws XmlPullParserException, MacroExecutionException;
292
293
294
295
296
297
298
299
300
301 protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
302 throws XmlPullParserException, MacroExecutionException;
303
304
305
306
307
308
309
310
311
312
313
314 protected void handleText( XmlPullParser parser, Sink sink )
315 throws XmlPullParserException
316 {
317 String text = getText( parser );
318
319
320
321
322
323 if ( StringUtils.isNotEmpty( text ) )
324 {
325 sink.text( text );
326 }
327 }
328
329
330
331
332
333
334
335
336
337
338
339 protected void handleCdsect( XmlPullParser parser, Sink sink )
340 throws XmlPullParserException
341 {
342 sink.text( getText( parser ) );
343 }
344
345
346
347
348
349
350
351
352
353
354
355 protected void handleComment( XmlPullParser parser, Sink sink )
356 throws XmlPullParserException
357 {
358 if ( isEmitComments() )
359 {
360 sink.comment( getText( parser ) );
361 }
362 }
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378 protected void handleEntity( XmlPullParser parser, Sink sink )
379 throws XmlPullParserException
380 {
381 String text = getText( parser );
382
383 String name = parser.getName();
384
385 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
386 {
387 sink.nonBreakingSpace();
388 }
389 else
390 {
391 String unescaped = HtmlTools.unescapeHTML( text );
392
393 sink.text( unescaped );
394 }
395 }
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
413 {
414 Object[] required = new Object[] { type };
415
416 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
417
418 sink.unknown( parser.getName(), required, attribs );
419 }
420
421
422
423
424
425
426
427
428 protected boolean isIgnorableWhitespace()
429 {
430 return ignorableWhitespace;
431 }
432
433
434
435
436
437
438
439
440
441
442 protected void setIgnorableWhitespace( boolean ignorable )
443 {
444 this.ignorableWhitespace = ignorable;
445 }
446
447
448
449
450
451
452
453
454 protected boolean isCollapsibleWhitespace()
455 {
456 return collapsibleWhitespace;
457 }
458
459
460
461
462
463
464
465
466
467
468 protected void setCollapsibleWhitespace( boolean collapsible )
469 {
470 this.collapsibleWhitespace = collapsible;
471 }
472
473
474
475
476
477
478
479
480 protected boolean isTrimmableWhitespace()
481 {
482 return trimmableWhitespace;
483 }
484
485
486
487
488
489
490
491
492
493
494 protected void setTrimmableWhitespace( boolean trimmable )
495 {
496 this.trimmableWhitespace = trimmable;
497 }
498
499
500
501
502
503
504
505
506
507
508
509 protected String getText( XmlPullParser parser )
510 {
511 String text = parser.getText();
512
513 if ( isTrimmableWhitespace() )
514 {
515 text = text.trim();
516 }
517
518 if ( isCollapsibleWhitespace() )
519 {
520 StringBuilder newText = new StringBuilder();
521 String[] elts = StringUtils.split( text, " \r\n" );
522 for ( int i = 0; i < elts.length; i++ )
523 {
524 newText.append( elts[i] );
525 if ( ( i + 1 ) < elts.length )
526 {
527 newText.append( " " );
528 }
529 }
530 text = newText.toString();
531 }
532
533 return text;
534 }
535
536
537
538
539
540
541
542
543
544
545
546
547
548 protected Map<String, String> getLocalEntities()
549 {
550 if ( entities == null )
551 {
552 entities = new LinkedHashMap<>();
553 }
554
555 return entities;
556 }
557
558
559
560
561
562
563
564 public boolean isValidate()
565 {
566 return validate;
567 }
568
569
570
571
572
573
574
575
576 public void setValidate( boolean validate )
577 {
578 this.validate = validate;
579 }
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
597 throws XmlPullParserException
598 {
599 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
600 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
601 {
602 return;
603 }
604
605 parser.defineEntityReplacementText( entityName, entityValue );
606 getLocalEntities().put( entityName, entityValue );
607 }
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622 private void addLocalEntities( XmlPullParser parser, String text )
623 throws XmlPullParserException
624 {
625 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
626 if ( entitiesCount > 0 )
627 {
628
629 int start = text.indexOf( '[' );
630 int end = text.lastIndexOf( ']' );
631 if ( start != -1 && end != -1 )
632 {
633 addDTDEntities( parser, text.substring( start + 1, end ) );
634 }
635 }
636 }
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653 private void addDTDEntities( XmlPullParser parser, String text )
654 throws XmlPullParserException
655 {
656 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
657 if ( entitiesCount > 0 )
658 {
659 final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
660 try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
661 {
662 String line;
663 String tmpLine = "";
664 Matcher matcher;
665 while ( ( line = reader.readLine() ) != null )
666 {
667 tmpLine += "\n" + line;
668 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
669 if ( matcher.find() && matcher.groupCount() == 7 )
670 {
671 String entityName = matcher.group( 2 );
672 String entityValue = matcher.group( 5 );
673
674 addEntity( parser, entityName, entityValue );
675 tmpLine = "";
676 }
677 else
678 {
679 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
680 if ( matcher.find() && matcher.groupCount() == 8 )
681 {
682 String entityName = matcher.group( 2 );
683 String entityValue = matcher.group( 5 );
684
685 addEntity( parser, entityName, entityValue );
686 tmpLine = "";
687 }
688 }
689 }
690 }
691 catch ( IOException e )
692 {
693
694 }
695 }
696 }
697
698
699
700
701
702 public static class CachedFileEntityResolver
703 implements EntityResolver
704 {
705
706 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
707
708
709 public InputSource resolveEntity( String publicId, String systemId )
710 throws SAXException, IOException
711 {
712 byte[] res = ENTITY_CACHE.get( systemId );
713
714 if ( res == null )
715 {
716 String systemName = FileUtils.getFile( systemId ).getName();
717 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
718
719 if ( !temp.exists() )
720 {
721
722 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
723 {
724
725
726 String resource = "/" + systemName;
727 URL url = getClass().getResource( resource );
728 if ( url != null )
729 {
730 res = toByteArray( url );
731 }
732 else
733 {
734 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
735 + " because '" + resource + "' is not available of the classpath." );
736 }
737 }
738 else
739 {
740 res = toByteArray( new URL( systemId ) );
741 }
742
743
744 copy( res, temp );
745 }
746 else
747 {
748
749 res = toByteArray( temp.toURI().toURL() );
750 }
751
752 ENTITY_CACHE.put( systemId, res );
753 }
754
755 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
756 is.setPublicId( publicId );
757 is.setSystemId( systemId );
758
759 return is;
760 }
761
762
763
764
765
766
767
768
769
770
771
772 private static byte[] toByteArray( URL url )
773 throws SAXException
774 {
775 if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
776 {
777 InputStream is = null;
778 try
779 {
780 is = url.openStream();
781 if ( is == null )
782 {
783 throw new SAXException( "Cannot open stream from the url: " + url.toString() );
784 }
785 return IOUtil.toByteArray( is );
786 }
787 catch ( IOException e )
788 {
789 throw new SAXException( "IOException: " + e.getMessage(), e );
790 }
791 finally
792 {
793 IOUtil.close( is );
794 }
795 }
796
797
798 HttpClientBuilder httpClientBuilder = HttpClientBuilder.create()
799 .useSystemProperties()
800 .setRetryHandler( new DefaultHttpRequestRetryHandler( 3, false ) )
801
802
803 .setUserAgent( "Apache-Doxia/" + doxiaVersion() );
804
805 try ( CloseableHttpResponse response = httpClientBuilder.build().execute( new HttpGet( url.toString() ) ) )
806 {
807 int statusCode = response.getStatusLine().getStatusCode();
808 if ( statusCode != HttpStatus.SC_OK )
809 {
810 throw new IOException(
811 "The status code when accessing the URL '" + url.toString() + "' was " + statusCode
812 + ", which is not allowed. The server gave this reason for the failure '"
813 + response.getStatusLine().getReasonPhrase() + "'." );
814 }
815
816 return EntityUtils.toByteArray( response.getEntity() );
817 }
818 catch ( ClientProtocolException e )
819 {
820 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
821 }
822 catch ( IOException e )
823 {
824 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
825 }
826 }
827
828
829
830
831
832
833
834
835
836 private void copy( byte[] res, File f )
837 throws SAXException
838 {
839 if ( f.isDirectory() )
840 {
841 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
842 }
843
844 OutputStream os = null;
845 try
846 {
847 os = new FileOutputStream( f );
848 IOUtil.copy( res, os );
849 }
850 catch ( IOException e )
851 {
852 throw new SAXException( "IOException: " + e.getMessage(), e );
853 }
854 finally
855 {
856 IOUtil.close( os );
857 }
858 }
859 }
860 }