package org.apache.maven.doxia.parser;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;

import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
 *
 * <p>The class drives an {@link XmlPullParser} token by token and dispatches each event to a
 * <code>handleXxx</code> method; subclasses must implement the start and end tag handlers and may
 * override the others.</p>
 *
 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 * @since 1.0
 */
public abstract class AbstractXmlParser
    extends AbstractParser
    implements XmlMarkup
{
    /**
     * Entity pattern for HTML entity, i.e. &amp;nbsp;
     * "&lt;!ENTITY(\\s)+([^&gt;|^\\s]+)(\\s)+\"(\\s)*(&amp;[a-zA-Z]{2,6};)(\\s)*\"(\\s)*&gt;
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_1 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );

    /**
     * Entity pattern for Unicode entity, i.e. &amp;#38;
     * "&lt;!ENTITY(\\s)+([^&gt;|^\\s]+)(\\s)+\"(\\s)*(&amp;(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*&gt;"
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_2 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );

    /** When <code>true</code>, text events made only of whitespace are not forwarded to {@link #handleText}. */
    private boolean ignorableWhitespace;

    /** When <code>true</code>, {@link #getText(XmlPullParser)} collapses runs of spaces, CR and LF. */
    private boolean collapsibleWhitespace;

    /** When <code>true</code>, {@link #getText(XmlPullParser)} trims the text. */
    private boolean trimmableWhitespace;

    /** Entities registered through {@link #addEntity}, created lazily by {@link #getLocalEntities()}. */
    private Map<String, String> entities;

    /** Whether the content is validated with {@link XmlValidator} before being parsed. */
    private boolean validate = false;

    /** {@inheritDoc} */
    public void parse( Reader source, Sink sink, String reference )
        throws ParseException
    {
        init();

        Reader src = source;

        // 1 first parsing if validation is required
        if ( isValidate() )
        {
            String content;
            try
            {
                content = IOUtil.toString( new BufferedReader( src ) );
            }
            catch ( IOException e )
            {
                throw new ParseException( "Error reading the model: " + e.getMessage(), e );
            }

            new XmlValidator( getLog() ).validate( content );

            // the original reader has been consumed, parse from the buffered content instead
            src = new StringReader( content );
        }

        // 2 second parsing to process
        try
        {
            XmlPullParser parser = new MXParser();

            parser.setInput( src );

            // allow parser initialization, e.g. for additional entities in XHTML
            // Note: do it after input is set, otherwise values are reset
            initXmlParser( parser );

            sink.enableLogging( getLog() );

            parseXml( parser, sink );
        }
        catch ( XmlPullParserException ex )
        {
            throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
                                      ex.getColumnNumber() );
        }
        catch ( MacroExecutionException ex )
        {
            throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
        }
        finally
        {
            // reset the state even when parsing failed, so that a failed run cannot leave the
            // second-parsing flag set for the next use of this instance
            setSecondParsing( false );
            init();
        }
    }

    /**
     * Initializes the parser with custom entities or other options.
     *
     * <p>This default implementation does nothing.</p>
     *
     * @param parser A parser, not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
     */
    protected void initXmlParser( XmlPullParser parser )
        throws XmlPullParserException
    {
        // nop
    }

    /** {@inheritDoc} */
    @Override
    public final int getType()
    {
        return XML_TYPE;
    }

    /**
     * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
     *
     * @param parser A parser, not null.
     * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
     * @since 1.1
     */
    protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
    {
        int count = parser.getAttributeCount();

        // a negative count is how the parser reports "not on a start tag"
        if ( count < 0 )
        {
            return null;
        }

        SinkEventAttributeSet atts = new SinkEventAttributeSet( count );

        for ( int i = 0; i < count; i++ )
        {
            atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
        }

        return atts;
    }

    /**
     * Parse the model from the XmlPullParser into the given sink.
     *
     * <p>Every token up to the end of the document is dispatched to the matching <code>handleXxx</code>
     * method. Ignorable whitespace and processing instructions are skipped; a document declaration
     * registers the entities of the internal subset and of every cached external entity file.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    private void parseXml( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException
    {
        int eventType = parser.getEventType();

        while ( eventType != XmlPullParser.END_DOCUMENT )
        {
            if ( eventType == XmlPullParser.START_TAG )
            {
                handleStartTag( parser, sink );
            }
            else if ( eventType == XmlPullParser.END_TAG )
            {
                handleEndTag( parser, sink );
            }
            else if ( eventType == XmlPullParser.TEXT )
            {
                String text = getText( parser );

                // with ignorable whitespace enabled, blank (or missing) text is dropped
                if ( !isIgnorableWhitespace() || ( text != null && text.trim().length() != 0 ) )
                {
                    handleText( parser, sink );
                }
            }
            else if ( eventType == XmlPullParser.CDSECT )
            {
                handleCdsect( parser, sink );
            }
            else if ( eventType == XmlPullParser.COMMENT )
            {
                handleComment( parser, sink );
            }
            else if ( eventType == XmlPullParser.ENTITY_REF )
            {
                handleEntity( parser, sink );
            }
            else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
            {
                // nop
            }
            else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
            {
                // nop
            }
            else if ( eventType == XmlPullParser.DOCDECL )
            {
                addLocalEntities( parser, parser.getText() );

                for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
                {
                    // Decode with an explicit charset instead of the platform default: UTF-8 is the XML
                    // default when an external entity carries no encoding declaration.
                    addDTDEntities( parser, new String( res, StandardCharsets.UTF_8 ) );
                }
            }

            try
            {
                eventType = parser.nextToken();
            }
            catch ( IOException io )
            {
                throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
            }
        }
    }

    /**
     * Goes through the possible start tags.
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;

    /**
     * Goes through the possible end tags.
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;

    /**
     * Handles text events.
     *
     * <p>This is a default implementation, if the parser points to a non-empty text element,
     * it is emitted as a text event into the specified sink.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleText( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        String text = getText( parser );

        /*
         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
         * parser so any whitespace that makes it here is significant.
         */
        if ( StringUtils.isNotEmpty( text ) )
        {
            sink.text( text );
        }
    }

    /**
     * Handles CDATA sections.
     *
     * <p>This is a default implementation, all data are emitted as text
     * events into the specified sink.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleCdsect( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        sink.text( getText( parser ) );
    }

    /**
     * Handles comments.
     *
     * <p>This is a default implementation, all data are emitted as comment
     * events into the specified sink, provided that {@code isEmitComments()} is <code>true</code>.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleComment( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        if ( isEmitComments() )
        {
            sink.comment( getText( parser ) );
        }
    }

    /**
     * Handles entities.
     *
     * <p>This is a default implementation, all entities are resolved and emitted as text
     * events into the specified sink, except:</p>
     * <ul>
     * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
     * are emitted as <code>nonBreakingSpace()</code> events.</li>
     * </ul>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events. Not null.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     */
    protected void handleEntity( XmlPullParser parser, Sink sink )
        throws XmlPullParserException
    {
        String text = getText( parser );

        String name = parser.getName();

        if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
        {
            sink.nonBreakingSpace();
        }
        else
        {
            String unescaped = HtmlTools.unescapeHTML( text );

            sink.text( unescaped );
        }
    }

    /**
     * Handles an unknown event.
     *
     * <p>This is a default implementation, all events are emitted as unknown
     * events into the specified sink.</p>
     *
     * @param parser the parser to get the event from.
     * @param sink the sink to receive the event.
     * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
     * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
     * It will be passed as the first argument of the required parameters to the Sink
     * {@link
     * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
     * method.
     */
    protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
    {
        Object[] required = new Object[] { type };

        SinkEventAttributeSet attribs = getAttributesFromParser( parser );

        sink.unknown( parser.getName(), required, attribs );
    }

    /**
     * <p>isIgnorableWhitespace.</p>
     *
     * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
     * @see #setIgnorableWhitespace(boolean)
     * @since 1.1
     */
    protected boolean isIgnorableWhitespace()
    {
        return ignorableWhitespace;
    }

    /**
     * Specify that whitespace will be ignored. I.e.:
     * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
     * is equivalent to
     * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
     *
     * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
     * @since 1.1
     */
    protected void setIgnorableWhitespace( boolean ignorable )
    {
        this.ignorableWhitespace = ignorable;
    }

    /**
     * <p>isCollapsibleWhitespace.</p>
     *
     * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
     * @see #setCollapsibleWhitespace(boolean)
     * @since 1.1
     */
    protected boolean isCollapsibleWhitespace()
    {
        return collapsibleWhitespace;
    }

    /**
     * Specify that text will be collapsed. I.e.:
     * <pre>Text   Text</pre>
     * is equivalent to
     * <pre>Text Text</pre>
     *
     * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
     * @since 1.1
     */
    protected void setCollapsibleWhitespace( boolean collapsible )
    {
        this.collapsibleWhitespace = collapsible;
    }

    /**
     * <p>isTrimmableWhitespace.</p>
     *
     * @return <code>true</code> if text will be trimmed, <code>false</code> otherwise.
     * @see #setTrimmableWhitespace(boolean)
     * @since 1.1
     */
    protected boolean isTrimmableWhitespace()
    {
        return trimmableWhitespace;
    }

    /**
     * Specify that text will be trimmed. I.e.:
     * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
     * is equivalent to
     * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
     *
     * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
     * @since 1.1
     */
    protected void setTrimmableWhitespace( boolean trimmable )
    {
        this.trimmableWhitespace = trimmable;
    }

    /**
     * <p>getText.</p>
     *
     * @param parser A parser, not null.
     * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration,
     * or <code>null</code> if the parser returns no text for the current event.
     * @see XmlPullParser#getText()
     * @see #isCollapsibleWhitespace()
     * @see #isTrimmableWhitespace()
     * @since 1.1
     */
    protected String getText( XmlPullParser parser )
    {
        String text = parser.getText();

        // nothing to normalize; trimming or splitting a null text would throw a NullPointerException
        if ( text == null )
        {
            return null;
        }

        if ( isTrimmableWhitespace() )
        {
            text = text.trim();
        }

        if ( isCollapsibleWhitespace() )
        {
            // split on space, CR and LF, then join the pieces with exactly one space
            StringBuilder newText = new StringBuilder();
            String[] elts = StringUtils.split( text, " \r\n" );
            for ( int i = 0; i < elts.length; i++ )
            {
                newText.append( elts[i] );
                if ( ( i + 1 ) < elts.length )
                {
                    newText.append( " " );
                }
            }
            text = newText.toString();
        }

        return text;
    }

    /**
     * Return the defined entities in a local doctype. I.e.:
     * <pre>
     * &lt;!DOCTYPE foo [
     *   &lt;!ENTITY bar "&amp;#x160;"&gt;
     *   &lt;!ENTITY bar1 "&amp;#x161;"&gt;
     * ]&gt;
     * </pre>
     *
     * @return a map of the defined entities in a local doctype, never null; entries keep insertion order.
     * @since 1.1
     */
    protected Map<String, String> getLocalEntities()
    {
        if ( entities == null )
        {
            entities = new LinkedHashMap<>();
        }

        return entities;
    }

    /**
     * <p>isValidate.</p>
     *
     * @return <code>true</code> if XML content will be validated, <code>false</code> otherwise.
     * @since 1.1
     */
    public boolean isValidate()
    {
        return validate;
    }

    /**
     * Specify a flag to validate or not the XML content.
     *
     * @param validate the validate to set
     * @see #parse(Reader, Sink)
     * @since 1.1
     */
    public void setValidate( boolean validate )
    {
        this.validate = validate;
    }

    // ----------------------------------------------------------------------
    // Private methods
    // ----------------------------------------------------------------------

    /**
     * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
     * <br>
     * By default, we exclude the default XML entities: &amp;amp;, &amp;lt;, &amp;gt;, &amp;quot; and &amp;apos;.
     *
     * @param parser not null
     * @param entityName not null
     * @param entityValue not null
     * @throws XmlPullParserException if any
     * @see XmlPullParser#defineEntityReplacementText(String, String)
     */
    private void addEntity( XmlPullParser parser, String entityName, String entityValue )
        throws XmlPullParserException
    {
        // NOTE: the test is suffix based, so every name ending with one of these tokens is skipped too
        if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
            || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
        {
            return;
        }

        parser.defineEntityReplacementText( entityName, entityValue );
        getLocalEntities().put( entityName, entityValue );
    }

    /**
     * Handle entities defined in a local doctype as the following:
     * <pre>
     * &lt;!DOCTYPE foo [
     *   &lt;!ENTITY bar "&amp;#x160;"&gt;
     *   &lt;!ENTITY bar1 "&amp;#x161;"&gt;
     * ]&gt;
     * </pre>
     *
     * @param parser not null
     * @param text not null
     * @throws XmlPullParserException if any
     */
    private void addLocalEntities( XmlPullParser parser, String text )
        throws XmlPullParserException
    {
        int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
        if ( entitiesCount > 0 )
        {
            // text should be foo [...]
            int start = text.indexOf( '[' );
            int end = text.lastIndexOf( ']' );
            if ( start != -1 && end != -1 )
            {
                addDTDEntities( parser, text.substring( start + 1, end ) );
            }
        }
    }

    /**
     * Handle entities defined in external doctypes as the following:
     * <pre>
     * &lt;!DOCTYPE foo [
     *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
     *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
     *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
     *   %HTMLlat1;
     * ]&gt;
     * </pre>
     *
     * <p>The text is scanned line by line; lines are accumulated until one of the two entity patterns
     * matches, the matched name/value pair is registered and the accumulation restarts.</p>
     *
     * @param parser not null
     * @param text not null
     * @throws XmlPullParserException if any
     */
    private void addDTDEntities( XmlPullParser parser, String text )
        throws XmlPullParserException
    {
        int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
        if ( entitiesCount > 0 )
        {
            // make sure that every declaration starts on its own line
            final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
            try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
            {
                String line;
                // lines read since the last successful match; a declaration may span several lines
                StringBuilder pending = new StringBuilder();
                while ( ( line = reader.readLine() ) != null )
                {
                    pending.append( '\n' ).append( line );

                    Matcher matcher = PATTERN_ENTITY_1.matcher( pending );
                    if ( matcher.find() && matcher.groupCount() == 7 )
                    {
                        String entityName = matcher.group( 2 );
                        String entityValue = matcher.group( 5 );

                        addEntity( parser, entityName, entityValue );
                        pending.setLength( 0 );
                    }
                    else
                    {
                        matcher = PATTERN_ENTITY_2.matcher( pending );
                        if ( matcher.find() && matcher.groupCount() == 8 )
                        {
                            String entityName = matcher.group( 2 );
                            String entityValue = matcher.group( 5 );

                            addEntity( parser, entityName, entityValue );
                            pending.setLength( 0 );
                        }
                    }
                }
            }
            catch ( IOException ignored )
            {
                // Deliberately ignored: the reader is backed by an in-memory string, and entity
                // registration is best-effort.
            }
        }
    }

    /**
     * Implementation of the callback mechanism <code>EntityResolver</code>.
     * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
     */
    public static class CachedFileEntityResolver
        implements EntityResolver
    {
        /** Map with systemId as key and the content of systemId as byte[]. */
        protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();

        /**
         * {@inheritDoc}
         *
         * <p>The content is looked up in the in-memory cache first, then in a file named after the
         * system id in the <code>java.io.tmpdir</code> directory, and is finally loaded from the classpath
         * (for <code>file</code> system ids) or from the system id URL and written to that file.</p>
         */
        @Override
        public InputSource resolveEntity( String publicId, String systemId )
            throws SAXException, IOException
        {
            byte[] res = ENTITY_CACHE.get( systemId );
            // already cached?
            if ( res == null )
            {
                // NOTE(review): the cache file name is derived from the system id and lives in the shared
                // temp directory; confirm this is acceptable when documents come from untrusted sources.
                String systemName = FileUtils.getFile( systemId ).getName();
                File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
                // maybe already as a temp file?
                if ( !temp.exists() )
                {
                    // is systemId a file or an url?
                    if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
                    {
                        // Doxia XSDs are included in the jars, so try to find the resource systemName from
                        // the classpath...
                        String resource = "/" + systemName;
                        URL url = getClass().getResource( resource );
                        if ( url != null )
                        {
                            res = toByteArray( url );
                        }
                        else
                        {
                            throw new SAXException( "Could not find the SYSTEM entity: " + systemId
                                + " because '" + resource + "' is not available of the classpath." );
                        }
                    }
                    else
                    {
                        res = toByteArray( new URL( systemId ) );
                    }

                    // write systemId as temp file
                    copy( res, temp );
                }
                else
                {
                    // TODO How to refresh Doxia XSDs from temp dir?
                    res = toByteArray( temp.toURI().toURL() );
                }

                ENTITY_CACHE.put( systemId, res );
            }

            InputSource is = new InputSource( new ByteArrayInputStream( res ) );
            is.setPublicId( publicId );
            is.setSystemId( systemId );

            return is;
        }

        /**
         * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
         * content.
         * Otherwise, use HttpClient to get the http content.
         * Wrap all internal exceptions to throw SAXException.
         *
         * @param url not null
         * @return return an array of byte
         * @throws SAXException if any
         */
        private static byte[] toByteArray( URL url )
            throws SAXException
        {
            String protocol = url.getProtocol();
            if ( !( "http".equalsIgnoreCase( protocol ) || "https".equalsIgnoreCase( protocol ) ) )
            {
                try ( InputStream is = url.openStream() )
                {
                    if ( is == null )
                    {
                        throw new SAXException( "Cannot open stream from the url: " + url.toString() );
                    }
                    return IOUtil.toByteArray( is );
                }
                catch ( IOException e )
                {
                    throw new SAXException( "IOException: " + e.getMessage(), e );
                }
            }

            // it is an HTTP url, using HttpClient...
            HttpClientBuilder httpClientBuilder = HttpClientBuilder.create()
                    .useSystemProperties()
                    .setRetryHandler( new DefaultHttpRequestRetryHandler( 3, false ) )
                    // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
                    // The default user-agent is "Apache-HttpClient/4.5.8 (java 7)"
                    .setUserAgent( "Apache-Doxia/" + doxiaVersion() );

            // both the client and the response hold connection resources: close them in all cases
            try ( CloseableHttpClient client = httpClientBuilder.build();
                  CloseableHttpResponse response = client.execute( new HttpGet( url.toString() ) ) )
            {
                int statusCode = response.getStatusLine().getStatusCode();
                if ( statusCode != HttpStatus.SC_OK )
                {
                    throw new IOException(
                            "The status code when accessing the URL '" + url.toString() + "' was " + statusCode
                                    + ", which is not allowed. The server gave this reason for the failure '"
                                    + response.getStatusLine().getReasonPhrase() + "'." );
                }

                return EntityUtils.toByteArray( response.getEntity() );
            }
            catch ( ClientProtocolException e )
            {
                throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
            }
            catch ( IOException e )
            {
                throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
            }
        }

        /**
         * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
         *
         * @param res not null array of byte
         * @param f the file where to write the bytes
         * @throws SAXException if <code>f</code> is a directory or if the bytes cannot be written, including
         * a failure while closing the file
         * @see IOUtil#copy(byte[], OutputStream)
         */
        private void copy( byte[] res, File f )
            throws SAXException
        {
            if ( f.isDirectory() )
            {
                throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
            }

            try ( OutputStream os = new FileOutputStream( f ) )
            {
                IOUtil.copy( res, os );
            }
            catch ( IOException e )
            {
                throw new SAXException( "IOException: " + e.getMessage(), e );
            }
        }
    }
}