View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedReader;
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.net.URL;
32  import java.util.Hashtable;
33  import java.util.LinkedHashMap;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  import org.apache.http.HttpStatus;
40  import org.apache.http.client.ClientProtocolException;
41  import org.apache.http.client.methods.CloseableHttpResponse;
42  import org.apache.http.client.methods.HttpGet;
43  import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
44  import org.apache.http.impl.client.HttpClientBuilder;
45  import org.apache.http.util.EntityUtils;
46  import org.apache.maven.doxia.macro.MacroExecutionException;
47  import org.apache.maven.doxia.markup.XmlMarkup;
48  import org.apache.maven.doxia.sink.Sink;
49  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
50  import org.apache.maven.doxia.util.HtmlTools;
51  import org.apache.maven.doxia.util.XmlValidator;
52  
53  import org.codehaus.plexus.util.FileUtils;
54  import org.codehaus.plexus.util.IOUtil;
55  import org.codehaus.plexus.util.StringUtils;
56  import org.codehaus.plexus.util.xml.pull.MXParser;
57  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
58  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
59  
60  import org.xml.sax.EntityResolver;
61  import org.xml.sax.InputSource;
62  import org.xml.sax.SAXException;
63  
64  /**
65   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
66   *
67   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
68   * @since 1.0
69   */
70  public abstract class AbstractXmlParser
71      extends AbstractParser
72      implements XmlMarkup
73  {
74      /**
75       * Entity pattern for HTML entity, i.e. &#38;nbsp;
76       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
77       * <br>
78       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
79       */
80      private static final Pattern PATTERN_ENTITY_1 =
81          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
82  
83      /**
84       * Entity pattern for Unicode entity, i.e. &#38;#38;
85       * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
86       * <br>
87       * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
88       */
89      private static final Pattern PATTERN_ENTITY_2 =
90          Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
91  
92      private boolean ignorableWhitespace;
93  
94      private boolean collapsibleWhitespace;
95  
96      private boolean trimmableWhitespace;
97  
98      private Map<String, String> entities;
99  
100     private boolean validate = false;
101 
102     /** {@inheritDoc} */
103     public void parse( Reader source, Sink sink, String reference )
104         throws ParseException
105     {
106         init();
107 
108         Reader src = source;
109 
110         // 1 first parsing if validation is required
111         if ( isValidate() )
112         {
113             String content;
114             try
115             {
116                 content = IOUtil.toString( new BufferedReader( src ) );
117             }
118             catch ( IOException e )
119             {
120                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
121             }
122 
123             new XmlValidator( getLog() ).validate( content );
124 
125             src = new StringReader( content );
126         }
127 
128         // 2 second parsing to process
129         try
130         {
131             XmlPullParser parser = new MXParser();
132 
133             parser.setInput( src );
134             
135             // allow parser initialization, e.g. for additional entities in XHTML
136             // Note: do it after input is set, otherwise values are reset
137             initXmlParser( parser );
138 
139             sink.enableLogging( getLog() );
140 
141             parseXml( parser, sink );
142         }
143         catch ( XmlPullParserException ex )
144         {
145             throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
146                                       ex.getColumnNumber() );
147         }
148         catch ( MacroExecutionException ex )
149         {
150             throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
151         }
152 
153         setSecondParsing( false );
154         init();
155     }
156 
157     /**
158      * Initializes the parser with custom entities or other options.
159      *
160      * @param parser A parser, not null.
161      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
162      */
163     protected void initXmlParser( XmlPullParser parser )
164         throws XmlPullParserException
165     {
166         // nop
167     }
168 
169     /** {@inheritDoc} */
170     @Override
171     public final int getType()
172     {
173         return XML_TYPE;
174     }
175 
176     /**
177      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
178      *
179      * @param parser A parser, not null.
180      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
181      * @since 1.1
182      */
183     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
184     {
185         int count = parser.getAttributeCount();
186 
187         if ( count < 0 )
188         {
189             return null;
190         }
191 
192         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
193 
194         for ( int i = 0; i < count; i++ )
195         {
196             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
197         }
198 
199         return atts;
200     }
201 
202     /**
203      * Parse the model from the XmlPullParser into the given sink.
204      *
205      * @param parser A parser, not null.
206      * @param sink the sink to receive the events.
207      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
208      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
209      */
210     private void parseXml( XmlPullParser parser, Sink sink )
211         throws XmlPullParserException, MacroExecutionException
212     {
213         int eventType = parser.getEventType();
214 
215         while ( eventType != XmlPullParser.END_DOCUMENT )
216         {
217             if ( eventType == XmlPullParser.START_TAG )
218             {
219                 handleStartTag( parser, sink );
220             }
221             else if ( eventType == XmlPullParser.END_TAG )
222             {
223                 handleEndTag( parser, sink );
224             }
225             else if ( eventType == XmlPullParser.TEXT )
226             {
227                 String text = getText( parser );
228 
229                 if ( isIgnorableWhitespace() )
230                 {
231                     if ( text.trim().length() != 0 )
232                     {
233                         handleText( parser, sink );
234                     }
235                 }
236                 else
237                 {
238                     handleText( parser, sink );
239                 }
240             }
241             else if ( eventType == XmlPullParser.CDSECT )
242             {
243                 handleCdsect( parser, sink );
244             }
245             else if ( eventType == XmlPullParser.COMMENT )
246             {
247                 handleComment( parser, sink );
248             }
249             else if ( eventType == XmlPullParser.ENTITY_REF )
250             {
251                 handleEntity( parser, sink );
252             }
253             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
254             {
255                 // nop
256             }
257             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
258             {
259                 // nop
260             }
261             else if ( eventType == XmlPullParser.DOCDECL )
262             {
263                 addLocalEntities( parser, parser.getText() );
264 
265                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
266                 {
267                     addDTDEntities( parser, new String( res ) );
268                 }
269             }
270 
271             try
272             {
273                 eventType = parser.nextToken();
274             }
275             catch ( IOException io )
276             {
277                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
278             }
279         }
280     }
281 
282     /**
283      * Goes through the possible start tags.
284      *
285      * @param parser A parser, not null.
286      * @param sink the sink to receive the events.
287      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
288      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
289      */
290     protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
291         throws XmlPullParserException, MacroExecutionException;
292 
293     /**
294      * Goes through the possible end tags.
295      *
296      * @param parser A parser, not null.
297      * @param sink the sink to receive the events.
298      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
299      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
300      */
301     protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
302         throws XmlPullParserException, MacroExecutionException;
303 
304     /**
305      * Handles text events.
306      *
307      * <p>This is a default implementation, if the parser points to a non-empty text element,
308      * it is emitted as a text event into the specified sink.</p>
309      *
310      * @param parser A parser, not null.
311      * @param sink the sink to receive the events. Not null.
312      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
313      */
314     protected void handleText( XmlPullParser parser, Sink sink )
315         throws XmlPullParserException
316     {
317         String text = getText( parser );
318 
319         /*
320          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
321          * parser so any whitespace that makes it here is significant.
322          */
323         if ( StringUtils.isNotEmpty( text ) )
324         {
325             sink.text( text );
326         }
327     }
328 
329     /**
330      * Handles CDATA sections.
331      *
332      * <p>This is a default implementation, all data are emitted as text
333      * events into the specified sink.</p>
334      *
335      * @param parser A parser, not null.
336      * @param sink the sink to receive the events. Not null.
337      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
338      */
339     protected void handleCdsect( XmlPullParser parser, Sink sink )
340         throws XmlPullParserException
341     {
342         sink.text( getText( parser ) );
343     }
344 
345     /**
346      * Handles comments.
347      *
348      * <p>This is a default implementation, all data are emitted as comment
349      * events into the specified sink.</p>
350      *
351      * @param parser A parser, not null.
352      * @param sink the sink to receive the events. Not null.
353      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354      */
355     protected void handleComment( XmlPullParser parser, Sink sink )
356         throws XmlPullParserException
357     {
358         if ( isEmitComments() )
359         {
360             sink.comment( getText( parser ) );
361         }
362     }
363 
364     /**
365      * Handles entities.
366      *
367      * <p>This is a default implementation, all entities are resolved and emitted as text
368      * events into the specified sink, except:</p>
369      * <ul>
370      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
371      * are emitted as <code>nonBreakingSpace()</code> events.</li>
372      * </ul>
373      *
374      * @param parser A parser, not null.
375      * @param sink the sink to receive the events. Not null.
376      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
377      */
378     protected void handleEntity( XmlPullParser parser, Sink sink )
379         throws XmlPullParserException
380     {
381         String text = getText( parser );
382 
383         String name = parser.getName();
384 
385         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
386         {
387             sink.nonBreakingSpace();
388         }
389         else
390         {
391             String unescaped = HtmlTools.unescapeHTML( text );
392 
393             sink.text( unescaped );
394         }
395     }
396 
397     /**
398      * Handles an unknown event.
399      *
400      * <p>This is a default implementation, all events are emitted as unknown
401      * events into the specified sink.</p>
402      *
403      * @param parser the parser to get the event from.
404      * @param sink the sink to receive the event.
405      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
406      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
407      * It will be passed as the first argument of the required parameters to the Sink
408      * {@link
409      * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
410      * method.
411      */
412     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
413     {
414         Object[] required = new Object[] { type };
415 
416         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
417 
418         sink.unknown( parser.getName(), required, attribs );
419     }
420 
421     /**
422      * <p>isIgnorableWhitespace.</p>
423      *
424      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
425      * @see #setIgnorableWhitespace(boolean)
426      * @since 1.1
427      */
428     protected boolean isIgnorableWhitespace()
429     {
430         return ignorableWhitespace;
431     }
432 
433     /**
434      * Specify that whitespace will be ignored. I.e.:
435      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
436      * is equivalent to
437      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
438      *
439      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
440      * @since 1.1
441      */
442     protected void setIgnorableWhitespace( boolean ignorable )
443     {
444         this.ignorableWhitespace = ignorable;
445     }
446 
447     /**
448      * <p>isCollapsibleWhitespace.</p>
449      *
450      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
451      * @see #setCollapsibleWhitespace(boolean)
452      * @since 1.1
453      */
454     protected boolean isCollapsibleWhitespace()
455     {
456         return collapsibleWhitespace;
457     }
458 
459     /**
460      * Specify that text will be collapsed. I.e.:
461      * <pre>Text   Text</pre>
462      * is equivalent to
463      * <pre>Text Text</pre>
464      *
465      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
466      * @since 1.1
467      */
468     protected void setCollapsibleWhitespace( boolean collapsible )
469     {
470         this.collapsibleWhitespace = collapsible;
471     }
472 
473     /**
474      * <p>isTrimmableWhitespace.</p>
475      *
476      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
477      * @see #setTrimmableWhitespace(boolean)
478      * @since 1.1
479      */
480     protected boolean isTrimmableWhitespace()
481     {
482         return trimmableWhitespace;
483     }
484 
485     /**
486      * Specify that text will be collapsed. I.e.:
487      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
488      * is equivalent to
489      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
490      *
491      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
492      * @since 1.1
493      */
494     protected void setTrimmableWhitespace( boolean trimmable )
495     {
496         this.trimmableWhitespace = trimmable;
497     }
498 
499     /**
500      * <p>getText.</p>
501      *
502      * @param parser A parser, not null.
503      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
504      * @see XmlPullParser#getText()
505      * @see #isCollapsibleWhitespace()
506      * @see #isTrimmableWhitespace()
507      * @since 1.1
508      */
509     protected String getText( XmlPullParser parser )
510     {
511         String text = parser.getText();
512 
513         if ( isTrimmableWhitespace() )
514         {
515             text = text.trim();
516         }
517 
518         if ( isCollapsibleWhitespace() )
519         {
520             StringBuilder newText = new StringBuilder();
521             String[] elts = StringUtils.split( text, " \r\n" );
522             for ( int i = 0; i < elts.length; i++ )
523             {
524                 newText.append( elts[i] );
525                 if ( ( i + 1 ) < elts.length )
526                 {
527                     newText.append( " " );
528                 }
529             }
530             text = newText.toString();
531         }
532 
533         return text;
534     }
535 
536     /**
537      * Return the defined entities in a local doctype. I.e.:
538      * <pre>
539      * &lt;!DOCTYPE foo [
540      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
541      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
542      * ]&gt;
543      * </pre>
544      *
545      * @return a map of the defined entities in a local doctype.
546      * @since 1.1
547      */
548     protected Map<String, String> getLocalEntities()
549     {
550         if ( entities == null )
551         {
552             entities = new LinkedHashMap<>();
553         }
554 
555         return entities;
556     }
557 
558     /**
559      * <p>isValidate.</p>
560      *
561      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
562      * @since 1.1
563      */
564     public boolean isValidate()
565     {
566         return validate;
567     }
568 
569     /**
570      * Specify a flag to validate or not the XML content.
571      *
572      * @param validate the validate to set
573      * @see #parse(Reader, Sink)
574      * @since 1.1
575      */
576     public void setValidate( boolean validate )
577     {
578         this.validate = validate;
579     }
580 
581     // ----------------------------------------------------------------------
582     // Private methods
583     // ----------------------------------------------------------------------
584 
585     /**
586      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
587      * <br>
588      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
589      *
590      * @param parser not null
591      * @param entityName not null
592      * @param entityValue not null
593      * @throws XmlPullParserException if any
594      * @see XmlPullParser#defineEntityReplacementText(String, String)
595      */
596     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
597         throws XmlPullParserException
598     {
599         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
600             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
601         {
602             return;
603         }
604 
605         parser.defineEntityReplacementText( entityName, entityValue );
606         getLocalEntities().put( entityName, entityValue );
607     }
608 
609     /**
610      * Handle entities defined in a local doctype as the following:
611      * <pre>
612      * &lt;!DOCTYPE foo [
613      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
614      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
615      * ]&gt;
616      * </pre>
617      *
618      * @param parser not null
619      * @param text not null
620      * @throws XmlPullParserException if any
621      */
622     private void addLocalEntities( XmlPullParser parser, String text )
623         throws XmlPullParserException
624     {
625         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
626         if ( entitiesCount > 0 )
627         {
628             // text should be foo [...]
629             int start = text.indexOf( '[' );
630             int end = text.lastIndexOf( ']' );
631             if ( start != -1 && end != -1 )
632             {
633                 addDTDEntities( parser, text.substring( start + 1, end ) );
634             }
635         }
636     }
637 
638     /**
639      * Handle entities defined in external doctypes as the following:
640      * <pre>
641      * &lt;!DOCTYPE foo [
642      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
643      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
644      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
645      *   %HTMLlat1;
646      * ]&gt;
647      * </pre>
648      *
649      * @param parser not null
650      * @param text not null
651      * @throws XmlPullParserException if any
652      */
653     private void addDTDEntities( XmlPullParser parser, String text )
654         throws XmlPullParserException
655     {
656         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
657         if ( entitiesCount > 0 )
658         {
659             final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
660             try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
661             {
662                 String line;
663                 String tmpLine = "";
664                 Matcher matcher;
665                 while ( ( line = reader.readLine() ) != null )
666                 {
667                     tmpLine += "\n" + line;
668                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
669                     if ( matcher.find() && matcher.groupCount() == 7 )
670                     {
671                         String entityName = matcher.group( 2 );
672                         String entityValue = matcher.group( 5 );
673 
674                         addEntity( parser, entityName, entityValue );
675                         tmpLine = "";
676                     }
677                     else
678                     {
679                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
680                         if ( matcher.find() && matcher.groupCount() == 8 )
681                         {
682                             String entityName = matcher.group( 2 );
683                             String entityValue = matcher.group( 5 );
684 
685                             addEntity( parser, entityName, entityValue );
686                             tmpLine = "";
687                         }
688                     }
689                 }
690             }
691             catch ( IOException e )
692             {
693                 // nop
694             }
695         }
696     }
697 
698     /**
699      * Implementation of the callback mechanism <code>EntityResolver</code>.
700      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
701      */
702     public static class CachedFileEntityResolver
703         implements EntityResolver
704     {
705         /** Map with systemId as key and the content of systemId as byte[]. */
706         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
707 
708         /** {@inheritDoc} */
709         public InputSource resolveEntity( String publicId, String systemId )
710             throws SAXException, IOException
711         {
712             byte[] res = ENTITY_CACHE.get( systemId );
713             // already cached?
714             if ( res == null )
715             {
716                 String systemName = FileUtils.getFile( systemId ).getName();
717                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
718                 // maybe already as a temp file?
719                 if ( !temp.exists() )
720                 {
721                     // is systemId a file or an url?
722                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
723                     {
724                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
725                         // the classpath...
726                         String resource = "/" + systemName;
727                         URL url = getClass().getResource( resource );
728                         if ( url != null )
729                         {
730                             res = toByteArray( url );
731                         }
732                         else
733                         {
734                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
735                             + " because '" + resource + "' is not available of the classpath." );
736                         }
737                     }
738                     else
739                     {
740                         res = toByteArray( new URL( systemId ) );
741                     }
742 
743                     // write systemId as temp file
744                     copy( res, temp );
745                 }
746                 else
747                 {
748                     // TODO How to refresh Doxia XSDs from temp dir?
749                     res = toByteArray( temp.toURI().toURL() );
750                 }
751 
752                 ENTITY_CACHE.put( systemId, res );
753             }
754 
755             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
756             is.setPublicId( publicId );
757             is.setSystemId( systemId );
758 
759             return is;
760         }
761 
762         /**
763          * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
764          * content.
765          * Otherwise, use HttpClient to get the http content.
766          * Wrap all internal exceptions to throw SAXException.
767          *
768          * @param url not null
769          * @return return an array of byte
770          * @throws SAXException if any
771          */
772         private static byte[] toByteArray( URL url )
773             throws SAXException
774         {
775             if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
776             {
777                 InputStream is = null;
778                 try
779                 {
780                     is = url.openStream();
781                     if ( is == null )
782                     {
783                         throw new SAXException( "Cannot open stream from the url: " + url.toString() );
784                     }
785                     return IOUtil.toByteArray( is );
786                 }
787                 catch ( IOException e )
788                 {
789                     throw new SAXException( "IOException: " + e.getMessage(), e );
790                 }
791                 finally
792                 {
793                     IOUtil.close( is );
794                 }
795             }
796 
797             // it is an HTTP url, using HttpClient...
798             HttpClientBuilder httpClientBuilder = HttpClientBuilder.create()
799                     .useSystemProperties()
800                     .setRetryHandler( new DefaultHttpRequestRetryHandler( 3, false ) )
801                     // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
802                     // The default user-agent is "Apache-HttpClient/4.5.8 (java 7)"
803                     .setUserAgent( "Apache-Doxia/" + doxiaVersion() );
804 
805             try ( CloseableHttpResponse response = httpClientBuilder.build().execute( new HttpGet( url.toString() ) ) )
806             {
807                 int statusCode = response.getStatusLine().getStatusCode();
808                 if ( statusCode != HttpStatus.SC_OK )
809                 {
810                     throw new IOException(
811                             "The status code when accessing the URL '" + url.toString() + "' was " + statusCode
812                                     + ", which is not allowed. The server gave this reason for the failure '"
813                                     + response.getStatusLine().getReasonPhrase() + "'." );
814                 }
815 
816                 return EntityUtils.toByteArray( response.getEntity() );
817             }
818             catch ( ClientProtocolException e )
819             {
820                 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
821             }
822             catch ( IOException e )
823             {
824                 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
825             }
826         }
827 
828         /**
829          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
830          *
831          * @param res not null array of byte
832          * @param f the file where to write the bytes
833          * @throws SAXException if any
834          * @see IOUtil#copy(byte[], OutputStream)
835          */
836         private void copy( byte[] res, File f )
837             throws SAXException
838         {
839             if ( f.isDirectory() )
840             {
841                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
842             }
843 
844             OutputStream os = null;
845             try
846             {
847                 os = new FileOutputStream( f );
848                 IOUtil.copy( res, os );
849             }
850             catch ( IOException e )
851             {
852                 throw new SAXException( "IOException: " + e.getMessage(), e );
853             }
854             finally
855             {
856                 IOUtil.close( os );
857             }
858         }
859     }
860 }