View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedReader;
23  import java.io.ByteArrayInputStream;
24  import java.io.File;
25  import java.io.FileOutputStream;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.OutputStream;
29  import java.io.Reader;
30  import java.io.StringReader;
31  import java.net.URL;
32  import java.util.Hashtable;
33  import java.util.LinkedHashMap;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  
39  import org.apache.http.HttpStatus;
40  import org.apache.http.client.ClientProtocolException;
41  import org.apache.http.client.methods.CloseableHttpResponse;
42  import org.apache.http.client.methods.HttpGet;
43  import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
44  import org.apache.http.impl.client.HttpClientBuilder;
45  import org.apache.http.util.EntityUtils;
46  import org.apache.maven.doxia.macro.MacroExecutionException;
47  import org.apache.maven.doxia.markup.XmlMarkup;
48  import org.apache.maven.doxia.sink.Sink;
49  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
50  import org.apache.maven.doxia.util.HtmlTools;
51  import org.apache.maven.doxia.util.XmlValidator;
52  
53  import org.codehaus.plexus.util.FileUtils;
54  import org.codehaus.plexus.util.IOUtil;
55  import org.codehaus.plexus.util.StringUtils;
56  import org.codehaus.plexus.util.xml.pull.MXParser;
57  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
58  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
59  
60  import org.xml.sax.EntityResolver;
61  import org.xml.sax.InputSource;
62  import org.xml.sax.SAXException;
63  
64  /**
65   * An abstract class that defines some convenience methods for <code>XML</code> parsers.
66   *
67   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
68   * @version $Id$
69   * @since 1.0
70   */
71  public abstract class AbstractXmlParser
72      extends AbstractParser
73      implements XmlMarkup
74  {
    /**
     * Entity pattern for HTML entity, i.e. &#38;nbsp;
     * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
     * <br>
     * Capturing group 2 is the entity name, group 5 is the named entity reference used as its value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_1 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );

    /**
     * Entity pattern for Unicode entity, i.e. &#38;#38;
     * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
     * <br>
     * Capturing group 2 is the entity name, group 5 is the numeric character reference used as its value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_2 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );

    /** When <code>true</code>, text events that contain only whitespace are not passed to handleText. */
    private boolean ignorableWhitespace;

    /** When <code>true</code>, getText joins the space/CR/LF separated parts of a text with single spaces. */
    private boolean collapsibleWhitespace;

    /** When <code>true</code>, getText trims the text returned by the parser. */
    private boolean trimmableWhitespace;

    /** Entities registered through addEntity, keyed by entity name; created lazily by getLocalEntities. */
    private Map<String, String> entities;

    /** When <code>true</code>, parse validates the XML content before processing it. */
    private boolean validate = false;
102 
103     /** {@inheritDoc} */
104     public void parse( Reader source, Sink sink )
105         throws ParseException
106     {
107         init();
108 
109         Reader src = source;
110 
111         // 1 first parsing if validation is required
112         if ( isValidate() )
113         {
114             String content;
115             try
116             {
117                 content = IOUtil.toString( new BufferedReader( src ) );
118             }
119             catch ( IOException e )
120             {
121                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
122             }
123 
124             new XmlValidator( getLog() ).validate( content );
125 
126             src = new StringReader( content );
127         }
128 
129         // 2 second parsing to process
130         try
131         {
132             XmlPullParser parser = new MXParser();
133 
134             parser.setInput( src );
135             
136             // allow parser initialization, e.g. for additional entities in XHTML
137             // Note: do it after input is set, otherwise values are reset
138             initXmlParser( parser );
139 
140             sink.enableLogging( getLog() );
141 
142             parseXml( parser, sink );
143         }
144         catch ( XmlPullParserException ex )
145         {
146             throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
147                                       ex.getColumnNumber() );
148         }
149         catch ( MacroExecutionException ex )
150         {
151             throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
152         }
153 
154         setSecondParsing( false );
155         init();
156     }
157     
158     /**
159      * Initializes the parser with custom entities or other options.
160      *
161      * @param parser A parser, not null.
162      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
163      */
164     protected void initXmlParser( XmlPullParser parser )
165         throws XmlPullParserException
166     {
167         // nop
168     }
169 
170     /**
171      * {@inheritDoc}
172      *
173      * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
174      */
175     @Override
176     public void parse( String string, Sink sink )
177         throws ParseException
178     {
179         super.parse( string, sink );
180     }
181 
182     /** {@inheritDoc} */
183     @Override
184     public final int getType()
185     {
186         return XML_TYPE;
187     }
188 
189     /**
190      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
191      *
192      * @param parser A parser, not null.
193      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
194      * @since 1.1
195      */
196     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
197     {
198         int count = parser.getAttributeCount();
199 
200         if ( count < 0 )
201         {
202             return null;
203         }
204 
205         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
206 
207         for ( int i = 0; i < count; i++ )
208         {
209             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
210         }
211 
212         return atts;
213     }
214 
215     /**
216      * Parse the model from the XmlPullParser into the given sink.
217      *
218      * @param parser A parser, not null.
219      * @param sink the sink to receive the events.
220      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
221      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
222      */
223     private void parseXml( XmlPullParser parser, Sink sink )
224         throws XmlPullParserException, MacroExecutionException
225     {
226         int eventType = parser.getEventType();
227 
228         while ( eventType != XmlPullParser.END_DOCUMENT )
229         {
230             if ( eventType == XmlPullParser.START_TAG )
231             {
232                 handleStartTag( parser, sink );
233             }
234             else if ( eventType == XmlPullParser.END_TAG )
235             {
236                 handleEndTag( parser, sink );
237             }
238             else if ( eventType == XmlPullParser.TEXT )
239             {
240                 String text = getText( parser );
241 
242                 if ( isIgnorableWhitespace() )
243                 {
244                     if ( text.trim().length() != 0 )
245                     {
246                         handleText( parser, sink );
247                     }
248                 }
249                 else
250                 {
251                     handleText( parser, sink );
252                 }
253             }
254             else if ( eventType == XmlPullParser.CDSECT )
255             {
256                 handleCdsect( parser, sink );
257             }
258             else if ( eventType == XmlPullParser.COMMENT )
259             {
260                 handleComment( parser, sink );
261             }
262             else if ( eventType == XmlPullParser.ENTITY_REF )
263             {
264                 handleEntity( parser, sink );
265             }
266             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
267             {
268                 // nop
269             }
270             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
271             {
272                 // nop
273             }
274             else if ( eventType == XmlPullParser.DOCDECL )
275             {
276                 addLocalEntities( parser, parser.getText() );
277 
278                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
279                 {
280                     addDTDEntities( parser, new String( res ) );
281                 }
282             }
283 
284             try
285             {
286                 eventType = parser.nextToken();
287             }
288             catch ( IOException io )
289             {
290                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
291             }
292         }
293     }
294 
    /**
     * Goes through the possible start tags.
     *
     * <p>Called by the parsing loop for every start tag event; the parser is positioned on that tag.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;
305 
    /**
     * Goes through the possible end tags.
     *
     * <p>Called by the parsing loop for every end tag event; the parser is positioned on that tag.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;
316 
317     /**
318      * Handles text events.
319      *
320      * <p>This is a default implementation, if the parser points to a non-empty text element,
321      * it is emitted as a text event into the specified sink.</p>
322      *
323      * @param parser A parser, not null.
324      * @param sink the sink to receive the events. Not null.
325      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
326      */
327     protected void handleText( XmlPullParser parser, Sink sink )
328         throws XmlPullParserException
329     {
330         String text = getText( parser );
331 
332         /*
333          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
334          * parser so any whitespace that makes it here is significant.
335          */
336         if ( StringUtils.isNotEmpty( text ) )
337         {
338             sink.text( text );
339         }
340     }
341 
342     /**
343      * Handles CDATA sections.
344      *
345      * <p>This is a default implementation, all data are emitted as text
346      * events into the specified sink.</p>
347      *
348      * @param parser A parser, not null.
349      * @param sink the sink to receive the events. Not null.
350      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
351      */
352     protected void handleCdsect( XmlPullParser parser, Sink sink )
353         throws XmlPullParserException
354     {
355         sink.text( getText( parser ) );
356     }
357 
358     /**
359      * Handles comments.
360      *
361      * <p>This is a default implementation, all data are emitted as comment
362      * events into the specified sink.</p>
363      *
364      * @param parser A parser, not null.
365      * @param sink the sink to receive the events. Not null.
366      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
367      */
368     protected void handleComment( XmlPullParser parser, Sink sink )
369         throws XmlPullParserException
370     {
371         if ( isEmitComments() )
372         {
373             sink.comment( getText( parser ) );
374         }
375     }
376 
377     /**
378      * Handles entities.
379      *
380      * <p>This is a default implementation, all entities are resolved and emitted as text
381      * events into the specified sink, except:</p>
382      * <ul>
383      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
384      * are emitted as <code>nonBreakingSpace()</code> events.</li>
385      * </ul>
386      *
387      * @param parser A parser, not null.
388      * @param sink the sink to receive the events. Not null.
389      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
390      */
391     protected void handleEntity( XmlPullParser parser, Sink sink )
392         throws XmlPullParserException
393     {
394         String text = getText( parser );
395 
396         String name = parser.getName();
397 
398         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
399         {
400             sink.nonBreakingSpace();
401         }
402         else
403         {
404             String unescaped = HtmlTools.unescapeHTML( text );
405 
406             sink.text( unescaped );
407         }
408     }
409 
410     /**
411      * Handles an unknown event.
412      *
413      * <p>This is a default implementation, all events are emitted as unknown
414      * events into the specified sink.</p>
415      *
416      * @param parser the parser to get the event from.
417      * @param sink the sink to receive the event.
418      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
419      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
420      * It will be passed as the first argument of the required parameters to the Sink
421      * {@link
422      * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
423      * method.
424      */
425     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
426     {
427         Object[] required = new Object[] { type };
428 
429         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
430 
431         sink.unknown( parser.getName(), required, attribs );
432     }
433 
434     /**
435      * <p>isIgnorableWhitespace.</p>
436      *
437      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
438      * @see #setIgnorableWhitespace(boolean)
439      * @since 1.1
440      */
441     protected boolean isIgnorableWhitespace()
442     {
443         return ignorableWhitespace;
444     }
445 
446     /**
447      * Specify that whitespace will be ignored. I.e.:
448      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
449      * is equivalent to
450      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
451      *
452      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
453      * @since 1.1
454      */
455     protected void setIgnorableWhitespace( boolean ignorable )
456     {
457         this.ignorableWhitespace = ignorable;
458     }
459 
460     /**
461      * <p>isCollapsibleWhitespace.</p>
462      *
463      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
464      * @see #setCollapsibleWhitespace(boolean)
465      * @since 1.1
466      */
467     protected boolean isCollapsibleWhitespace()
468     {
469         return collapsibleWhitespace;
470     }
471 
472     /**
473      * Specify that text will be collapsed. I.e.:
474      * <pre>Text   Text</pre>
475      * is equivalent to
476      * <pre>Text Text</pre>
477      *
478      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
479      * @since 1.1
480      */
481     protected void setCollapsibleWhitespace( boolean collapsible )
482     {
483         this.collapsibleWhitespace = collapsible;
484     }
485 
486     /**
487      * <p>isTrimmableWhitespace.</p>
488      *
489      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
490      * @see #setTrimmableWhitespace(boolean)
491      * @since 1.1
492      */
493     protected boolean isTrimmableWhitespace()
494     {
495         return trimmableWhitespace;
496     }
497 
498     /**
499      * Specify that text will be collapsed. I.e.:
500      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
501      * is equivalent to
502      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
503      *
504      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
505      * @since 1.1
506      */
507     protected void setTrimmableWhitespace( boolean trimmable )
508     {
509         this.trimmableWhitespace = trimmable;
510     }
511 
512     /**
513      * <p>getText.</p>
514      *
515      * @param parser A parser, not null.
516      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
517      * @see XmlPullParser#getText()
518      * @see #isCollapsibleWhitespace()
519      * @see #isTrimmableWhitespace()
520      * @since 1.1
521      */
522     protected String getText( XmlPullParser parser )
523     {
524         String text = parser.getText();
525 
526         if ( isTrimmableWhitespace() )
527         {
528             text = text.trim();
529         }
530 
531         if ( isCollapsibleWhitespace() )
532         {
533             StringBuilder newText = new StringBuilder();
534             String[] elts = StringUtils.split( text, " \r\n" );
535             for ( int i = 0; i < elts.length; i++ )
536             {
537                 newText.append( elts[i] );
538                 if ( ( i + 1 ) < elts.length )
539                 {
540                     newText.append( " " );
541                 }
542             }
543             text = newText.toString();
544         }
545 
546         return text;
547     }
548 
549     /**
550      * Return the defined entities in a local doctype. I.e.:
551      * <pre>
552      * &lt;!DOCTYPE foo [
553      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
554      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
555      * ]&gt;
556      * </pre>
557      *
558      * @return a map of the defined entities in a local doctype.
559      * @since 1.1
560      */
561     protected Map<String, String> getLocalEntities()
562     {
563         if ( entities == null )
564         {
565             entities = new LinkedHashMap<>();
566         }
567 
568         return entities;
569     }
570 
571     /**
572      * <p>isValidate.</p>
573      *
574      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
575      * @since 1.1
576      */
577     public boolean isValidate()
578     {
579         return validate;
580     }
581 
582     /**
583      * Specify a flag to validate or not the XML content.
584      *
585      * @param validate the validate to set
586      * @see #parse(Reader, Sink)
587      * @since 1.1
588      */
589     public void setValidate( boolean validate )
590     {
591         this.validate = validate;
592     }
593 
594     // ----------------------------------------------------------------------
595     // Private methods
596     // ----------------------------------------------------------------------
597 
598     /**
599      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
600      * <br>
601      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
602      *
603      * @param parser not null
604      * @param entityName not null
605      * @param entityValue not null
606      * @throws XmlPullParserException if any
607      * @see XmlPullParser#defineEntityReplacementText(String, String)
608      */
609     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
610         throws XmlPullParserException
611     {
612         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
613             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
614         {
615             return;
616         }
617 
618         parser.defineEntityReplacementText( entityName, entityValue );
619         getLocalEntities().put( entityName, entityValue );
620     }
621 
622     /**
623      * Handle entities defined in a local doctype as the following:
624      * <pre>
625      * &lt;!DOCTYPE foo [
626      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
627      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
628      * ]&gt;
629      * </pre>
630      *
631      * @param parser not null
632      * @param text not null
633      * @throws XmlPullParserException if any
634      */
635     private void addLocalEntities( XmlPullParser parser, String text )
636         throws XmlPullParserException
637     {
638         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
639         if ( entitiesCount > 0 )
640         {
641             // text should be foo [...]
642             int start = text.indexOf( '[' );
643             int end = text.lastIndexOf( ']' );
644             if ( start != -1 && end != -1 )
645             {
646                 addDTDEntities( parser, text.substring( start + 1, end ) );
647             }
648         }
649     }
650 
651     /**
652      * Handle entities defined in external doctypes as the following:
653      * <pre>
654      * &lt;!DOCTYPE foo [
655      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
656      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
657      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
658      *   %HTMLlat1;
659      * ]&gt;
660      * </pre>
661      *
662      * @param parser not null
663      * @param text not null
664      * @throws XmlPullParserException if any
665      */
666     private void addDTDEntities( XmlPullParser parser, String text )
667         throws XmlPullParserException
668     {
669         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
670         if ( entitiesCount > 0 )
671         {
672             final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
673             try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
674             {
675                 String line;
676                 String tmpLine = "";
677                 Matcher matcher;
678                 while ( ( line = reader.readLine() ) != null )
679                 {
680                     tmpLine += "\n" + line;
681                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
682                     if ( matcher.find() && matcher.groupCount() == 7 )
683                     {
684                         String entityName = matcher.group( 2 );
685                         String entityValue = matcher.group( 5 );
686 
687                         addEntity( parser, entityName, entityValue );
688                         tmpLine = "";
689                     }
690                     else
691                     {
692                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
693                         if ( matcher.find() && matcher.groupCount() == 8 )
694                         {
695                             String entityName = matcher.group( 2 );
696                             String entityValue = matcher.group( 5 );
697 
698                             addEntity( parser, entityName, entityValue );
699                             tmpLine = "";
700                         }
701                     }
702                 }
703             }
704             catch ( IOException e )
705             {
706                 // nop
707             }
708         }
709     }
710 
711     /**
712      * Implementation of the callback mechanism <code>EntityResolver</code>.
713      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
714      */
715     public static class CachedFileEntityResolver
716         implements EntityResolver
717     {
718         /** Map with systemId as key and the content of systemId as byte[]. */
719         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
720 
721         /** {@inheritDoc} */
722         public InputSource resolveEntity( String publicId, String systemId )
723             throws SAXException, IOException
724         {
725             byte[] res = ENTITY_CACHE.get( systemId );
726             // already cached?
727             if ( res == null )
728             {
729                 String systemName = FileUtils.getFile( systemId ).getName();
730                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
731                 // maybe already as a temp file?
732                 if ( !temp.exists() )
733                 {
734                     // is systemId a file or an url?
735                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
736                     {
737                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
738                         // the classpath...
739                         String resource = "/" + systemName;
740                         URL url = getClass().getResource( resource );
741                         if ( url != null )
742                         {
743                             res = toByteArray( url );
744                         }
745                         else
746                         {
747                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
748                             + " because '" + resource + "' is not available of the classpath." );
749                         }
750                     }
751                     else
752                     {
753                         res = toByteArray( new URL( systemId ) );
754                     }
755 
756                     // write systemId as temp file
757                     copy( res, temp );
758                 }
759                 else
760                 {
761                     // TODO How to refresh Doxia XSDs from temp dir?
762                     res = toByteArray( temp.toURI().toURL() );
763                 }
764 
765                 ENTITY_CACHE.put( systemId, res );
766             }
767 
768             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
769             is.setPublicId( publicId );
770             is.setSystemId( systemId );
771 
772             return is;
773         }
774 
775         /**
776          * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
777          * content.
778          * Otherwise, use HttpClient to get the http content.
779          * Wrap all internal exceptions to throw SAXException.
780          *
781          * @param url not null
782          * @return return an array of byte
783          * @throws SAXException if any
784          */
785         private static byte[] toByteArray( URL url )
786             throws SAXException
787         {
788             if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
789             {
790                 InputStream is = null;
791                 try
792                 {
793                     is = url.openStream();
794                     if ( is == null )
795                     {
796                         throw new SAXException( "Cannot open stream from the url: " + url.toString() );
797                     }
798                     return IOUtil.toByteArray( is );
799                 }
800                 catch ( IOException e )
801                 {
802                     throw new SAXException( "IOException: " + e.getMessage(), e );
803                 }
804                 finally
805                 {
806                     IOUtil.close( is );
807                 }
808             }
809 
810             // it is an HTTP url, using HttpClient...
811             HttpClientBuilder httpClientBuilder = HttpClientBuilder.create()
812                     .useSystemProperties()
813                     .setRetryHandler( new DefaultHttpRequestRetryHandler( 3, false ) )
814                     // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
815                     // The default user-agent is "Apache-HttpClient/4.5.8 (java 7)"
816                     .setUserAgent( "Apache-Doxia/" + doxiaVersion() );
817 
818             try ( CloseableHttpResponse response = httpClientBuilder.build().execute( new HttpGet( url.toString() ) ) )
819             {
820                 int statusCode = response.getStatusLine().getStatusCode();
821                 if ( statusCode != HttpStatus.SC_OK )
822                 {
823                     throw new IOException(
824                             "The status code when accessing the URL '" + url.toString() + "' was " + statusCode
825                                     + ", which is not allowed. The server gave this reason for the failure '"
826                                     + response.getStatusLine().getReasonPhrase() + "'." );
827                 }
828 
829                 return EntityUtils.toByteArray( response.getEntity() );
830             }
831             catch ( ClientProtocolException e )
832             {
833                 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
834             }
835             catch ( IOException e )
836             {
837                 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
838             }
839         }
840 
841         /**
842          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
843          *
844          * @param res not null array of byte
845          * @param f the file where to write the bytes
846          * @throws SAXException if any
847          * @see IOUtil#copy(byte[], OutputStream)
848          */
849         private void copy( byte[] res, File f )
850             throws SAXException
851         {
852             if ( f.isDirectory() )
853             {
854                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
855             }
856 
857             OutputStream os = null;
858             try
859             {
860                 os = new FileOutputStream( f );
861                 IOUtil.copy( res, os );
862             }
863             catch ( IOException e )
864             {
865                 throw new SAXException( "IOException: " + e.getMessage(), e );
866             }
867             finally
868             {
869                 IOUtil.close( os );
870             }
871         }
872     }
873 }