001package org.apache.maven.doxia.parser;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Hashtable;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
import org.apache.maven.doxia.util.HtmlTools;
import org.apache.maven.doxia.util.XmlValidator;

import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
063
064/**
065 * An abstract class that defines some convenience methods for <code>XML</code> parsers.
066 *
067 * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
068 * @since 1.0
069 */
070public abstract class AbstractXmlParser
071    extends AbstractParser
072    implements XmlMarkup
073{
    /**
     * Entity pattern for an entity declaration whose replacement text is a named HTML entity, i.e. &#38;nbsp;
     * <br>
     * The expression is
     * <code>&lt;!ENTITY(\\s)+([^&gt;|^\\s]+)(\\s)+\"(\\s)*(&#38;[a-zA-Z]{2,6};)(\\s)*\"(\\s)*&gt;</code>:
     * it declares 7 groups, group 2 being read as the entity name and group 5 as the entity value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_1 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );

    /**
     * Entity pattern for an entity declaration whose replacement text is a numeric (Unicode) entity,
     * i.e. &#38;#38;
     * <br>
     * The expression is
     * <code>&lt;!ENTITY(\\s)+([^&gt;|^\\s]+)(\\s)+\"(\\s)*(&#38;(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*&gt;</code>:
     * it declares 8 groups, group 2 being read as the entity name and group 5 as the entity value.
     * <br>
     * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
     */
    private static final Pattern PATTERN_ENTITY_2 =
        Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );

    /** When <code>true</code>, text events made only of whitespace are not forwarded to the sink. */
    private boolean ignorableWhitespace;

    /** When <code>true</code>, {@link #getText(XmlPullParser)} collapses runs of spaces and line breaks. */
    private boolean collapsibleWhitespace;

    /** When <code>true</code>, {@link #getText(XmlPullParser)} trims the text. */
    private boolean trimmableWhitespace;

    /** Entities registered so far, by name; created lazily by {@link #getLocalEntities()}. */
    private Map<String, String> entities;

    /** Whether the content is validated before being parsed, see {@link #isValidate()}. */
    private boolean validate = false;
101
102    /** {@inheritDoc} */
103    public void parse( Reader source, Sink sink, String reference )
104        throws ParseException
105    {
106        init();
107
108        Reader src = source;
109
110        // 1 first parsing if validation is required
111        if ( isValidate() )
112        {
113            String content;
114            try
115            {
116                content = IOUtil.toString( new BufferedReader( src ) );
117            }
118            catch ( IOException e )
119            {
120                throw new ParseException( "Error reading the model: " + e.getMessage(), e );
121            }
122
123            new XmlValidator( getLog() ).validate( content );
124
125            src = new StringReader( content );
126        }
127
128        // 2 second parsing to process
129        try
130        {
131            XmlPullParser parser = new MXParser();
132
133            parser.setInput( src );
134            
135            // allow parser initialization, e.g. for additional entities in XHTML
136            // Note: do it after input is set, otherwise values are reset
137            initXmlParser( parser );
138
139            sink.enableLogging( getLog() );
140
141            parseXml( parser, sink );
142        }
143        catch ( XmlPullParserException ex )
144        {
145            throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
146                                      ex.getColumnNumber() );
147        }
148        catch ( MacroExecutionException ex )
149        {
150            throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
151        }
152
153        setSecondParsing( false );
154        init();
155    }
156
157    /**
158     * Initializes the parser with custom entities or other options.
159     *
160     * @param parser A parser, not null.
161     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem initializing the parser
162     */
163    protected void initXmlParser( XmlPullParser parser )
164        throws XmlPullParserException
165    {
166        // nop
167    }
168
169    /** {@inheritDoc} */
170    @Override
171    public final int getType()
172    {
173        return XML_TYPE;
174    }
175
176    /**
177     * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
178     *
179     * @param parser A parser, not null.
180     * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
181     * @since 1.1
182     */
183    protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
184    {
185        int count = parser.getAttributeCount();
186
187        if ( count < 0 )
188        {
189            return null;
190        }
191
192        SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
193
194        for ( int i = 0; i < count; i++ )
195        {
196            atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
197        }
198
199        return atts;
200    }
201
202    /**
203     * Parse the model from the XmlPullParser into the given sink.
204     *
205     * @param parser A parser, not null.
206     * @param sink the sink to receive the events.
207     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
208     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
209     */
210    private void parseXml( XmlPullParser parser, Sink sink )
211        throws XmlPullParserException, MacroExecutionException
212    {
213        int eventType = parser.getEventType();
214
215        while ( eventType != XmlPullParser.END_DOCUMENT )
216        {
217            if ( eventType == XmlPullParser.START_TAG )
218            {
219                handleStartTag( parser, sink );
220            }
221            else if ( eventType == XmlPullParser.END_TAG )
222            {
223                handleEndTag( parser, sink );
224            }
225            else if ( eventType == XmlPullParser.TEXT )
226            {
227                String text = getText( parser );
228
229                if ( isIgnorableWhitespace() )
230                {
231                    if ( text.trim().length() != 0 )
232                    {
233                        handleText( parser, sink );
234                    }
235                }
236                else
237                {
238                    handleText( parser, sink );
239                }
240            }
241            else if ( eventType == XmlPullParser.CDSECT )
242            {
243                handleCdsect( parser, sink );
244            }
245            else if ( eventType == XmlPullParser.COMMENT )
246            {
247                handleComment( parser, sink );
248            }
249            else if ( eventType == XmlPullParser.ENTITY_REF )
250            {
251                handleEntity( parser, sink );
252            }
253            else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
254            {
255                // nop
256            }
257            else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
258            {
259                // nop
260            }
261            else if ( eventType == XmlPullParser.DOCDECL )
262            {
263                addLocalEntities( parser, parser.getText() );
264
265                for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
266                {
267                    addDTDEntities( parser, new String( res ) );
268                }
269            }
270
271            try
272            {
273                eventType = parser.nextToken();
274            }
275            catch ( IOException io )
276            {
277                throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
278            }
279        }
280    }
281
    /**
     * Goes through the possible start tags.
     *
     * <p>Called while parsing for every start tag event; at that time the parser is positioned on the start
     * tag.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;
292
    /**
     * Goes through the possible end tags.
     *
     * <p>Called while parsing for every end tag event; at that time the parser is positioned on the end
     * tag.</p>
     *
     * @param parser A parser, not null.
     * @param sink the sink to receive the events.
     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
     * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
     */
    protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
        throws XmlPullParserException, MacroExecutionException;
303
304    /**
305     * Handles text events.
306     *
307     * <p>This is a default implementation, if the parser points to a non-empty text element,
308     * it is emitted as a text event into the specified sink.</p>
309     *
310     * @param parser A parser, not null.
311     * @param sink the sink to receive the events. Not null.
312     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
313     */
314    protected void handleText( XmlPullParser parser, Sink sink )
315        throws XmlPullParserException
316    {
317        String text = getText( parser );
318
319        /*
320         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
321         * parser so any whitespace that makes it here is significant.
322         */
323        if ( StringUtils.isNotEmpty( text ) )
324        {
325            sink.text( text );
326        }
327    }
328
329    /**
330     * Handles CDATA sections.
331     *
332     * <p>This is a default implementation, all data are emitted as text
333     * events into the specified sink.</p>
334     *
335     * @param parser A parser, not null.
336     * @param sink the sink to receive the events. Not null.
337     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
338     */
339    protected void handleCdsect( XmlPullParser parser, Sink sink )
340        throws XmlPullParserException
341    {
342        sink.text( getText( parser ) );
343    }
344
345    /**
346     * Handles comments.
347     *
348     * <p>This is a default implementation, all data are emitted as comment
349     * events into the specified sink.</p>
350     *
351     * @param parser A parser, not null.
352     * @param sink the sink to receive the events. Not null.
353     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
354     */
355    protected void handleComment( XmlPullParser parser, Sink sink )
356        throws XmlPullParserException
357    {
358        if ( isEmitComments() )
359        {
360            sink.comment( getText( parser ) );
361        }
362    }
363
364    /**
365     * Handles entities.
366     *
367     * <p>This is a default implementation, all entities are resolved and emitted as text
368     * events into the specified sink, except:</p>
369     * <ul>
370     * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
371     * are emitted as <code>nonBreakingSpace()</code> events.</li>
372     * </ul>
373     *
374     * @param parser A parser, not null.
375     * @param sink the sink to receive the events. Not null.
376     * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
377     */
378    protected void handleEntity( XmlPullParser parser, Sink sink )
379        throws XmlPullParserException
380    {
381        String text = getText( parser );
382
383        String name = parser.getName();
384
385        if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
386        {
387            sink.nonBreakingSpace();
388        }
389        else
390        {
391            String unescaped = HtmlTools.unescapeHTML( text );
392
393            sink.text( unescaped );
394        }
395    }
396
397    /**
398     * Handles an unknown event.
399     *
400     * <p>This is a default implementation, all events are emitted as unknown
401     * events into the specified sink.</p>
402     *
403     * @param parser the parser to get the event from.
404     * @param sink the sink to receive the event.
405     * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
406     * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
407     * It will be passed as the first argument of the required parameters to the Sink
408     * {@link
409     * org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
410     * method.
411     */
412    protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
413    {
414        Object[] required = new Object[] { type };
415
416        SinkEventAttributeSet attribs = getAttributesFromParser( parser );
417
418        sink.unknown( parser.getName(), required, attribs );
419    }
420
421    /**
422     * <p>isIgnorableWhitespace.</p>
423     *
424     * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
425     * @see #setIgnorableWhitespace(boolean)
426     * @since 1.1
427     */
428    protected boolean isIgnorableWhitespace()
429    {
430        return ignorableWhitespace;
431    }
432
433    /**
434     * Specify that whitespace will be ignored. I.e.:
435     * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
436     * is equivalent to
437     * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
438     *
439     * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
440     * @since 1.1
441     */
442    protected void setIgnorableWhitespace( boolean ignorable )
443    {
444        this.ignorableWhitespace = ignorable;
445    }
446
447    /**
448     * <p>isCollapsibleWhitespace.</p>
449     *
450     * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
451     * @see #setCollapsibleWhitespace(boolean)
452     * @since 1.1
453     */
454    protected boolean isCollapsibleWhitespace()
455    {
456        return collapsibleWhitespace;
457    }
458
459    /**
460     * Specify that text will be collapsed. I.e.:
461     * <pre>Text   Text</pre>
462     * is equivalent to
463     * <pre>Text Text</pre>
464     *
465     * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
466     * @since 1.1
467     */
468    protected void setCollapsibleWhitespace( boolean collapsible )
469    {
470        this.collapsibleWhitespace = collapsible;
471    }
472
473    /**
474     * <p>isTrimmableWhitespace.</p>
475     *
476     * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
477     * @see #setTrimmableWhitespace(boolean)
478     * @since 1.1
479     */
480    protected boolean isTrimmableWhitespace()
481    {
482        return trimmableWhitespace;
483    }
484
485    /**
486     * Specify that text will be collapsed. I.e.:
487     * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
488     * is equivalent to
489     * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
490     *
491     * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
492     * @since 1.1
493     */
494    protected void setTrimmableWhitespace( boolean trimmable )
495    {
496        this.trimmableWhitespace = trimmable;
497    }
498
499    /**
500     * <p>getText.</p>
501     *
502     * @param parser A parser, not null.
503     * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
504     * @see XmlPullParser#getText()
505     * @see #isCollapsibleWhitespace()
506     * @see #isTrimmableWhitespace()
507     * @since 1.1
508     */
509    protected String getText( XmlPullParser parser )
510    {
511        String text = parser.getText();
512
513        if ( isTrimmableWhitespace() )
514        {
515            text = text.trim();
516        }
517
518        if ( isCollapsibleWhitespace() )
519        {
520            StringBuilder newText = new StringBuilder();
521            String[] elts = StringUtils.split( text, " \r\n" );
522            for ( int i = 0; i < elts.length; i++ )
523            {
524                newText.append( elts[i] );
525                if ( ( i + 1 ) < elts.length )
526                {
527                    newText.append( " " );
528                }
529            }
530            text = newText.toString();
531        }
532
533        return text;
534    }
535
536    /**
537     * Return the defined entities in a local doctype. I.e.:
538     * <pre>
539     * &lt;!DOCTYPE foo [
540     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
541     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
542     * ]&gt;
543     * </pre>
544     *
545     * @return a map of the defined entities in a local doctype.
546     * @since 1.1
547     */
548    protected Map<String, String> getLocalEntities()
549    {
550        if ( entities == null )
551        {
552            entities = new LinkedHashMap<>();
553        }
554
555        return entities;
556    }
557
558    /**
559     * <p>isValidate.</p>
560     *
561     * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
562     * @since 1.1
563     */
564    public boolean isValidate()
565    {
566        return validate;
567    }
568
569    /**
570     * Specify a flag to validate or not the XML content.
571     *
572     * @param validate the validate to set
573     * @see #parse(Reader, Sink)
574     * @since 1.1
575     */
576    public void setValidate( boolean validate )
577    {
578        this.validate = validate;
579    }
580
581    // ----------------------------------------------------------------------
582    // Private methods
583    // ----------------------------------------------------------------------
584
585    /**
586     * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
587     * <br>
588     * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
589     *
590     * @param parser not null
591     * @param entityName not null
592     * @param entityValue not null
593     * @throws XmlPullParserException if any
594     * @see XmlPullParser#defineEntityReplacementText(String, String)
595     */
596    private void addEntity( XmlPullParser parser, String entityName, String entityValue )
597        throws XmlPullParserException
598    {
599        if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
600            || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
601        {
602            return;
603        }
604
605        parser.defineEntityReplacementText( entityName, entityValue );
606        getLocalEntities().put( entityName, entityValue );
607    }
608
609    /**
610     * Handle entities defined in a local doctype as the following:
611     * <pre>
612     * &lt;!DOCTYPE foo [
613     *   &lt;!ENTITY bar "&#38;#x160;"&gt;
614     *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
615     * ]&gt;
616     * </pre>
617     *
618     * @param parser not null
619     * @param text not null
620     * @throws XmlPullParserException if any
621     */
622    private void addLocalEntities( XmlPullParser parser, String text )
623        throws XmlPullParserException
624    {
625        int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
626        if ( entitiesCount > 0 )
627        {
628            // text should be foo [...]
629            int start = text.indexOf( '[' );
630            int end = text.lastIndexOf( ']' );
631            if ( start != -1 && end != -1 )
632            {
633                addDTDEntities( parser, text.substring( start + 1, end ) );
634            }
635        }
636    }
637
638    /**
639     * Handle entities defined in external doctypes as the following:
640     * <pre>
641     * &lt;!DOCTYPE foo [
642     *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
643     *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
644     *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
645     *   %HTMLlat1;
646     * ]&gt;
647     * </pre>
648     *
649     * @param parser not null
650     * @param text not null
651     * @throws XmlPullParserException if any
652     */
653    private void addDTDEntities( XmlPullParser parser, String text )
654        throws XmlPullParserException
655    {
656        int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
657        if ( entitiesCount > 0 )
658        {
659            final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
660            try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
661            {
662                String line;
663                String tmpLine = "";
664                Matcher matcher;
665                while ( ( line = reader.readLine() ) != null )
666                {
667                    tmpLine += "\n" + line;
668                    matcher = PATTERN_ENTITY_1.matcher( tmpLine );
669                    if ( matcher.find() && matcher.groupCount() == 7 )
670                    {
671                        String entityName = matcher.group( 2 );
672                        String entityValue = matcher.group( 5 );
673
674                        addEntity( parser, entityName, entityValue );
675                        tmpLine = "";
676                    }
677                    else
678                    {
679                        matcher = PATTERN_ENTITY_2.matcher( tmpLine );
680                        if ( matcher.find() && matcher.groupCount() == 8 )
681                        {
682                            String entityName = matcher.group( 2 );
683                            String entityValue = matcher.group( 5 );
684
685                            addEntity( parser, entityName, entityValue );
686                            tmpLine = "";
687                        }
688                    }
689                }
690            }
691            catch ( IOException e )
692            {
693                // nop
694            }
695        }
696    }
697
698    /**
699     * Implementation of the callback mechanism <code>EntityResolver</code>.
700     * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
701     */
702    public static class CachedFileEntityResolver
703        implements EntityResolver
704    {
705        /** Map with systemId as key and the content of systemId as byte[]. */
706        protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
707
708        /** {@inheritDoc} */
709        public InputSource resolveEntity( String publicId, String systemId )
710            throws SAXException, IOException
711        {
712            byte[] res = ENTITY_CACHE.get( systemId );
713            // already cached?
714            if ( res == null )
715            {
716                String systemName = FileUtils.getFile( systemId ).getName();
717                File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
718                // maybe already as a temp file?
719                if ( !temp.exists() )
720                {
721                    // is systemId a file or an url?
722                    if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
723                    {
724                        // Doxia XSDs are included in the jars, so try to find the resource systemName from
725                        // the classpath...
726                        String resource = "/" + systemName;
727                        URL url = getClass().getResource( resource );
728                        if ( url != null )
729                        {
730                            res = toByteArray( url );
731                        }
732                        else
733                        {
734                            throw new SAXException( "Could not find the SYSTEM entity: " + systemId
735                            + " because '" + resource + "' is not available of the classpath." );
736                        }
737                    }
738                    else
739                    {
740                        res = toByteArray( new URL( systemId ) );
741                    }
742
743                    // write systemId as temp file
744                    copy( res, temp );
745                }
746                else
747                {
748                    // TODO How to refresh Doxia XSDs from temp dir?
749                    res = toByteArray( temp.toURI().toURL() );
750                }
751
752                ENTITY_CACHE.put( systemId, res );
753            }
754
755            InputSource is = new InputSource( new ByteArrayInputStream( res ) );
756            is.setPublicId( publicId );
757            is.setSystemId( systemId );
758
759            return is;
760        }
761
762        /**
763         * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
764         * content.
765         * Otherwise, use HttpClient to get the http content.
766         * Wrap all internal exceptions to throw SAXException.
767         *
768         * @param url not null
769         * @return return an array of byte
770         * @throws SAXException if any
771         */
772        private static byte[] toByteArray( URL url )
773            throws SAXException
774        {
775            if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
776            {
777                InputStream is = null;
778                try
779                {
780                    is = url.openStream();
781                    if ( is == null )
782                    {
783                        throw new SAXException( "Cannot open stream from the url: " + url.toString() );
784                    }
785                    return IOUtil.toByteArray( is );
786                }
787                catch ( IOException e )
788                {
789                    throw new SAXException( "IOException: " + e.getMessage(), e );
790                }
791                finally
792                {
793                    IOUtil.close( is );
794                }
795            }
796
797            // it is an HTTP url, using HttpClient...
798            HttpClientBuilder httpClientBuilder = HttpClientBuilder.create()
799                    .useSystemProperties()
800                    .setRetryHandler( new DefaultHttpRequestRetryHandler( 3, false ) )
801                    // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
802                    // The default user-agent is "Apache-HttpClient/4.5.8 (java 7)"
803                    .setUserAgent( "Apache-Doxia/" + doxiaVersion() );
804
805            try ( CloseableHttpResponse response = httpClientBuilder.build().execute( new HttpGet( url.toString() ) ) )
806            {
807                int statusCode = response.getStatusLine().getStatusCode();
808                if ( statusCode != HttpStatus.SC_OK )
809                {
810                    throw new IOException(
811                            "The status code when accessing the URL '" + url.toString() + "' was " + statusCode
812                                    + ", which is not allowed. The server gave this reason for the failure '"
813                                    + response.getStatusLine().getReasonPhrase() + "'." );
814                }
815
816                return EntityUtils.toByteArray( response.getEntity() );
817            }
818            catch ( ClientProtocolException e )
819            {
820                throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
821            }
822            catch ( IOException e )
823            {
824                throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
825            }
826        }
827
828        /**
829         * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
830         *
831         * @param res not null array of byte
832         * @param f the file where to write the bytes
833         * @throws SAXException if any
834         * @see IOUtil#copy(byte[], OutputStream)
835         */
836        private void copy( byte[] res, File f )
837            throws SAXException
838        {
839            if ( f.isDirectory() )
840            {
841                throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
842            }
843
844            OutputStream os = null;
845            try
846            {
847                os = new FileOutputStream( f );
848                IOUtil.copy( res, os );
849            }
850            catch ( IOException e )
851            {
852                throw new SAXException( "IOException: " + e.getMessage(), e );
853            }
854            finally
855            {
856                IOUtil.close( os );
857            }
858        }
859    }
860}