Coverage Report - org.apache.maven.doxia.parser.AbstractXmlParser
 
Classes in this File Line Coverage Branch Coverage Complexity
AbstractXmlParser
73%
109/149
66%
52/78
3,679
AbstractXmlParser$CachedFileEntityResolver
1%
1/61
0%
0/20
3,679
 
 1  
 package org.apache.maven.doxia.parser;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.BufferedReader;
 23  
 import java.io.ByteArrayInputStream;
 24  
 import java.io.File;
 25  
 import java.io.FileOutputStream;
 26  
 import java.io.IOException;
 27  
 import java.io.InputStream;
 28  
 import java.io.OutputStream;
 29  
 import java.io.Reader;
 30  
 import java.io.StringReader;
 31  
 import java.net.URL;
 32  
 import java.util.Hashtable;
 33  
 import java.util.LinkedHashMap;
 34  
 import java.util.Locale;
 35  
 import java.util.Map;
 36  
 import java.util.regex.Matcher;
 37  
 import java.util.regex.Pattern;
 38  
 
 39  
 import org.apache.http.HttpEntity;
 40  
 import org.apache.http.HttpResponse;
 41  
 import org.apache.http.HttpStatus;
 42  
 import org.apache.http.client.ClientProtocolException;
 43  
 import org.apache.http.client.HttpRequestRetryHandler;
 44  
 import org.apache.http.client.methods.HttpGet;
 45  
 import org.apache.http.impl.client.DefaultHttpClient;
 46  
 import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
 47  
 import org.apache.http.util.EntityUtils;
 48  
 
 49  
 import org.apache.maven.doxia.macro.MacroExecutionException;
 50  
 import org.apache.maven.doxia.markup.XmlMarkup;
 51  
 import org.apache.maven.doxia.sink.Sink;
 52  
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
 53  
 import org.apache.maven.doxia.util.HtmlTools;
 54  
 import org.apache.maven.doxia.util.XmlValidator;
 55  
 
 56  
 import org.codehaus.plexus.util.FileUtils;
 57  
 import org.codehaus.plexus.util.IOUtil;
 58  
 import org.codehaus.plexus.util.StringUtils;
 59  
 import org.codehaus.plexus.util.xml.pull.MXParser;
 60  
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
 61  
 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
 62  
 
 63  
 import org.xml.sax.EntityResolver;
 64  
 import org.xml.sax.InputSource;
 65  
 import org.xml.sax.SAXException;
 66  
 
 67  
 /**
 68  
  * An abstract class that defines some convenience methods for <code>XML</code> parsers.
 69  
  *
 70  
  * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
 71  
  * @version $Id: AbstractXmlParser.java 1185112 2011-10-17 11:33:00Z ltheussl $
 72  
  * @since 1.0
 73  
  */
 74  44
 public abstract class AbstractXmlParser
 75  
     extends AbstractParser
 76  
     implements XmlMarkup
 77  
 {
 78  
     /**
 79  
      * Entity pattern for HTML entity, i.e. &#38;nbsp;
 80  
      * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>
 81  
      * <br/>
 82  
      * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
 83  
      */
 84  2
     private static final Pattern PATTERN_ENTITY_1 =
 85  
         Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
 86  
 
 87  
     /**
 88  
      * Entity pattern for Unicode entity, i.e. &#38;#38;
 89  
      * "<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>"
 90  
      * <br/>
 91  
      * see <a href="http://www.w3.org/TR/REC-xml/#NT-EntityDecl">http://www.w3.org/TR/REC-xml/#NT-EntityDecl</a>.
 92  
      */
 93  2
     private static final Pattern PATTERN_ENTITY_2 =
 94  
         Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
 95  
 
 96  
     private boolean ignorableWhitespace;
 97  
 
 98  
     private boolean collapsibleWhitespace;
 99  
 
 100  
     private boolean trimmableWhitespace;
 101  
 
 102  
     private Map<String, String> entities;
 103  
 
 104  44
     private boolean validate = false;
 105  
 
 106  
     /** {@inheritDoc} */
 107  
     public void parse( Reader source, Sink sink )
 108  
         throws ParseException
 109  
     {
 110  52
         init();
 111  
 
 112  52
         Reader src = source;
 113  
 
 114  
         // 1 first parsing if validation is required
 115  52
         if ( isValidate() )
 116  
         {
 117  
             String content;
 118  
             try
 119  
             {
 120  0
                 content = IOUtil.toString( new BufferedReader( src ) );
 121  
             }
 122  0
             catch ( IOException e )
 123  
             {
 124  0
                 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
 125  0
             }
 126  
 
 127  0
             new XmlValidator( getLog() ).validate( content );
 128  
 
 129  0
             src = new StringReader( content );
 130  
         }
 131  
 
 132  
         // 2 second parsing to process
 133  
         try
 134  
         {
 135  52
             XmlPullParser parser = new MXParser();
 136  
 
 137  52
             parser.setInput( src );
 138  
 
 139  52
             sink.enableLogging( getLog() );
 140  
 
 141  52
             parseXml( parser, sink );
 142  
         }
 143  0
         catch ( XmlPullParserException ex )
 144  
         {
 145  0
             throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
 146  
                                       ex.getColumnNumber() );
 147  
         }
 148  0
         catch ( MacroExecutionException ex )
 149  
         {
 150  0
             throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
 151  52
         }
 152  
 
 153  52
         setSecondParsing( false );
 154  52
         init();
 155  52
     }
 156  
 
 157  
     /**
 158  
      * {@inheritDoc}
 159  
      *
 160  
      * Convenience method to parse an arbitrary string and emit any xml events into the given sink.
 161  
      */
 162  
     @Override
 163  
     public void parse( String string, Sink sink )
 164  
         throws ParseException
 165  
     {
 166  46
         super.parse( string, sink );
 167  46
     }
 168  
 
 169  
     /** {@inheritDoc} */
 170  
     @Override
 171  
     public final int getType()
 172  
     {
 173  0
         return XML_TYPE;
 174  
     }
 175  
 
 176  
     /**
 177  
      * Converts the attributes of the current start tag of the given parser to a SinkEventAttributeSet.
 178  
      *
 179  
      * @param parser A parser, not null.
 180  
      * @return a SinkEventAttributeSet or null if the current parser event is not a start tag.
 181  
      * @since 1.1
 182  
      */
 183  
     protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
 184  
     {
 185  218
         int count = parser.getAttributeCount();
 186  
 
 187  218
         if ( count < 0 )
 188  
         {
 189  2
             return null;
 190  
         }
 191  
 
 192  216
         SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
 193  
 
 194  260
         for ( int i = 0; i < count; i++ )
 195  
         {
 196  44
             atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
 197  
         }
 198  
 
 199  216
         return atts;
 200  
     }
 201  
 
 202  
     /**
 203  
      * Parse the model from the XmlPullParser into the given sink.
 204  
      *
 205  
      * @param parser A parser, not null.
 206  
      * @param sink the sink to receive the events.
 207  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 208  
      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
 209  
      */
 210  
     private void parseXml( XmlPullParser parser, Sink sink )
 211  
         throws XmlPullParserException, MacroExecutionException
 212  
     {
 213  52
         int eventType = parser.getEventType();
 214  
 
 215  658
         while ( eventType != XmlPullParser.END_DOCUMENT )
 216  
         {
 217  606
             if ( eventType == XmlPullParser.START_TAG )
 218  
             {
 219  214
                 handleStartTag( parser, sink );
 220  
             }
 221  392
             else if ( eventType == XmlPullParser.END_TAG )
 222  
             {
 223  214
                 handleEndTag( parser, sink );
 224  
             }
 225  178
             else if ( eventType == XmlPullParser.TEXT )
 226  
             {
 227  78
                 String text = getText( parser );
 228  
 
 229  78
                 if ( isIgnorableWhitespace() )
 230  
                 {
 231  0
                     if ( text.trim().length() != 0 )
 232  
                     {
 233  0
                         handleText( parser, sink );
 234  
                     }
 235  
                 }
 236  
                 else
 237  
                 {
 238  78
                     handleText( parser, sink );
 239  
                 }
 240  78
             }
 241  100
             else if ( eventType == XmlPullParser.CDSECT )
 242  
             {
 243  4
                 handleCdsect( parser, sink );
 244  
             }
 245  96
             else if ( eventType == XmlPullParser.COMMENT )
 246  
             {
 247  4
                 handleComment( parser, sink );
 248  
             }
 249  92
             else if ( eventType == XmlPullParser.ENTITY_REF )
 250  
             {
 251  34
                 handleEntity( parser, sink );
 252  
             }
 253  58
             else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
 254  
             {
 255  
                 // nop
 256  
             }
 257  56
             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
 258  
             {
 259  
                 // nop
 260  
             }
 261  56
             else if ( eventType == XmlPullParser.DOCDECL )
 262  
             {
 263  4
                 addLocalEntities( parser, parser.getText() );
 264  
 
 265  4
                 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
 266  
                 {
 267  0
                     addDTDEntities( parser, new String( res ) );
 268  
                 }
 269  
             }
 270  
 
 271  
             try
 272  
             {
 273  606
                 eventType = parser.nextToken();
 274  
             }
 275  0
             catch ( IOException io )
 276  
             {
 277  0
                 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
 278  606
             }
 279  
         }
 280  52
     }
 281  
 
 282  
     /**
 283  
      * Goes through the possible start tags.
 284  
      *
 285  
      * @param parser A parser, not null.
 286  
      * @param sink the sink to receive the events.
 287  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 288  
      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
 289  
      */
 290  
     protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
 291  
         throws XmlPullParserException, MacroExecutionException;
 292  
 
 293  
     /**
 294  
      * Goes through the possible end tags.
 295  
      *
 296  
      * @param parser A parser, not null.
 297  
      * @param sink the sink to receive the events.
 298  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 299  
      * @throws org.apache.maven.doxia.macro.MacroExecutionException if there's a problem executing a macro
 300  
      */
 301  
     protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
 302  
         throws XmlPullParserException, MacroExecutionException;
 303  
 
 304  
     /**
 305  
      * Handles text events.
 306  
      *
 307  
      * <p>This is a default implementation, if the parser points to a non-empty text element,
 308  
      * it is emitted as a text event into the specified sink.</p>
 309  
      *
 310  
      * @param parser A parser, not null.
 311  
      * @param sink the sink to receive the events. Not null.
 312  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 313  
      */
 314  
     protected void handleText( XmlPullParser parser, Sink sink )
 315  
         throws XmlPullParserException
 316  
     {
 317  0
         String text = getText( parser );
 318  
 
 319  
         /*
 320  
          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
 321  
          * parser so any whitespace that makes it here is significant.
 322  
          */
 323  0
         if ( StringUtils.isNotEmpty( text ) )
 324  
         {
 325  0
             sink.text( text );
 326  
         }
 327  0
     }
 328  
 
 329  
     /**
 330  
      * Handles CDATA sections.
 331  
      *
 332  
      * <p>This is a default implementation, all data are emitted as text
 333  
      * events into the specified sink.</p>
 334  
      *
 335  
      * @param parser A parser, not null.
 336  
      * @param sink the sink to receive the events. Not null.
 337  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 338  
      */
 339  
     protected void handleCdsect( XmlPullParser parser, Sink sink )
 340  
         throws XmlPullParserException
 341  
     {
 342  0
         sink.text( getText( parser ) );
 343  0
     }
 344  
 
 345  
     /**
 346  
      * Handles comments.
 347  
      *
 348  
      * <p>This is a default implementation, all data are emitted as comment
 349  
      * events into the specified sink.</p>
 350  
      *
 351  
      * @param parser A parser, not null.
 352  
      * @param sink the sink to receive the events. Not null.
 353  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 354  
      */
 355  
     protected void handleComment( XmlPullParser parser, Sink sink )
 356  
         throws XmlPullParserException
 357  
     {
 358  0
         sink.comment( getText( parser ).trim() );
 359  0
     }
 360  
 
 361  
     /**
 362  
      * Handles entities.
 363  
      *
 364  
      * <p>This is a default implementation, all entities are resolved and emitted as text
 365  
      * events into the specified sink, except:</p>
 366  
      * <ul>
 367  
      * <li>the entities with names <code>#160</code>, <code>nbsp</code> and <code>#x00A0</code>
 368  
      * are emitted as <code>nonBreakingSpace()</code> events.</li>
 369  
      * </ul>
 370  
      *
 371  
      * @param parser A parser, not null.
 372  
      * @param sink the sink to receive the events. Not null.
 373  
      * @throws org.codehaus.plexus.util.xml.pull.XmlPullParserException if there's a problem parsing the model
 374  
      */
 375  
     protected void handleEntity( XmlPullParser parser, Sink sink )
 376  
         throws XmlPullParserException
 377  
     {
 378  34
         String text = getText( parser );
 379  
 
 380  34
         String name = parser.getName();
 381  
 
 382  34
         if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
 383  
         {
 384  4
             sink.nonBreakingSpace();
 385  
         }
 386  
         else
 387  
         {
 388  30
             String unescaped = HtmlTools.unescapeHTML( text );
 389  
 
 390  30
             sink.text( unescaped );
 391  
         }
 392  34
     }
 393  
 
 394  
     /**
 395  
      * Handles an unkown event.
 396  
      *
 397  
      * <p>This is a default implementation, all events are emitted as unknown
 398  
      * events into the specified sink.</p>
 399  
      *
 400  
      * @param parser the parser to get the event from.
 401  
      * @param sink the sink to receive the event.
 402  
      * @param type the tag event type. This should be one of HtmlMarkup.TAG_TYPE_SIMPLE,
 403  
      * HtmlMarkup.TAG_TYPE_START, HtmlMarkup.TAG_TYPE_END or HtmlMarkup.ENTITY_TYPE.
 404  
      * It will be passed as the first argument of the required parameters to the Sink
 405  
      * {@link org.apache.maven.doxia.sink.Sink#unknown(String, Object[], org.apache.maven.doxia.sink.SinkEventAttributes)}
 406  
      * method.
 407  
      */
 408  
     protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
 409  
     {
 410  4
         Object[] required = new Object[] { new Integer( type ) };
 411  
 
 412  4
         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
 413  
 
 414  4
         sink.unknown( parser.getName(), required, attribs );
 415  4
     }
 416  
 
 417  
     /**
 418  
      * <p>isIgnorableWhitespace.</p>
 419  
      *
 420  
      * @return <code>true</code> if whitespace will be ignored, <code>false</code> otherwise.
 421  
      * @see #setIgnorableWhitespace(boolean)
 422  
      * @since 1.1
 423  
      */
 424  
     protected boolean isIgnorableWhitespace()
 425  
     {
 426  78
         return ignorableWhitespace;
 427  
     }
 428  
 
 429  
     /**
 430  
      * Specify that whitespace will be ignored. I.e.:
 431  
      * <pre>&lt;tr&gt; &lt;td/&gt; &lt;/tr&gt;</pre>
 432  
      * is equivalent to
 433  
      * <pre>&lt;tr&gt;&lt;td/&gt;&lt;/tr&gt;</pre>
 434  
      *
 435  
      * @param ignorable <code>true</code> to ignore whitespace, <code>false</code> otherwise.
 436  
      * @since 1.1
 437  
      */
 438  
     protected void setIgnorableWhitespace( boolean ignorable )
 439  
     {
 440  0
         this.ignorableWhitespace = ignorable;
 441  0
     }
 442  
 
 443  
     /**
 444  
      * <p>isCollapsibleWhitespace.</p>
 445  
      *
 446  
      * @return <code>true</code> if text will collapse, <code>false</code> otherwise.
 447  
      * @see #setCollapsibleWhitespace(boolean)
 448  
      * @since 1.1
 449  
      */
 450  
     protected boolean isCollapsibleWhitespace()
 451  
     {
 452  198
         return collapsibleWhitespace;
 453  
     }
 454  
 
 455  
     /**
 456  
      * Specify that text will be collapsed. I.e.:
 457  
      * <pre>Text   Text</pre>
 458  
      * is equivalent to
 459  
      * <pre>Text Text</pre>
 460  
      *
 461  
      * @param collapsible <code>true</code> to allow collapsible text, <code>false</code> otherwise.
 462  
      * @since 1.1
 463  
      */
 464  
     protected void setCollapsibleWhitespace( boolean collapsible )
 465  
     {
 466  0
         this.collapsibleWhitespace = collapsible;
 467  0
     }
 468  
 
 469  
     /**
 470  
      * <p>isTrimmableWhitespace.</p>
 471  
      *
 472  
      * @return <code>true</code> if text will be trim, <code>false</code> otherwise.
 473  
      * @see #setTrimmableWhitespace(boolean)
 474  
      * @since 1.1
 475  
      */
 476  
     protected boolean isTrimmableWhitespace()
 477  
     {
 478  198
         return trimmableWhitespace;
 479  
     }
 480  
 
 481  
     /**
 482  
      * Specify that text will be collapsed. I.e.:
 483  
      * <pre>&lt;p&gt; Text &lt;/p&gt;</pre>
 484  
      * is equivalent to
 485  
      * <pre>&lt;p&gt;Text&lt;/p&gt;</pre>
 486  
      *
 487  
      * @param trimmable <code>true</code> to allow trimmable text, <code>false</code> otherwise.
 488  
      * @since 1.1
 489  
      */
 490  
     protected void setTrimmableWhitespace( boolean trimmable )
 491  
     {
 492  0
         this.trimmableWhitespace = trimmable;
 493  0
     }
 494  
 
 495  
     /**
 496  
      * <p>getText.</p>
 497  
      *
 498  
      * @param parser A parser, not null.
 499  
      * @return the {@link XmlPullParser#getText()} taking care of trimmable or collapsible configuration.
 500  
      * @see XmlPullParser#getText()
 501  
      * @see #isCollapsibleWhitespace()
 502  
      * @see #isTrimmableWhitespace()
 503  
      * @since 1.1
 504  
      */
 505  
     protected String getText( XmlPullParser parser )
 506  
     {
 507  198
         String text = parser.getText();
 508  
 
 509  198
         if ( isTrimmableWhitespace() )
 510  
         {
 511  0
             text = text.trim();
 512  
         }
 513  
 
 514  198
         if ( isCollapsibleWhitespace() )
 515  
         {
 516  0
             StringBuilder newText = new StringBuilder();
 517  0
             String[] elts = StringUtils.split( text, " \r\n" );
 518  0
             for ( int i = 0; i < elts.length; i++ )
 519  
             {
 520  0
                 newText.append( elts[i] );
 521  0
                 if ( ( i + 1 ) < elts.length )
 522  
                 {
 523  0
                     newText.append( " " );
 524  
                 }
 525  
             }
 526  0
             text = newText.toString();
 527  
         }
 528  
 
 529  198
         return text;
 530  
     }
 531  
 
 532  
     /**
 533  
      * Return the defined entities in a local doctype. I.e.:
 534  
      * <pre>
 535  
      * &lt;!DOCTYPE foo [
 536  
      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
 537  
      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
 538  
      * ]&gt;
 539  
      * </pre>
 540  
      *
 541  
      * @return a map of the defined entities in a local doctype.
 542  
      * @since 1.1
 543  
      */
 544  
     protected Map<String, String> getLocalEntities()
 545  
     {
 546  16
         if ( entities == null )
 547  
         {
 548  4
             entities = new LinkedHashMap<String, String>();
 549  
         }
 550  
 
 551  16
         return entities;
 552  
     }
 553  
 
 554  
     /**
 555  
      * <p>isValidate.</p>
 556  
      *
 557  
      * @return <code>true</code> if XML content will be validate, <code>false</code> otherwise.
 558  
      * @since 1.1
 559  
      */
 560  
     public boolean isValidate()
 561  
     {
 562  52
         return validate;
 563  
     }
 564  
 
 565  
     /**
 566  
      * Specify a flag to validate or not the XML content.
 567  
      *
 568  
      * @param validate the validate to set
 569  
      * @see #parse(Reader, Sink)
 570  
      * @since 1.1
 571  
      */
 572  
     public void setValidate( boolean validate )
 573  
     {
 574  4
         this.validate = validate;
 575  4
     }
 576  
 
 577  
     // ----------------------------------------------------------------------
 578  
     // Private methods
 579  
     // ----------------------------------------------------------------------
 580  
 
 581  
     /**
 582  
      * Add an entity given by <code>entityName</code> and <code>entityValue</code> to {@link #entities}.
 583  
      * <br/>
 584  
      * By default, we exclude the default XML entities: &#38;amp;, &#38;lt;, &#38;gt;, &#38;quot; and &#38;apos;.
 585  
      *
 586  
      * @param parser not null
 587  
      * @param entityName not null
 588  
      * @param entityValue not null
 589  
      * @throws XmlPullParserException if any
 590  
      * @see {@link XmlPullParser#defineEntityReplacementText(String, String)}
 591  
      */
 592  
     private void addEntity( XmlPullParser parser, String entityName, String entityValue )
 593  
         throws XmlPullParserException
 594  
     {
 595  16
         if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
 596  
             || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
 597  
         {
 598  0
             return;
 599  
         }
 600  
 
 601  16
         parser.defineEntityReplacementText( entityName, entityValue );
 602  16
         getLocalEntities().put( entityName, entityValue );
 603  16
     }
 604  
 
 605  
     /**
 606  
      * Handle entities defined in a local doctype as the following:
 607  
      * <pre>
 608  
      * &lt;!DOCTYPE foo [
 609  
      *   &lt;!ENTITY bar "&#38;#x160;"&gt;
 610  
      *   &lt;!ENTITY bar1 "&#38;#x161;"&gt;
 611  
      * ]&gt;
 612  
      * </pre>
 613  
      *
 614  
      * @param parser not null
 615  
      * @param text not null
 616  
      * @throws XmlPullParserException if any
 617  
      */
 618  
     private void addLocalEntities( XmlPullParser parser, String text )
 619  
         throws XmlPullParserException
 620  
     {
 621  4
         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
 622  4
         if ( entitiesCount > 0 )
 623  
         {
 624  
             // text should be foo [...]
 625  4
             int start = text.indexOf( '[');
 626  4
             int end = text.lastIndexOf( ']');
 627  4
             if ( start != -1 && end != -1 )
 628  
             {
 629  4
                 addDTDEntities( parser, text.substring( start + 1, end ) );
 630  
             }
 631  
         }
 632  4
     }
 633  
 
 634  
     /**
 635  
      * Handle entities defined in external doctypes as the following:
 636  
      * <pre>
 637  
      * &lt;!DOCTYPE foo [
 638  
      *   &lt;!-- These are the entity sets for ISO Latin 1 characters for the XHTML --&gt;
 639  
      *   &lt;!ENTITY % HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
 640  
      *          "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"&gt;
 641  
      *   %HTMLlat1;
 642  
      * ]&gt;
 643  
      * </pre>
 644  
      *
 645  
      * @param parser not null
 646  
      * @param text not null
 647  
      * @throws XmlPullParserException if any
 648  
      */
 649  
     private void addDTDEntities( XmlPullParser parser, String text )
 650  
         throws XmlPullParserException
 651  
     {
 652  4
         int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
 653  4
         if ( entitiesCount > 0 )
 654  
         {
 655  4
             final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
 656  4
             BufferedReader reader = new BufferedReader( new StringReader( txt ) );
 657  
             String line;
 658  4
             String tmpLine = "";
 659  
             try
 660  
             {
 661  
                 Matcher matcher;
 662  32
                 while ( ( line = reader.readLine() ) != null )
 663  
                 {
 664  28
                     tmpLine += "\n" + line;
 665  28
                     matcher = PATTERN_ENTITY_1.matcher( tmpLine );
 666  28
                     if ( matcher.find() && matcher.groupCount() == 7 )
 667  
                     {
 668  4
                         String entityName = matcher.group( 2 );
 669  4
                         String entityValue = matcher.group( 5 );
 670  
 
 671  4
                         addEntity( parser, entityName, entityValue );
 672  4
                         tmpLine = "";
 673  4
                     }
 674  
                     else
 675  
                     {
 676  24
                         matcher = PATTERN_ENTITY_2.matcher( tmpLine );
 677  24
                         if ( matcher.find() && matcher.groupCount() == 8 )
 678  
                         {
 679  12
                             String entityName = matcher.group( 2 );
 680  12
                             String entityValue = matcher.group( 5 );
 681  
 
 682  12
                             addEntity( parser, entityName, entityValue );
 683  12
                             tmpLine = "";
 684  12
                         }
 685  
                     }
 686  
                 }
 687  
             }
 688  0
             catch ( IOException e )
 689  
             {
 690  
                 // nop
 691  
             }
 692  
             finally
 693  
             {
 694  4
                 IOUtil.close( reader );
 695  4
             }
 696  
         }
 697  4
     }
 698  
 
 699  
     /**
 700  
      * Implementation of the callback mechanism <code>EntityResolver</code>.
 701  
      * Using a mechanism of cached files in temp dir to improve performance when using the <code>XMLReader</code>.
 702  
      */
 703  0
     public static class CachedFileEntityResolver
 704  
         implements EntityResolver
 705  
     {
 706  
         /** Map with systemId as key and the content of systemId as byte[]. */
 707  2
         protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
 708  
 
 709  
         /** {@inheritDoc} */
 710  
         public InputSource resolveEntity( String publicId, String systemId )
 711  
             throws SAXException, IOException
 712  
         {
 713  0
             byte[] res = ENTITY_CACHE.get( systemId );
 714  
             // already cached?
 715  0
             if ( res == null )
 716  
             {
 717  0
                 String systemName = FileUtils.getFile( systemId ).getName();
 718  0
                 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
 719  
                 // maybe already as a temp file?
 720  0
                 if ( !temp.exists() )
 721  
                 {
 722  
                     // is systemId a file or an url?
 723  0
                     if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
 724  
                     {
 725  
                         // Doxia XSDs are included in the jars, so try to find the resource systemName from
 726  
                         // the classpath...
 727  0
                         String resource = "/" + systemName;
 728  0
                         URL url = getClass().getResource( resource );
 729  0
                         if ( url != null )
 730  
                         {
 731  0
                             res = toByteArray( url );
 732  
                         }
 733  
                         else
 734  
                         {
 735  0
                             throw new SAXException( "Could not find the SYSTEM entity: " + systemId
 736  
                             + " because '" + resource + "' is not available of the classpath." );
 737  
                         }
 738  0
                     }
 739  
                     else
 740  
                     {
 741  0
                         res = toByteArray( new URL( systemId ) );
 742  
                     }
 743  
 
 744  
                     // write systemId as temp file
 745  0
                     copy( res, temp );
 746  
                 }
 747  
                 else
 748  
                 {
 749  
                     // TODO How to refresh Doxia XSDs from temp dir?
 750  0
                     res = toByteArray( temp.toURI().toURL() );
 751  
                 }
 752  
 
 753  0
                 ENTITY_CACHE.put( systemId, res );
 754  
             }
 755  
 
 756  0
             InputSource is = new InputSource( new ByteArrayInputStream( res ) );
 757  0
             is.setPublicId( publicId );
 758  0
             is.setSystemId( systemId );
 759  
 
 760  0
             return is;
 761  
         }
 762  
 
 763  
         /**
 764  
          * If url is not an http/https urls, call {@link IOUtil#toByteArray(java.io.InputStream)} to get the url
 765  
          * content.
 766  
          * Otherwise, use HttpClient to get the http content.
 767  
          * Wrap all internal exceptions to throw SAXException.
 768  
          *
 769  
          * @param url not null
 770  
          * @return return an array of byte
 771  
          * @throws SAXException if any
 772  
          */
 773  
         private static byte[] toByteArray( URL url )
 774  
             throws SAXException
 775  
         {
 776  0
             if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
 777  
             {
 778  0
                 InputStream is = null;
 779  
                 try
 780  
                 {
 781  0
                     is = url.openStream();
 782  0
                     if ( is == null )
 783  
                     {
 784  0
                         throw new SAXException( "Cannot open stream from the url: " + url.toString() );
 785  
                     }
 786  0
                     return IOUtil.toByteArray( is );
 787  
                 }
 788  0
                 catch ( IOException e )
 789  
                 {
 790  0
                     throw new SAXException( "IOException: " + e.getMessage(), e );
 791  
                 }
 792  
                 finally
 793  
                 {
 794  0
                     IOUtil.close( is );
 795  
                 }
 796  
             }
 797  
 
 798  
             // it is an HTTP url, using HttpClient...
 799  0
             DefaultHttpClient client = new DefaultHttpClient();
 800  0
             HttpGet method = new HttpGet( url.toString() );
 801  
             // Set a user-agent that doesn't contain the word "java", otherwise it will be blocked by the W3C
 802  
             // The default user-agent is "Apache-HttpClient/4.0.2 (java 1.5)"
 803  0
             method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
 804  
 
 805  0
             HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
 806  0
             client.setHttpRequestRetryHandler( retryHandler );
 807  
 
 808  0
             HttpEntity entity = null;
 809  
             try
 810  
             {
 811  0
                 HttpResponse response = client.execute( method );
 812  0
                 int statusCode = response.getStatusLine().getStatusCode();
 813  0
                 if ( statusCode != HttpStatus.SC_OK )
 814  
                 {
 815  0
                     throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
 816  
                         + statusCode + ", which is not allowed. The server gave this reason for the failure '"
 817  
                         + response.getStatusLine().getReasonPhrase() + "'." );
 818  
                 }
 819  
 
 820  0
                 entity = response.getEntity();
 821  0
                 return EntityUtils.toByteArray( entity );
 822  
             }
 823  0
             catch ( ClientProtocolException e )
 824  
             {
 825  0
                 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
 826  
             }
 827  0
             catch ( IOException e )
 828  
             {
 829  0
                 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
 830  
             }
 831  
             finally
 832  
             {
 833  0
                 if ( entity != null )
 834  
                 {
 835  
                     try
 836  
                     {
 837  0
                         entity.consumeContent();
 838  
                     }
 839  0
                     catch ( IOException e )
 840  
                     {
 841  
                         // Ignore
 842  0
                     }
 843  
                 }
 844  
             }
 845  
         }
 846  
 
 847  
         /**
 848  
          * Wrap {@link IOUtil#copy(byte[], OutputStream)} to throw SAXException.
 849  
          *
 850  
          * @param res not null array of byte
 851  
          * @param f the file where to write the bytes
 852  
          * @throws SAXException if any
 853  
          * @see {@link IOUtil#copy(byte[], OutputStream)}
 854  
          */
 855  
         private void copy( byte[] res, File f )
 856  
             throws SAXException
 857  
         {
 858  0
             if ( f.isDirectory() )
 859  
             {
 860  0
                 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
 861  
             }
 862  
 
 863  0
             OutputStream os = null;
 864  
             try
 865  
             {
 866  0
                 os = new FileOutputStream( f );
 867  0
                 IOUtil.copy( res, os );
 868  
             }
 869  0
             catch ( IOException e )
 870  
             {
 871  0
                 throw new SAXException( "IOException: " + e.getMessage(), e );
 872  
             }
 873  
             finally
 874  
             {
 875  0
                 IOUtil.close( os );
 876  0
             }
 877  0
         }
 878  
     }
 879  
 }