Coverage Report - org.apache.maven.doxia.parser.XhtmlBaseParser
 
Classes in this File Line Coverage Branch Coverage Complexity
XhtmlBaseParser
92%
311/337
85%
217/254
4,794
 
 1  
 package org.apache.maven.doxia.parser;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.Reader;
 23  
 import java.util.HashMap;
 24  
 import java.util.Map;
 25  
 import java.util.Set;
 26  
 import java.util.TreeSet;
 27  
 
 28  
 import javax.swing.text.html.HTML.Attribute;
 29  
 
 30  
 import org.apache.maven.doxia.macro.MacroExecutionException;
 31  
 import org.apache.maven.doxia.markup.HtmlMarkup;
 32  
 import org.apache.maven.doxia.sink.Sink;
 33  
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
 34  
 import org.apache.maven.doxia.sink.SinkEventAttributes;
 35  
 import org.apache.maven.doxia.util.DoxiaUtils;
 36  
 
 37  
 import org.codehaus.plexus.util.StringUtils;
 38  
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
 39  
 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
 40  
 
 41  
 /**
 42  
  * Common base parser for xhtml events.
 43  
  *
 44  
  * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
 45  
  * @author ltheussl
 46  
  * @version $Id: XhtmlBaseParser.java 1090706 2011-04-09 23:15:28Z hboutemy $
 47  
  * @since 1.1
 48  
  */
 49  44
 public class XhtmlBaseParser
 50  
     extends AbstractXmlParser
 51  
         implements HtmlMarkup
 52  
 {
 53  
     /** True if a &lt;script&gt;&lt;/script&gt; block is read. CDATA sections within are handled as rawText. */
 54  
     private boolean scriptBlock;
 55  
 
 56  
     /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
 57  
     private boolean isLink;
 58  
 
 59  
     /** Used to distinguish &lt;a name=""&gt; (or &lt;a id=""&gt;) anchors from &lt;a href=""&gt; links. */
 60  
     private boolean isAnchor;
 61  
 
 62  
     /** Used for nested lists. */
 63  44
     private int orderedListDepth = 0;
 64  
 
 65  
     /** Counts section level. */
 66  
     private int sectionLevel;
 67  
 
 68  
     /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
 69  
     private boolean inVerbatim;
 70  
 
 71  
     /** Used to recognize the case of img inside figure. */
 72  
     private boolean inFigure;
 73  
 
 74  
     /** Decoration properties, eg for texts. */
 75  44
     private final SinkEventAttributeSet decoration = new SinkEventAttributeSet();
 76  
 
 77  
     /** Map of warn messages with a String as key to describe the error type and a Set as value.
 78  
      * Used to reduce warn messages. */
 79  
     private Map<String, Set<String>> warnMessages;
 80  
 
 81  
     /** {@inheritDoc} */
 82  
     public void parse( Reader source, Sink sink )
 83  
         throws ParseException
 84  
     {
 85  52
         init();
 86  
 
 87  
         try
 88  
         {
 89  52
             super.parse( source, sink );
 90  
         }
 91  
         finally
 92  
         {
 93  52
             logWarnings();
 94  
 
 95  52
             setSecondParsing( false );
 96  52
             init();
 97  52
         }
 98  52
     }
 99  
 
 100  
     /**
 101  
      * <p>
 102  
      *   Goes through a common list of possible html start tags. These include only tags that can go into
 103  
      *   the body of an xhtml document and so should be re-usable by different xhtml-based parsers.
 104  
      * </p>
 105  
      * <p>
 106  
      *   The currently handled tags are:
 107  
      * </p>
 108  
      * <p>
 109  
      *   <code>
 110  
      *      &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;, &lt;h6&gt;, &lt;p&gt;, &lt;pre&gt;,
 111  
      *      &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;, &lt;b&gt;, &lt;strong&gt;,
 112  
      *      &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;, &lt;table&gt;, &lt;tr&gt;,
 113  
      *      &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;.
 114  
      *   </code>
 115  
      * </p>
 116  
      *
 117  
      * @param parser A parser.
 118  
      * @param sink the sink to receive the events.
 119  
      * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise.
 120  
      */
 121  
     protected boolean baseStartTag( XmlPullParser parser, Sink sink )
 122  
     {
 123  214
         boolean visited = true;
 124  
 
 125  214
         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
 126  
 
 127  214
         if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
 128  
         {
 129  26
             handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
 130  
         }
 131  188
         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
 132  
         {
 133  10
             handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
 134  
         }
 135  178
         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
 136  
         {
 137  8
             handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
 138  
         }
 139  170
         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
 140  
         {
 141  2
             handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
 142  
         }
 143  168
         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
 144  
         {
 145  4
             handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
 146  
         }
 147  164
         else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
 148  
         {
 149  2
             decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" );
 150  
         }
 151  162
         else if ( parser.getName().equals( HtmlMarkup.S.toString() )
 152  
                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
 153  
                 || parser.getName().equals( "del" ) )
 154  
         {
 155  6
             decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" );
 156  
         }
 157  156
         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
 158  
         {
 159  2
             decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" );
 160  
         }
 161  154
         else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
 162  
         {
 163  2
             decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" );
 164  
         }
 165  152
         else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
 166  
         {
 167  18
             handlePStart( sink, attribs );
 168  
         }
 169  134
         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
 170  
         {
 171  18
             visited = handleDivStart( parser, attribs, sink );
 172  
         }
 173  116
         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
 174  
         {
 175  8
             handlePreStart( attribs, sink );
 176  
         }
 177  108
         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
 178  
         {
 179  2
             sink.list( attribs );
 180  
         }
 181  106
         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
 182  
         {
 183  2
             handleOLStart( parser, sink, attribs );
 184  
         }
 185  104
         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
 186  
         {
 187  4
             handleLIStart( sink, attribs );
 188  
         }
 189  100
         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
 190  
         {
 191  2
             sink.definitionList( attribs );
 192  
         }
 193  98
         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
 194  
         {
 195  2
             sink.definitionListItem( attribs );
 196  2
             sink.definedTerm( attribs );
 197  
         }
 198  96
         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
 199  
         {
 200  2
             sink.definition( attribs );
 201  
         }
 202  94
         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
 203  
                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
 204  
         {
 205  12
             sink.bold();
 206  
         }
 207  82
         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
 208  
                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
 209  
         {
 210  14
             handleFigureCaptionStart( sink, attribs );
 211  
         }
 212  68
         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
 213  
                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
 214  
                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
 215  
         {
 216  8
             sink.monospaced();
 217  
         }
 218  60
         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
 219  
         {
 220  20
             handleAStart( parser, sink, attribs );
 221  
         }
 222  40
         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
 223  
         {
 224  4
             handleTableStart( sink, attribs, parser );
 225  
         }
 226  36
         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
 227  
         {
 228  8
             sink.tableRow( attribs );
 229  
         }
 230  28
         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
 231  
         {
 232  4
             sink.tableHeaderCell( attribs );
 233  
         }
 234  24
         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
 235  
         {
 236  4
             sink.tableCell( attribs );
 237  
         }
 238  20
         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
 239  
         {
 240  2
             sink.tableCaption( attribs );
 241  
         }
 242  18
         else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
 243  
         {
 244  2
             sink.lineBreak( attribs );
 245  
         }
 246  16
         else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
 247  
         {
 248  2
             sink.horizontalRule( attribs );
 249  
         }
 250  14
         else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
 251  
         {
 252  8
             handleImgStart( parser, sink, attribs );
 253  
         }
 254  6
         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) )
 255  
         {
 256  2
             handleUnknown( parser, sink, TAG_TYPE_START );
 257  2
             scriptBlock = true;
 258  
         }
 259  
         else
 260  
         {
 261  4
             visited = false;
 262  
         }
 263  
 
 264  214
         return visited;
 265  
     }
 266  
 
 267  
     /**
 268  
      * <p>
 269  
      *   Goes through a common list of possible html end tags.
 270  
      *   These should be re-usable by different xhtml-based parsers.
 271  
      *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
 272  
      *   except for the empty elements (<code>&lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;</code>).
 273  
      * </p>
 274  
      *
 275  
      * @param parser A parser.
 276  
      * @param sink the sink to receive the events.
 277  
      * @return True if the event has been handled by this method, false otherwise.
 278  
      */
 279  
     protected boolean baseEndTag( XmlPullParser parser, Sink sink )
 280  
     {
 281  214
         boolean visited = true;
 282  
 
 283  214
         if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
 284  
         {
 285  18
             if ( !inFigure )
 286  
             {
 287  14
                 sink.paragraph_();
 288  
             }
 289  
         }
 290  196
         else if ( parser.getName().equals( HtmlMarkup.U.toString() )
 291  
                 || parser.getName().equals( HtmlMarkup.S.toString() )
 292  
                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
 293  
                 || parser.getName().equals( "del" ) )
 294  
         {
 295  8
             decoration.removeAttribute( SinkEventAttributes.DECORATION );
 296  
         }
 297  188
         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
 298  
                 || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
 299  
         {
 300  4
             decoration.removeAttribute( SinkEventAttributes.VALIGN );
 301  
         }
 302  184
         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
 303  
         {
 304  18
             if ( inFigure )
 305  
             {
 306  2
                 sink.figure_();
 307  2
                 this.inFigure = false;
 308  
             }
 309  
             else
 310  
             {
 311  16
                 visited = false;
 312  
             }
 313  
         }
 314  166
         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
 315  
         {
 316  8
             verbatim_();
 317  
 
 318  8
             sink.verbatim_();
 319  
         }
 320  158
         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
 321  
         {
 322  2
             sink.list_();
 323  
         }
 324  156
         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
 325  
         {
 326  2
             sink.numberedList_();
 327  2
             orderedListDepth--;
 328  
         }
 329  154
         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
 330  
         {
 331  4
             handleListItemEnd( sink );
 332  
         }
 333  150
         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
 334  
         {
 335  2
             sink.definitionList_();
 336  
         }
 337  148
         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
 338  
         {
 339  2
             sink.definedTerm_();
 340  
         }
 341  146
         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
 342  
         {
 343  2
             sink.definition_();
 344  2
             sink.definitionListItem_();
 345  
         }
 346  144
         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
 347  
                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
 348  
         {
 349  12
             sink.bold_();
 350  
         }
 351  132
         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
 352  
                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
 353  
         {
 354  14
             handleFigureCaptionEnd( sink );
 355  
         }
 356  118
         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
 357  
                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
 358  
                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
 359  
         {
 360  8
             sink.monospaced_();
 361  
         }
 362  110
         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
 363  
         {
 364  20
             handleAEnd( sink );
 365  
         }
 366  
 
 367  
         // ----------------------------------------------------------------------
 368  
         // Tables
 369  
         // ----------------------------------------------------------------------
 370  
 
 371  90
         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
 372  
         {
 373  4
             sink.tableRows_();
 374  
 
 375  4
             sink.table_();
 376  
         }
 377  86
         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
 378  
         {
 379  8
             sink.tableRow_();
 380  
         }
 381  78
         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
 382  
         {
 383  4
             sink.tableHeaderCell_();
 384  
         }
 385  74
         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
 386  
         {
 387  4
             sink.tableCell_();
 388  
         }
 389  70
         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
 390  
         {
 391  2
             sink.tableCaption_();
 392  
         }
 393  68
         else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
 394  
         {
 395  26
             sink.sectionTitle1_();
 396  
         }
 397  42
         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
 398  
         {
 399  10
             sink.sectionTitle2_();
 400  
         }
 401  32
         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
 402  
         {
 403  8
             sink.sectionTitle3_();
 404  
         }
 405  24
         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
 406  
         {
 407  2
             sink.sectionTitle4_();
 408  
         }
 409  22
         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
 410  
         {
 411  4
             sink.sectionTitle5_();
 412  
         }
 413  18
         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) )
 414  
         {
 415  2
             handleUnknown( parser, sink, TAG_TYPE_END );
 416  
 
 417  2
             scriptBlock = false;
 418  
         }
 419  
         else
 420  
         {
 421  16
             visited = false;
 422  
         }
 423  
 
 424  214
         return visited;
 425  
     }
 426  
 
 427  
     /**
 428  
      * {@inheritDoc}
 429  
      *
 430  
      * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
 431  
      * overridden by implementing parsers to include additional tags.
 432  
      */
 433  
     protected void handleStartTag( XmlPullParser parser, Sink sink )
 434  
         throws XmlPullParserException, MacroExecutionException
 435  
     {
 436  214
         if ( !baseStartTag( parser, sink ) )
 437  
         {
 438  20
             if ( getLog().isWarnEnabled() )
 439  
             {
 440  6
                 String position = "[" + parser.getLineNumber() + ":"
 441  
                     + parser.getColumnNumber() + "]";
 442  6
                 String tag = "<" + parser.getName() + ">";
 443  
 
 444  6
                 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
 445  
             }
 446  
         }
 447  214
     }
 448  
 
 449  
     /**
 450  
      * {@inheritDoc}
 451  
      *
 452  
      * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
 453  
      * overridden by implementing parsers to include additional tags.
 454  
      */
 455  
     protected void handleEndTag( XmlPullParser parser, Sink sink )
 456  
         throws XmlPullParserException, MacroExecutionException
 457  
     {
 458  214
         if ( !baseEndTag( parser, sink ) )
 459  
         {
 460  
             // unrecognized tag is already logged in StartTag
 461  
         }
 462  214
     }
 463  
 
 464  
     /** {@inheritDoc} */
 465  
     protected void handleText( XmlPullParser parser, Sink sink )
 466  
         throws XmlPullParserException
 467  
     {
 468  78
         String text = getText( parser );
 469  
 
 470  
         /*
 471  
          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
 472  
          * parser so any whitespace that makes it here is significant.
 473  
          *
 474  
          * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
 475  
          */
 476  78
         if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
 477  
         {
 478  78
             sink.text( text, decoration );
 479  
         }
 480  78
     }
 481  
 
 482  
     /** {@inheritDoc} */
 483  
     protected void handleComment( XmlPullParser parser, Sink sink )
 484  
         throws XmlPullParserException
 485  
     {
 486  4
         String text = getText( parser ).trim();
 487  
 
 488  4
         if ( "PB".equals( text ) )
 489  
         {
 490  2
             sink.pageBreak();
 491  
         }
 492  
         else
 493  
         {
 494  2
             sink.comment( text );
 495  
         }
 496  4
     }
 497  
 
 498  
     /** {@inheritDoc} */
 499  
     protected void handleCdsect( XmlPullParser parser, Sink sink )
 500  
         throws XmlPullParserException
 501  
     {
 502  4
         String text = getText( parser );
 503  
 
 504  4
         if ( isScriptBlock() )
 505  
         {
 506  0
             sink.unknown( CDATA, new Object[] {new Integer( CDATA_TYPE ), text}, null );
 507  
         }
 508  
         else
 509  
         {
 510  4
             sink.text( text );
 511  
         }
 512  4
     }
 513  
 
 514  
     /**
 515  
      * Make sure sections are nested consecutively.
 516  
      *
 517  
      * <p>
 518  
      * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
 519  
      * open or close any sections that are missing in between.
 520  
      * </p>
 521  
      *
 522  
      * <p>
 523  
      * For instance, if the following sequence is parsed:
 524  
      * <pre>
 525  
      * &lt;h3&gt;&lt;/h3&gt;
 526  
      * &lt;h6&gt;&lt;/h6&gt;
 527  
      * </pre>
 528  
      * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
 529  
      * In the following sequence
 530  
      * <pre>
 531  
      * &lt;h6&gt;&lt;/h6&gt;
 532  
      * &lt;h3&gt;&lt;/h3&gt;
 533  
      * </pre>
 534  
      * we have to close every open section at the &lt;h3&gt; level or deeper before we open the <code>&lt;h3&gt;</code>.
 535  
      * </p>
 536  
      *
 537  
      * <p>The current level is set to newLevel afterwards.</p>
 538  
      *
 539  
      * @param newLevel the new section level, all upper levels have to be closed.
 540  
      * @param sink the sink to receive the events.
 541  
      */
 542  
     protected void consecutiveSections( int newLevel, Sink sink )
 543  
     {
 544  50
         closeOpenSections( newLevel, sink );
 545  50
         openMissingSections( newLevel, sink );
 546  
 
 547  50
         this.sectionLevel = newLevel;
 548  50
     }
 549  
 
 550  
     /**
 551  
      * Close open sections.
 552  
      *
 553  
      * @param newLevel the new section level, all upper levels have to be closed.
 554  
      * @param sink the sink to receive the events.
 555  
      */
 556  
     private void closeOpenSections( int newLevel, Sink sink )
 557  
     {
 558  92
         while ( this.sectionLevel >= newLevel )
 559  
         {
 560  42
             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
 561  
             {
 562  4
                 sink.section5_();
 563  
             }
 564  38
             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
 565  
             {
 566  4
                 sink.section4_();
 567  
             }
 568  34
             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
 569  
             {
 570  10
                 sink.section3_();
 571  
             }
 572  24
             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
 573  
             {
 574  10
                 sink.section2_();
 575  
             }
 576  14
             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
 577  
             {
 578  14
                 sink.section1_();
 579  
             }
 580  
 
 581  42
             this.sectionLevel--;
 582  
         }
 583  50
     }
 584  
 
 585  
     /**
 586  
      * Open missing sections.
 587  
      *
 588  
      * @param newLevel the new section level, all lower levels have to be opened.
 589  
      * @param sink the sink to receive the events.
 590  
      */
 591  
     private void openMissingSections( int newLevel, Sink sink )
 592  
     {
 593  56
         while ( this.sectionLevel < newLevel - 1 )
 594  
         {
 595  6
             this.sectionLevel++;
 596  
 
 597  6
             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
 598  
             {
 599  0
                 sink.section5();
 600  
             }
 601  6
             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
 602  
             {
 603  2
                 sink.section4();
 604  
             }
 605  4
             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
 606  
             {
 607  2
                 sink.section3();
 608  
             }
 609  2
             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
 610  
             {
 611  2
                 sink.section2();
 612  
             }
 613  0
             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
 614  
             {
 615  0
                 sink.section1();
 616  
             }
 617  
         }
 618  50
     }
 619  
 
 620  
     /**
 621  
      * Return the current section level.
 622  
      *
 623  
      * @return the current section level.
 624  
      */
 625  
     protected int getSectionLevel()
 626  
     {
 627  0
         return this.sectionLevel;
 628  
     }
 629  
 
 630  
     /**
 631  
      * Set the current section level.
 632  
      *
 633  
      * @param newLevel the new section level.
 634  
      */
 635  
     protected void setSectionLevel( int newLevel )
 636  
     {
 637  0
         this.sectionLevel = newLevel;
 638  0
     }
 639  
 
 640  
     /**
 641  
      * Stop verbatim mode.
 642  
      */
 643  
     protected void verbatim_()
 644  
     {
 645  8
         this.inVerbatim = false;
 646  8
     }
 647  
 
 648  
     /**
 649  
      * Start verbatim mode.
 650  
      */
 651  
     protected void verbatim()
 652  
     {
 653  8
         this.inVerbatim = true;
 654  8
     }
 655  
 
 656  
     /**
 657  
      * Checks if we are currently inside a &lt;pre&gt; tag.
 658  
      *
 659  
      * @return true if we are currently in verbatim mode.
 660  
      */
 661  
     protected boolean isVerbatim()
 662  
     {
 663  0
         return this.inVerbatim;
 664  
     }
 665  
 
 666  
     /**
 667  
      * Checks if we are currently inside a &lt;script&gt; tag.
 668  
      *
 669  
      * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
 670  
      *
 671  
      * @since 1.1.1
 672  
      */
 673  
     protected boolean isScriptBlock()
 674  
     {
 675  82
         return this.scriptBlock;
 676  
     }
 677  
 
 678  
     /**
 679  
      * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
 680  
      *
 681  
      * @param id The id to validate.
 682  
      * @return A transformed id or the original id if it was already valid.
 683  
      * @see DoxiaUtils#encodeId(String)
 684  
      */
 685  
     protected String validAnchor( String id )
 686  
     {
 687  6
         if ( !DoxiaUtils.isValidId( id ) )
 688  
         {
 689  4
             String linkAnchor = DoxiaUtils.encodeId( id, true );
 690  
 
 691  4
             String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
 692  4
             logMessage( "modifiedLink", msg );
 693  
 
 694  4
             return linkAnchor;
 695  
         }
 696  
 
 697  2
         return id;
 698  
     }
 699  
 
 700  
     /** {@inheritDoc} */
 701  
     protected void init()
 702  
     {
 703  208
         super.init();
 704  
 
 705  208
         this.scriptBlock = false;
 706  208
         this.isLink = false;
 707  208
         this.isAnchor = false;
 708  208
         this.orderedListDepth = 0;
 709  208
         this.sectionLevel = 0;
 710  208
         this.inVerbatim = false;
 711  208
         this.inFigure = false;
 712  208
         while ( this.decoration.getAttributeNames().hasMoreElements() )
 713  
         {
 714  0
             this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() );
 715  
         }
 716  208
         this.warnMessages = null;
 717  208
     }
 718  
 
 719  
     private void handleAEnd( Sink sink )
 720  
     {
 721  20
         if ( isLink )
 722  
         {
 723  14
             sink.link_();
 724  14
             isLink = false;
 725  
         }
 726  6
         else if ( isAnchor )
 727  
         {
 728  6
             sink.anchor_();
 729  6
             isAnchor = false;
 730  
         }
 731  20
     }
 732  
 
 733  
     private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
 734  
     {
 735  20
         String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
 736  
 
 737  20
         if ( href != null )
 738  
         {
 739  14
             int hashIndex = href.indexOf( "#" );
 740  14
             if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
 741  
             {
 742  2
                 String hash = href.substring( hashIndex + 1 );
 743  
 
 744  2
                 if ( !DoxiaUtils.isValidId( hash ) )
 745  
                 {
 746  2
                     href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
 747  
 
 748  2
                     String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
 749  2
                     logMessage( "modifiedLink", msg );
 750  
                 }
 751  
             }
 752  14
             sink.link( href, attribs );
 753  14
             isLink = true;
 754  14
         }
 755  
         else
 756  
         {
 757  6
             String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
 758  
 
 759  6
             if ( name != null )
 760  
             {
 761  4
                 sink.anchor( validAnchor( name ), attribs );
 762  4
                 isAnchor = true;
 763  
             }
 764  
             else
 765  
             {
 766  2
                 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
 767  2
                 if ( id != null )
 768  
                 {
 769  2
                     sink.anchor( validAnchor( id ), attribs );
 770  2
                     isAnchor = true;
 771  
                 }
 772  
             }
 773  
         }
 774  20
     }
 775  
 
 776  
     private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
 777  
     {
 778  18
         boolean visited = true;
 779  
 
 780  18
         String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
 781  
 
 782  18
         if ( "figure".equals( divclass ) )
 783  
         {
 784  2
             this.inFigure = true;
 785  2
             SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
 786  2
             atts.removeAttribute( SinkEventAttributes.CLASS );
 787  2
             sink.figure( atts );
 788  2
         }
 789  
         else
 790  
         {
 791  16
             visited = false;
 792  
         }
 793  
 
 794  18
         return visited;
 795  
     }
 796  
 
 797  
     private void handleFigureCaptionEnd( Sink sink )
 798  
     {
 799  14
         if ( inFigure )
 800  
         {
 801  2
             sink.figureCaption_();
 802  
         }
 803  
         else
 804  
         {
 805  12
             sink.italic_();
 806  
         }
 807  14
     }
 808  
 
 809  
     private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
 810  
     {
 811  14
         if ( inFigure )
 812  
         {
 813  2
             sink.figureCaption( attribs );
 814  
         }
 815  
         else
 816  
         {
 817  12
             sink.italic();
 818  
         }
 819  14
     }
 820  
 
 821  
     private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
 822  
     {
 823  8
         String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
 824  
 
 825  8
         if ( src != null )
 826  
         {
 827  8
             sink.figureGraphics( src, attribs );
 828  
         }
 829  8
     }
 830  
 
 831  
     private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
 832  
     {
 833  4
         if ( orderedListDepth == 0 )
 834  
         {
 835  2
             sink.listItem( attribs );
 836  
         }
 837  
         else
 838  
         {
 839  2
             sink.numberedListItem( attribs );
 840  
         }
 841  4
     }
 842  
 
 843  
     private void handleListItemEnd( Sink sink )
 844  
     {
 845  4
         if ( orderedListDepth == 0 )
 846  
         {
 847  2
             sink.listItem_();
 848  
         }
 849  
         else
 850  
         {
 851  2
             sink.numberedListItem_();
 852  
         }
 853  4
     }
 854  
 
 855  
     private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
 856  
     {
 857  2
         int numbering = Sink.NUMBERING_DECIMAL;
 858  
         // this will have to be generalized if we handle styles
 859  2
         String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
 860  
 
 861  2
         if ( style != null )
 862  
         {
 863  0
             if ( "list-style-type: upper-alpha".equals( style ) )
 864  
             {
 865  0
                 numbering = Sink.NUMBERING_UPPER_ALPHA;
 866  
             }
 867  0
             else if ( "list-style-type: lower-alpha".equals( style ) )
 868  
             {
 869  0
                 numbering = Sink.NUMBERING_LOWER_ALPHA;
 870  
             }
 871  0
             else if ( "list-style-type: upper-roman".equals( style ) )
 872  
             {
 873  0
                 numbering = Sink.NUMBERING_UPPER_ROMAN;
 874  
             }
 875  0
             else if ( "list-style-type: lower-roman".equals( style ) )
 876  
             {
 877  0
                 numbering = Sink.NUMBERING_LOWER_ROMAN;
 878  
             }
 879  0
             else if ( "list-style-type: decimal".equals( style ) )
 880  
             {
 881  0
                 numbering = Sink.NUMBERING_DECIMAL;
 882  
             }
 883  
         }
 884  
 
 885  2
         sink.numberedList( numbering, attribs );
 886  2
         orderedListDepth++;
 887  2
     }
 888  
 
 889  
     private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
 890  
     {
 891  18
         if ( !inFigure )
 892  
         {
 893  14
             sink.paragraph( attribs );
 894  
         }
 895  18
     }
 896  
 
 897  
     /*
 898  
      * The PRE element tells visual user agents that the enclosed text is
 899  
      * "preformatted". When handling preformatted text, visual user agents:
 900  
      * - May leave white space intact.
 901  
      * - May render text with a fixed-pitch font.
 902  
      * - May disable automatic word wrap.
 903  
      * - Must not disable bidirectional processing.
 904  
      * Non-visual user agents are not required to respect extra white space
 905  
      * in the content of a PRE element.
 906  
      */
 907  
     private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
 908  
     {
 909  8
         verbatim();
 910  8
         attribs.removeAttribute( SinkEventAttributes.DECORATION );
 911  8
         sink.verbatim( attribs );
 912  8
     }
 913  
 
 914  
     private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
 915  
     {
 916  50
         consecutiveSections( level, sink );
 917  50
         sink.section( level, attribs );
 918  50
         sink.sectionTitle( level, attribs );
 919  50
     }
 920  
 
 921  
     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
 922  
     {
 923  4
         sink.table( attribs );
 924  4
         String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
 925  4
         boolean grid = true;
 926  
 
 927  4
         if ( border == null || "0".equals( border ) )
 928  
         {
 929  4
             grid = false;
 930  
         }
 931  
 
 932  4
         String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
 933  4
         int[] justif = {Sink.JUSTIFY_LEFT};
 934  
 
 935  4
         if ( "center".equals( align ) )
 936  
         {
 937  2
             justif[0] = Sink.JUSTIFY_CENTER;
 938  
         }
 939  2
         else if ( "right".equals( align ) )
 940  
         {
 941  0
             justif[0] = Sink.JUSTIFY_RIGHT;
 942  
         }
 943  
 
 944  4
         sink.tableRows( justif, grid );
 945  4
     }
 946  
 
 947  
     /**
 948  
      * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
 949  
      *
 950  
      * @param key not null
 951  
      * @param msg not null
 952  
      * @see #parse(Reader, Sink)
 953  
      * @since 1.1.1
 954  
      */
 955  
     private void logMessage( String key, String msg )
 956  
     {
 957  6
         msg = "[XHTML Parser] " + msg;
 958  6
         if ( getLog().isDebugEnabled() )
 959  
         {
 960  0
             getLog().debug( msg );
 961  
 
 962  0
             return;
 963  
         }
 964  
 
 965  6
         if ( warnMessages == null )
 966  
         {
 967  2
             warnMessages = new HashMap<String, Set<String>>();
 968  
         }
 969  
 
 970  6
         Set<String> set = warnMessages.get( key );
 971  6
         if ( set == null )
 972  
         {
 973  2
             set = new TreeSet<String>();
 974  
         }
 975  6
         set.add( msg );
 976  6
         warnMessages.put( key, set );
 977  6
     }
 978  
 
 979  
     /**
 980  
      * @since 1.1.1
 981  
      */
 982  
     private void logWarnings()
 983  
     {
 984  52
         if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
 985  
         {
 986  0
             for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
 987  
             {
 988  0
                 for ( String msg : entry.getValue() )
 989  
                 {
 990  0
                     getLog().warn( msg );
 991  
                 }
 992  
             }
 993  
 
 994  0
             this.warnMessages = null;
 995  
         }
 996  52
     }
 997  
 }