Coverage Report - org.apache.maven.doxia.parser.XhtmlBaseParser
 
Classes in this File Line Coverage Branch Coverage Complexity
XhtmlBaseParser
92%
311/337
85%
217/254
4,794
 
 1  
 package org.apache.maven.doxia.parser;
 2  
 
 3  
 /*
 4  
  * Licensed to the Apache Software Foundation (ASF) under one
 5  
  * or more contributor license agreements.  See the NOTICE file
 6  
  * distributed with this work for additional information
 7  
  * regarding copyright ownership.  The ASF licenses this file
 8  
  * to you under the Apache License, Version 2.0 (the
 9  
  * "License"); you may not use this file except in compliance
 10  
  * with the License.  You may obtain a copy of the License at
 11  
  *
 12  
  *   http://www.apache.org/licenses/LICENSE-2.0
 13  
  *
 14  
  * Unless required by applicable law or agreed to in writing,
 15  
  * software distributed under the License is distributed on an
 16  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 17  
  * KIND, either express or implied.  See the License for the
 18  
  * specific language governing permissions and limitations
 19  
  * under the License.
 20  
  */
 21  
 
 22  
 import java.io.Reader;
 23  
 import java.util.HashMap;
 24  
 import java.util.Map;
 25  
 import java.util.Set;
 26  
 import java.util.TreeSet;
 27  
 
 28  
 import javax.swing.text.html.HTML.Attribute;
 29  
 
 30  
 import org.apache.maven.doxia.macro.MacroExecutionException;
 31  
 import org.apache.maven.doxia.markup.HtmlMarkup;
 32  
 import org.apache.maven.doxia.sink.Sink;
 33  
 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
 34  
 import org.apache.maven.doxia.sink.SinkEventAttributes;
 35  
 import org.apache.maven.doxia.util.DoxiaUtils;
 36  
 
 37  
 import org.codehaus.plexus.util.StringUtils;
 38  
 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
 39  
 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
 40  
 
 41  
 /**
 42  
  * Common base parser for xhtml events.
 43  
  *
 44  
  * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
 45  
  * @author ltheussl
 46  
  * @version $Id: XhtmlBaseParser.java 1185112 2011-10-17 11:33:00Z ltheussl $
 47  
  * @since 1.1
 48  
  */
 49  44
 public class XhtmlBaseParser
 50  
     extends AbstractXmlParser
 51  
         implements HtmlMarkup
 52  
 {
 53  
     /** True if a &lt;script&gt;&lt;/script&gt; block is read. CDATA sections within are handled as rawText. */
 54  
     private boolean scriptBlock;
 55  
 
 56  
     /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
 57  
     private boolean isLink;
 58  
 
 59  
     /** True while inside an &lt;a name=""&gt; or &lt;a id=""&gt; anchor, as opposed to an &lt;a href=""&gt; link. */
 60  
     private boolean isAnchor;
 61  
 
 62  
     /** Used for nested lists. */
 63  44
     private int orderedListDepth = 0;
 64  
 
 65  
     /** Counts section level. */
 66  
     private int sectionLevel;
 67  
 
 68  
     /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
 69  
     private boolean inVerbatim;
 70  
 
 71  
     /** Used to recognize the case of img inside figure. */
 72  
     private boolean inFigure;
 73  
 
 74  
     /** Decoration properties, eg for texts. */
 75  44
     private final SinkEventAttributeSet decoration = new SinkEventAttributeSet();
 76  
 
 77  
     /** Map of warn messages with a String as key to describe the error type and a Set as value.
 78  
      * Used to reduce warn messages. */
 79  
     private Map<String, Set<String>> warnMessages;
 80  
 
 81  
     /** {@inheritDoc} */
 82  
     @Override
 83  
     public void parse( Reader source, Sink sink )
 84  
         throws ParseException
 85  
     {
 86  52
         init();
 87  
 
 88  
         try
 89  
         {
 90  52
             super.parse( source, sink );
 91  
         }
 92  
         finally
 93  
         {
 94  52
             logWarnings();
 95  
 
 96  52
             setSecondParsing( false );
 97  52
             init();
 98  52
         }
 99  52
     }
 100  
 
 101  
     /**
 102  
      * <p>
 103  
      *   Goes through a common list of possible html start tags. These include only tags that can go into
 104  
      *   the body of a xhtml document and so should be re-usable by different xhtml-based parsers.
 105  
      * </p>
 106  
      * <p>
 107  
      *   The currently handled tags are:
 108  
      * </p>
 109  
      * <p>
 110  
      *   <code>
 111  
      *      &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;, &lt;h6&gt;, &lt;p&gt;, &lt;pre&gt;,
 112  
      *      &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;, &lt;b&gt;, &lt;strong&gt;,
 113  
      *      &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;, &lt;table&gt;, &lt;tr&gt;,
 114  
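      *      &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;, &lt;u&gt;, &lt;s&gt;, &lt;strike&gt;, &lt;del&gt;, &lt;sub&gt;, &lt;sup&gt;, &lt;div class="figure"&gt;, &lt;script&gt;.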
 115  
      *   </code>
 116  
      * </p>
 117  
      *
 118  
      * @param parser A parser.
 119  
      * @param sink the sink to receive the events.
 120  
      * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise.
 121  
      */
 122  
     protected boolean baseStartTag( XmlPullParser parser, Sink sink )
 123  
     {
 124  214
         boolean visited = true;
 125  
 
 126  214
         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
 127  
 
 128  214
         if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
 129  
         {
 130  26
             handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
 131  
         }
 132  188
         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
 133  
         {
 134  10
             handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
 135  
         }
 136  178
         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
 137  
         {
 138  8
             handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
 139  
         }
 140  170
         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
 141  
         {
 142  2
             handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
 143  
         }
 144  168
         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
 145  
         {
 146  4
             handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
 147  
         }
 148  164
         else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
 149  
         {
 150  2
             decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" );
 151  
         }
 152  162
         else if ( parser.getName().equals( HtmlMarkup.S.toString() )
 153  
                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
 154  
                 || parser.getName().equals( "del" ) )
 155  
         {
 156  6
             decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" );
 157  
         }
 158  156
         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
 159  
         {
 160  2
             decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" );
 161  
         }
 162  154
         else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
 163  
         {
 164  2
             decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" );
 165  
         }
 166  152
         else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
 167  
         {
 168  18
             handlePStart( sink, attribs );
 169  
         }
 170  134
         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
 171  
         {
 172  18
             visited = handleDivStart( parser, attribs, sink );
 173  
         }
 174  116
         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
 175  
         {
 176  8
             handlePreStart( attribs, sink );
 177  
         }
 178  108
         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
 179  
         {
 180  2
             sink.list( attribs );
 181  
         }
 182  106
         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
 183  
         {
 184  2
             handleOLStart( parser, sink, attribs );
 185  
         }
 186  104
         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
 187  
         {
 188  4
             handleLIStart( sink, attribs );
 189  
         }
 190  100
         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
 191  
         {
 192  2
             sink.definitionList( attribs );
 193  
         }
 194  98
         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
 195  
         {
 196  2
             sink.definitionListItem( attribs );
 197  2
             sink.definedTerm( attribs );
 198  
         }
 199  96
         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
 200  
         {
 201  2
             sink.definition( attribs );
 202  
         }
 203  94
         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
 204  
                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
 205  
         {
 206  12
             sink.bold();
 207  
         }
 208  82
         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
 209  
                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
 210  
         {
 211  14
             handleFigureCaptionStart( sink, attribs );
 212  
         }
 213  68
         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
 214  
                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
 215  
                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
 216  
         {
 217  8
             sink.monospaced();
 218  
         }
 219  60
         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
 220  
         {
 221  20
             handleAStart( parser, sink, attribs );
 222  
         }
 223  40
         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
 224  
         {
 225  4
             handleTableStart( sink, attribs, parser );
 226  
         }
 227  36
         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
 228  
         {
 229  8
             sink.tableRow( attribs );
 230  
         }
 231  28
         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
 232  
         {
 233  4
             sink.tableHeaderCell( attribs );
 234  
         }
 235  24
         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
 236  
         {
 237  4
             sink.tableCell( attribs );
 238  
         }
 239  20
         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
 240  
         {
 241  2
             sink.tableCaption( attribs );
 242  
         }
 243  18
         else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
 244  
         {
 245  2
             sink.lineBreak( attribs );
 246  
         }
 247  16
         else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
 248  
         {
 249  2
             sink.horizontalRule( attribs );
 250  
         }
 251  14
         else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
 252  
         {
 253  8
             handleImgStart( parser, sink, attribs );
 254  
         }
 255  6
         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) )
 256  
         {
 257  2
             handleUnknown( parser, sink, TAG_TYPE_START );
 258  2
             scriptBlock = true;
 259  
         }
 260  
         else
 261  
         {
 262  4
             visited = false;
 263  
         }
 264  
 
 265  214
         return visited;
 266  
     }
 267  
 
 268  
     /**
 269  
      * <p>
 270  
      *   Goes through a common list of possible html end tags.
 271  
      *   These should be re-usable by different xhtml-based parsers.
 272  
      *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
 273  
      *   except for the empty elements (<code>&lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;</code>).
 274  
      * </p>
 275  
      *
 276  
      * @param parser A parser.
 277  
      * @param sink the sink to receive the events.
 278  
      * @return True if the event has been handled by this method, false otherwise.
 279  
      */
 280  
     protected boolean baseEndTag( XmlPullParser parser, Sink sink )
 281  
     {
 282  214
         boolean visited = true;
 283  
 
 284  214
         if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
 285  
         {
 286  18
             if ( !inFigure )
 287  
             {
 288  14
                 sink.paragraph_();
 289  
             }
 290  
         }
 291  196
         else if ( parser.getName().equals( HtmlMarkup.U.toString() )
 292  
                 || parser.getName().equals( HtmlMarkup.S.toString() )
 293  
                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
 294  
                 || parser.getName().equals( "del" ) )
 295  
         {
 296  8
             decoration.removeAttribute( SinkEventAttributes.DECORATION );
 297  
         }
 298  188
         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
 299  
                 || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
 300  
         {
 301  4
             decoration.removeAttribute( SinkEventAttributes.VALIGN );
 302  
         }
 303  184
         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
 304  
         {
 305  18
             if ( inFigure )
 306  
             {
 307  2
                 sink.figure_();
 308  2
                 this.inFigure = false;
 309  
             }
 310  
             else
 311  
             {
 312  16
                 visited = false;
 313  
             }
 314  
         }
 315  166
         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
 316  
         {
 317  8
             verbatim_();
 318  
 
 319  8
             sink.verbatim_();
 320  
         }
 321  158
         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
 322  
         {
 323  2
             sink.list_();
 324  
         }
 325  156
         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
 326  
         {
 327  2
             sink.numberedList_();
 328  2
             orderedListDepth--;
 329  
         }
 330  154
         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
 331  
         {
 332  4
             handleListItemEnd( sink );
 333  
         }
 334  150
         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
 335  
         {
 336  2
             sink.definitionList_();
 337  
         }
 338  148
         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
 339  
         {
 340  2
             sink.definedTerm_();
 341  
         }
 342  146
         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
 343  
         {
 344  2
             sink.definition_();
 345  2
             sink.definitionListItem_();
 346  
         }
 347  144
         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
 348  
                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
 349  
         {
 350  12
             sink.bold_();
 351  
         }
 352  132
         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
 353  
                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
 354  
         {
 355  14
             handleFigureCaptionEnd( sink );
 356  
         }
 357  118
         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
 358  
                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
 359  
                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
 360  
         {
 361  8
             sink.monospaced_();
 362  
         }
 363  110
         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
 364  
         {
 365  20
             handleAEnd( sink );
 366  
         }
 367  
 
 368  
         // ----------------------------------------------------------------------
 369  
         // Tables
 370  
         // ----------------------------------------------------------------------
 371  
 
 372  90
         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
 373  
         {
 374  4
             sink.tableRows_();
 375  
 
 376  4
             sink.table_();
 377  
         }
 378  86
         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
 379  
         {
 380  8
             sink.tableRow_();
 381  
         }
 382  78
         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
 383  
         {
 384  4
             sink.tableHeaderCell_();
 385  
         }
 386  74
         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
 387  
         {
 388  4
             sink.tableCell_();
 389  
         }
 390  70
         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
 391  
         {
 392  2
             sink.tableCaption_();
 393  
         }
 394  68
         else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
 395  
         {
 396  26
             sink.sectionTitle1_();
 397  
         }
 398  42
         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
 399  
         {
 400  10
             sink.sectionTitle2_();
 401  
         }
 402  32
         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
 403  
         {
 404  8
             sink.sectionTitle3_();
 405  
         }
 406  24
         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
 407  
         {
 408  2
             sink.sectionTitle4_();
 409  
         }
 410  22
         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
 411  
         {
 412  4
             sink.sectionTitle5_();
 413  
         }
 414  18
         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) )
 415  
         {
 416  2
             handleUnknown( parser, sink, TAG_TYPE_END );
 417  
 
 418  2
             scriptBlock = false;
 419  
         }
 420  
         else
 421  
         {
 422  16
             visited = false;
 423  
         }
 424  
 
 425  214
         return visited;
 426  
     }
 427  
 
 428  
     /**
 429  
      * {@inheritDoc}
 430  
      *
 431  
      * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
 432  
      * overridden by implementing parsers to include additional tags.
 433  
      */
 434  
     protected void handleStartTag( XmlPullParser parser, Sink sink )
 435  
         throws XmlPullParserException, MacroExecutionException
 436  
     {
 437  214
         if ( !baseStartTag( parser, sink ) )
 438  
         {
 439  20
             if ( getLog().isWarnEnabled() )
 440  
             {
 441  6
                 String position = "[" + parser.getLineNumber() + ":"
 442  
                     + parser.getColumnNumber() + "]";
 443  6
                 String tag = "<" + parser.getName() + ">";
 444  
 
 445  6
                 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
 446  
             }
 447  
         }
 448  214
     }
 449  
 
 450  
     /**
 451  
      * {@inheritDoc}
 452  
      *
 453  
      * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
 454  
      * overridden by implementing parsers to include additional tags.
 455  
      */
 456  
     protected void handleEndTag( XmlPullParser parser, Sink sink )
 457  
         throws XmlPullParserException, MacroExecutionException
 458  
     {
 459  214
         if ( !baseEndTag( parser, sink ) )
 460  
         {
 461  
             // unrecognized tag is already logged in StartTag
 462  
         }
 463  214
     }
 464  
 
 465  
     /** {@inheritDoc} */
 466  
     @Override
 467  
     protected void handleText( XmlPullParser parser, Sink sink )
 468  
         throws XmlPullParserException
 469  
     {
 470  78
         String text = getText( parser );
 471  
 
 472  
         /*
 473  
          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
 474  
          * parser so any whitespace that makes it here is significant.
 475  
          *
 476  
          * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
 477  
          */
 478  78
         if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
 479  
         {
 480  78
             sink.text( text, decoration );
 481  
         }
 482  78
     }
 483  
 
 484  
     /** {@inheritDoc} */
 485  
     @Override
 486  
     protected void handleComment( XmlPullParser parser, Sink sink )
 487  
         throws XmlPullParserException
 488  
     {
 489  4
         String text = getText( parser ).trim();
 490  
 
 491  4
         if ( "PB".equals( text ) )
 492  
         {
 493  2
             sink.pageBreak();
 494  
         }
 495  
         else
 496  
         {
 497  2
             sink.comment( text );
 498  
         }
 499  4
     }
 500  
 
 501  
     /** {@inheritDoc} */
 502  
     @Override
 503  
     protected void handleCdsect( XmlPullParser parser, Sink sink )
 504  
         throws XmlPullParserException
 505  
     {
 506  4
         String text = getText( parser );
 507  
 
 508  4
         if ( isScriptBlock() )
 509  
         {
 510  0
             sink.unknown( CDATA, new Object[] {new Integer( CDATA_TYPE ), text}, null );
 511  
         }
 512  
         else
 513  
         {
 514  4
             sink.text( text );
 515  
         }
 516  4
     }
 517  
 
 518  
     /**
 519  
      * Make sure sections are nested consecutively.
 520  
      *
 521  
      * <p>
 522  
      * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
 523  
      * open or close any sections that are missing in between.
 524  
      * </p>
 525  
      *
 526  
      * <p>
 527  
      * For instance, if the following sequence is parsed:
 528  
      * <pre>
 529  
      * &lt;h3&gt;&lt;/h3&gt;
 530  
      * &lt;h6&gt;&lt;/h6&gt;
 531  
      * </pre>
 532  
      * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
 533  
      * In the following sequence
 534  
      * <pre>
 535  
      * &lt;h6&gt;&lt;/h6&gt;
 536  
      * &lt;h3&gt;&lt;/h3&gt;
 537  
      * </pre>
 538  
      * we have to close two sections before we open the <code>&lt;h3&gt;</code>.
 539  
      * </p>
 540  
      *
 541  
      * <p>The current level is set to newLevel afterwards.</p>
 542  
      *
 543  
      * @param newLevel the new section level, all upper levels have to be closed.
 544  
      * @param sink the sink to receive the events.
 545  
      */
 546  
     protected void consecutiveSections( int newLevel, Sink sink )
 547  
     {
 548  50
         closeOpenSections( newLevel, sink );
 549  50
         openMissingSections( newLevel, sink );
 550  
 
 551  50
         this.sectionLevel = newLevel;
 552  50
     }
 553  
 
 554  
     /**
 555  
      * Close open sections.
 556  
      *
 557  
      * @param newLevel the new section level, all upper levels have to be closed.
 558  
      * @param sink the sink to receive the events.
 559  
      */
 560  
     private void closeOpenSections( int newLevel, Sink sink )
 561  
     {
 562  92
         while ( this.sectionLevel >= newLevel )
 563  
         {
 564  42
             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
 565  
             {
 566  4
                 sink.section5_();
 567  
             }
 568  38
             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
 569  
             {
 570  4
                 sink.section4_();
 571  
             }
 572  34
             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
 573  
             {
 574  10
                 sink.section3_();
 575  
             }
 576  24
             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
 577  
             {
 578  10
                 sink.section2_();
 579  
             }
 580  14
             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
 581  
             {
 582  14
                 sink.section1_();
 583  
             }
 584  
 
 585  42
             this.sectionLevel--;
 586  
         }
 587  50
     }
 588  
 
 589  
     /**
 590  
      * Open missing sections.
 591  
      *
 592  
      * @param newLevel the new section level, all lower levels have to be opened.
 593  
      * @param sink the sink to receive the events.
 594  
      */
 595  
     private void openMissingSections( int newLevel, Sink sink )
 596  
     {
 597  56
         while ( this.sectionLevel < newLevel - 1 )
 598  
         {
 599  6
             this.sectionLevel++;
 600  
 
 601  6
             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
 602  
             {
 603  0
                 sink.section5();
 604  
             }
 605  6
             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
 606  
             {
 607  2
                 sink.section4();
 608  
             }
 609  4
             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
 610  
             {
 611  2
                 sink.section3();
 612  
             }
 613  2
             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
 614  
             {
 615  2
                 sink.section2();
 616  
             }
 617  0
             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
 618  
             {
 619  0
                 sink.section1();
 620  
             }
 621  
         }
 622  50
     }
 623  
 
 624  
     /**
 625  
      * Return the current section level.
 626  
      *
 627  
      * @return the current section level.
 628  
      */
 629  
     protected int getSectionLevel()
 630  
     {
 631  0
         return this.sectionLevel;
 632  
     }
 633  
 
 634  
     /**
 635  
      * Set the current section level.
 636  
      *
 637  
      * @param newLevel the new section level.
 638  
      */
 639  
     protected void setSectionLevel( int newLevel )
 640  
     {
 641  0
         this.sectionLevel = newLevel;
 642  0
     }
 643  
 
 644  
     /**
 645  
      * Stop verbatim mode.
 646  
      */
 647  
     protected void verbatim_()
 648  
     {
 649  8
         this.inVerbatim = false;
 650  8
     }
 651  
 
 652  
     /**
 653  
      * Start verbatim mode.
 654  
      */
 655  
     protected void verbatim()
 656  
     {
 657  8
         this.inVerbatim = true;
 658  8
     }
 659  
 
 660  
     /**
 661  
      * Checks if we are currently inside a &lt;pre&gt; tag.
 662  
      *
 663  
      * @return true if we are currently in verbatim mode.
 664  
      */
 665  
     protected boolean isVerbatim()
 666  
     {
 667  0
         return this.inVerbatim;
 668  
     }
 669  
 
 670  
     /**
 671  
      * Checks if we are currently inside a &lt;script&gt; tag.
 672  
      *
 673  
      * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
 674  
      *
 675  
      * @since 1.1.1.
 676  
      */
 677  
     protected boolean isScriptBlock()
 678  
     {
 679  82
         return this.scriptBlock;
 680  
     }
 681  
 
 682  
     /**
 683  
      * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
 684  
      *
 685  
      * @param id The id to validate.
 686  
      * @return A transformed id or the original id if it was already valid.
 687  
      * @see DoxiaUtils#encodeId(String)
 688  
      */
 689  
     protected String validAnchor( String id )
 690  
     {
 691  6
         if ( !DoxiaUtils.isValidId( id ) )
 692  
         {
 693  4
             String linkAnchor = DoxiaUtils.encodeId( id, true );
 694  
 
 695  4
             String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
 696  4
             logMessage( "modifiedLink", msg );
 697  
 
 698  4
             return linkAnchor;
 699  
         }
 700  
 
 701  2
         return id;
 702  
     }
 703  
 
 704  
     /** {@inheritDoc} */
 705  
     @Override
 706  
     protected void init()
 707  
     {
 708  208
         super.init();
 709  
 
 710  208
         this.scriptBlock = false;
 711  208
         this.isLink = false;
 712  208
         this.isAnchor = false;
 713  208
         this.orderedListDepth = 0;
 714  208
         this.sectionLevel = 0;
 715  208
         this.inVerbatim = false;
 716  208
         this.inFigure = false;
 717  208
         while ( this.decoration.getAttributeNames().hasMoreElements() )
 718  
         {
 719  0
             this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() );
 720  
         }
 721  208
         this.warnMessages = null;
 722  208
     }
 723  
 
 724  
     private void handleAEnd( Sink sink )
 725  
     {
 726  20
         if ( isLink )
 727  
         {
 728  14
             sink.link_();
 729  14
             isLink = false;
 730  
         }
 731  6
         else if ( isAnchor )
 732  
         {
 733  6
             sink.anchor_();
 734  6
             isAnchor = false;
 735  
         }
 736  20
     }
 737  
 
 738  
     private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
 739  
     {
 740  20
         String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
 741  
 
 742  20
         if ( href != null )
 743  
         {
 744  14
             int hashIndex = href.indexOf( '#');
 745  14
             if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
 746  
             {
 747  2
                 String hash = href.substring( hashIndex + 1 );
 748  
 
 749  2
                 if ( !DoxiaUtils.isValidId( hash ) )
 750  
                 {
 751  2
                     href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
 752  
 
 753  2
                     String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
 754  2
                     logMessage( "modifiedLink", msg );
 755  
                 }
 756  
             }
 757  14
             sink.link( href, attribs );
 758  14
             isLink = true;
 759  14
         }
 760  
         else
 761  
         {
 762  6
             String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
 763  
 
 764  6
             if ( name != null )
 765  
             {
 766  4
                 sink.anchor( validAnchor( name ), attribs );
 767  4
                 isAnchor = true;
 768  
             }
 769  
             else
 770  
             {
 771  2
                 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
 772  2
                 if ( id != null )
 773  
                 {
 774  2
                     sink.anchor( validAnchor( id ), attribs );
 775  2
                     isAnchor = true;
 776  
                 }
 777  
             }
 778  
         }
 779  20
     }
 780  
 
 781  
     private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
 782  
     {
 783  18
         boolean visited = true;
 784  
 
 785  18
         String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
 786  
 
 787  18
         if ( "figure".equals( divclass ) )
 788  
         {
 789  2
             this.inFigure = true;
 790  2
             SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
 791  2
             atts.removeAttribute( SinkEventAttributes.CLASS );
 792  2
             sink.figure( atts );
 793  2
         }
 794  
         else
 795  
         {
 796  16
             visited = false;
 797  
         }
 798  
 
 799  18
         return visited;
 800  
     }
 801  
 
 802  
     private void handleFigureCaptionEnd( Sink sink )
 803  
     {
 804  14
         if ( inFigure )
 805  
         {
 806  2
             sink.figureCaption_();
 807  
         }
 808  
         else
 809  
         {
 810  12
             sink.italic_();
 811  
         }
 812  14
     }
 813  
 
 814  
     private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
 815  
     {
 816  14
         if ( inFigure )
 817  
         {
 818  2
             sink.figureCaption( attribs );
 819  
         }
 820  
         else
 821  
         {
 822  12
             sink.italic();
 823  
         }
 824  14
     }
 825  
 
 826  
     private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
 827  
     {
 828  8
         String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
 829  
 
 830  8
         if ( src != null )
 831  
         {
 832  8
             sink.figureGraphics( src, attribs );
 833  
         }
 834  8
     }
 835  
 
 836  
     private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
 837  
     {
 838  4
         if ( orderedListDepth == 0 )
 839  
         {
 840  2
             sink.listItem( attribs );
 841  
         }
 842  
         else
 843  
         {
 844  2
             sink.numberedListItem( attribs );
 845  
         }
 846  4
     }
 847  
 
 848  
     private void handleListItemEnd( Sink sink )
 849  
     {
 850  4
         if ( orderedListDepth == 0 )
 851  
         {
 852  2
             sink.listItem_();
 853  
         }
 854  
         else
 855  
         {
 856  2
             sink.numberedListItem_();
 857  
         }
 858  4
     }
 859  
 
 860  
     private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
 861  
     {
 862  2
         int numbering = Sink.NUMBERING_DECIMAL;
 863  
         // this will have to be generalized if we handle styles
 864  2
         String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
 865  
 
 866  2
         if ( style != null )
 867  
         {
 868  0
             if ( "list-style-type: upper-alpha".equals( style ) )
 869  
             {
 870  0
                 numbering = Sink.NUMBERING_UPPER_ALPHA;
 871  
             }
 872  0
             else if ( "list-style-type: lower-alpha".equals( style ) )
 873  
             {
 874  0
                 numbering = Sink.NUMBERING_LOWER_ALPHA;
 875  
             }
 876  0
             else if ( "list-style-type: upper-roman".equals( style ) )
 877  
             {
 878  0
                 numbering = Sink.NUMBERING_UPPER_ROMAN;
 879  
             }
 880  0
             else if ( "list-style-type: lower-roman".equals( style ) )
 881  
             {
 882  0
                 numbering = Sink.NUMBERING_LOWER_ROMAN;
 883  
             }
 884  0
             else if ( "list-style-type: decimal".equals( style ) )
 885  
             {
 886  0
                 numbering = Sink.NUMBERING_DECIMAL;
 887  
             }
 888  
         }
 889  
 
 890  2
         sink.numberedList( numbering, attribs );
 891  2
         orderedListDepth++;
 892  2
     }
 893  
 
 894  
     private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
 895  
     {
 896  18
         if ( !inFigure )
 897  
         {
 898  14
             sink.paragraph( attribs );
 899  
         }
 900  18
     }
 901  
 
 902  
     /*
 903  
      * The PRE element tells visual user agents that the enclosed text is
 904  
      * "preformatted". When handling preformatted text, visual user agents:
 905  
      * - May leave white space intact.
 906  
      * - May render text with a fixed-pitch font.
 907  
      * - May disable automatic word wrap.
 908  
      * - Must not disable bidirectional processing.
 909  
      * Non-visual user agents are not required to respect extra white space
 910  
      * in the content of a PRE element.
 911  
      */
 912  
     private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
 913  
     {
 914  8
         verbatim();
 915  8
         attribs.removeAttribute( SinkEventAttributes.DECORATION );
 916  8
         sink.verbatim( attribs );
 917  8
     }
 918  
 
 919  
     private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
 920  
     {
 921  50
         consecutiveSections( level, sink );
 922  50
         sink.section( level, attribs );
 923  50
         sink.sectionTitle( level, attribs );
 924  50
     }
 925  
 
 926  
     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
 927  
     {
 928  4
         sink.table( attribs );
 929  4
         String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
 930  4
         boolean grid = true;
 931  
 
 932  4
         if ( border == null || "0".equals( border ) )
 933  
         {
 934  4
             grid = false;
 935  
         }
 936  
 
 937  4
         String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
 938  4
         int[] justif = {Sink.JUSTIFY_LEFT};
 939  
 
 940  4
         if ( "center".equals( align ) )
 941  
         {
 942  2
             justif[0] = Sink.JUSTIFY_CENTER;
 943  
         }
 944  2
         else if ( "right".equals( align ) )
 945  
         {
 946  0
             justif[0] = Sink.JUSTIFY_RIGHT;
 947  
         }
 948  
 
 949  4
         sink.tableRows( justif, grid );
 950  4
     }
 951  
 
 952  
     /**
 953  
      * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
 954  
      *
 955  
      * @param key not null
 956  
      * @param msg not null
 957  
      * @see #parse(Reader, Sink)
 958  
      * @since 1.1.1
 959  
      */
 960  
     private void logMessage( String key, String msg )
 961  
     {
 962  6
         final String log = "[XHTML Parser] " + msg;
 963  6
         if ( getLog().isDebugEnabled() )
 964  
         {
 965  0
             getLog().debug( log );
 966  
 
 967  0
             return;
 968  
         }
 969  
 
 970  6
         if ( warnMessages == null )
 971  
         {
 972  2
             warnMessages = new HashMap<String, Set<String>>();
 973  
         }
 974  
 
 975  6
         Set<String> set = warnMessages.get( key );
 976  6
         if ( set == null )
 977  
         {
 978  2
             set = new TreeSet<String>();
 979  
         }
 980  6
         set.add( log );
 981  6
         warnMessages.put( key, set );
 982  6
     }
 983  
 
 984  
     /**
 985  
      * @since 1.1.1
 986  
      */
 987  
     private void logWarnings()
 988  
     {
 989  52
         if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
 990  
         {
 991  0
             for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
 992  
             {
 993  0
                 for ( String msg : entry.getValue() )
 994  
                 {
 995  0
                     getLog().warn( msg );
 996  
                 }
 997  
             }
 998  
 
 999  0
             this.warnMessages = null;
 1000  
         }
 1001  52
     }
 1002  
 }