View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.Reader;
23  import java.util.HashMap;
24  import java.util.Map;
25  import java.util.Set;
26  import java.util.TreeSet;
27  
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.markup.HtmlMarkup;
32  import org.apache.maven.doxia.sink.Sink;
33  import org.apache.maven.doxia.sink.SinkEventAttributes;
34  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
35  import org.apache.maven.doxia.util.DoxiaUtils;
36  
37  import org.codehaus.plexus.util.StringUtils;
38  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
39  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
40  
41  /**
42   * Common base parser for xhtml events.
43   *
44   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
45   * @author ltheussl
46   * @version $Id$
47   * @since 1.1
48   */
49  public class XhtmlBaseParser
50      extends AbstractXmlParser
51          implements HtmlMarkup
52  {
53      /**
54       * True if a &lt;script&gt;&lt;/script&gt; or &lt;style&gt;&lt;/style&gt; block is read (set when its start tag is handled).
55       * CDATA sections within are handled as rawText.
56       */
57      private boolean scriptBlock;
58  
59      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
60      private boolean isLink;
61  
62      /** Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;. */
63      private boolean isAnchor;
64  
65      /** Used for nested lists; decremented when an &lt;/ol&gt; end tag is handled. */
66      private int orderedListDepth = 0;
67  
68      /** Counts section level. */
69      private int sectionLevel;
70  
71      /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
72      private boolean inVerbatim;
73  
74      /** Used to recognize the case of img inside figure; cleared again when the closing &lt;/div&gt; is handled. */
75      private boolean inFigure;
76  
77      /** Used to wrap the definedTerm with its definition, even when one is omitted. Not private: package access. */
78      boolean hasDefinitionListItem = false;
79  
80      /** Map of warn messages with a String as key to describe the error type and a Set as value.
81       * Used to reduce warn messages. */
82      private Map<String, Set<String>> warnMessages;
83  
84      /** {@inheritDoc} Calls init() before delegating to the super implementation and once more afterwards. */
85      @Override
86      public void parse( Reader source, Sink sink )
87          throws ParseException
88      {
89          init();
90          // whether or not parsing fails: log the warnings, switch off second parsing and call init() again
91          try
92          {
93              super.parse( source, sink );
94          }
95          finally
96          {
97              logWarnings();
98  
99              setSecondParsing( false );
100             init();
101         }
102     }
103 
104     /**
105      * {@inheritDoc}
106      *
107      * Adds all XHTML (HTML 4.0) entities to the parser so that they can be recognized and resolved
108      * without additional DTD.
109      */
110     @Override
111     protected void initXmlParser( XmlPullParser parser )
112         throws XmlPullParserException
113     {
114         super.initXmlParser( parser );
115 
116         // the entities taken from org.apache.maven.doxia.document.io.xpp3.DocumentXpp3Reader,
117         // which is generated automatically
118 
119         // ----------------------------------------------------------------------
120         // Latin 1 entities
121         // ----------------------------------------------------------------------
122 
123         parser.defineEntityReplacementText( "nbsp", "\u00a0" );
124         parser.defineEntityReplacementText( "iexcl", "\u00a1" );
125         parser.defineEntityReplacementText( "cent", "\u00a2" );
126         parser.defineEntityReplacementText( "pound", "\u00a3" );
127         parser.defineEntityReplacementText( "curren", "\u00a4" );
128         parser.defineEntityReplacementText( "yen", "\u00a5" );
129         parser.defineEntityReplacementText( "brvbar", "\u00a6" );
130         parser.defineEntityReplacementText( "sect", "\u00a7" );
131         parser.defineEntityReplacementText( "uml", "\u00a8" );
132         parser.defineEntityReplacementText( "copy", "\u00a9" );
133         parser.defineEntityReplacementText( "ordf", "\u00aa" );
134         parser.defineEntityReplacementText( "laquo", "\u00ab" );
135         parser.defineEntityReplacementText( "not", "\u00ac" );
136         parser.defineEntityReplacementText( "shy", "\u00ad" );
137         parser.defineEntityReplacementText( "reg", "\u00ae" );
138         parser.defineEntityReplacementText( "macr", "\u00af" );
139         parser.defineEntityReplacementText( "deg", "\u00b0" );
140         parser.defineEntityReplacementText( "plusmn", "\u00b1" );
141         parser.defineEntityReplacementText( "sup2", "\u00b2" );
142         parser.defineEntityReplacementText( "sup3", "\u00b3" );
143         parser.defineEntityReplacementText( "acute", "\u00b4" );
144         parser.defineEntityReplacementText( "micro", "\u00b5" );
145         parser.defineEntityReplacementText( "para", "\u00b6" );
146         parser.defineEntityReplacementText( "middot", "\u00b7" );
147         parser.defineEntityReplacementText( "cedil", "\u00b8" );
148         parser.defineEntityReplacementText( "sup1", "\u00b9" );
149         parser.defineEntityReplacementText( "ordm", "\u00ba" );
150         parser.defineEntityReplacementText( "raquo", "\u00bb" );
151         parser.defineEntityReplacementText( "frac14", "\u00bc" );
152         parser.defineEntityReplacementText( "frac12", "\u00bd" );
153         parser.defineEntityReplacementText( "frac34", "\u00be" );
154         parser.defineEntityReplacementText( "iquest", "\u00bf" );
155         parser.defineEntityReplacementText( "Agrave", "\u00c0" );
156         parser.defineEntityReplacementText( "Aacute", "\u00c1" );
157         parser.defineEntityReplacementText( "Acirc", "\u00c2" );
158         parser.defineEntityReplacementText( "Atilde", "\u00c3" );
159         parser.defineEntityReplacementText( "Auml", "\u00c4" );
160         parser.defineEntityReplacementText( "Aring", "\u00c5" );
161         parser.defineEntityReplacementText( "AElig", "\u00c6" );
162         parser.defineEntityReplacementText( "Ccedil", "\u00c7" );
163         parser.defineEntityReplacementText( "Egrave", "\u00c8" );
164         parser.defineEntityReplacementText( "Eacute", "\u00c9" );
165         parser.defineEntityReplacementText( "Ecirc", "\u00ca" );
166         parser.defineEntityReplacementText( "Euml", "\u00cb" );
167         parser.defineEntityReplacementText( "Igrave", "\u00cc" );
168         parser.defineEntityReplacementText( "Iacute", "\u00cd" );
169         parser.defineEntityReplacementText( "Icirc", "\u00ce" );
170         parser.defineEntityReplacementText( "Iuml", "\u00cf" );
171         parser.defineEntityReplacementText( "ETH", "\u00d0" );
172         parser.defineEntityReplacementText( "Ntilde", "\u00d1" );
173         parser.defineEntityReplacementText( "Ograve", "\u00d2" );
174         parser.defineEntityReplacementText( "Oacute", "\u00d3" );
175         parser.defineEntityReplacementText( "Ocirc", "\u00d4" );
176         parser.defineEntityReplacementText( "Otilde", "\u00d5" );
177         parser.defineEntityReplacementText( "Ouml", "\u00d6" );
178         parser.defineEntityReplacementText( "times", "\u00d7" );
179         parser.defineEntityReplacementText( "Oslash", "\u00d8" );
180         parser.defineEntityReplacementText( "Ugrave", "\u00d9" );
181         parser.defineEntityReplacementText( "Uacute", "\u00da" );
182         parser.defineEntityReplacementText( "Ucirc", "\u00db" );
183         parser.defineEntityReplacementText( "Uuml", "\u00dc" );
184         parser.defineEntityReplacementText( "Yacute", "\u00dd" );
185         parser.defineEntityReplacementText( "THORN", "\u00de" );
186         parser.defineEntityReplacementText( "szlig", "\u00df" );
187         parser.defineEntityReplacementText( "agrave", "\u00e0" );
188         parser.defineEntityReplacementText( "aacute", "\u00e1" );
189         parser.defineEntityReplacementText( "acirc", "\u00e2" );
190         parser.defineEntityReplacementText( "atilde", "\u00e3" );
191         parser.defineEntityReplacementText( "auml", "\u00e4" );
192         parser.defineEntityReplacementText( "aring", "\u00e5" );
193         parser.defineEntityReplacementText( "aelig", "\u00e6" );
194         parser.defineEntityReplacementText( "ccedil", "\u00e7" );
195         parser.defineEntityReplacementText( "egrave", "\u00e8" );
196         parser.defineEntityReplacementText( "eacute", "\u00e9" );
197         parser.defineEntityReplacementText( "ecirc", "\u00ea" );
198         parser.defineEntityReplacementText( "euml", "\u00eb" );
199         parser.defineEntityReplacementText( "igrave", "\u00ec" );
200         parser.defineEntityReplacementText( "iacute", "\u00ed" );
201         parser.defineEntityReplacementText( "icirc", "\u00ee" );
202         parser.defineEntityReplacementText( "iuml", "\u00ef" );
203         parser.defineEntityReplacementText( "eth", "\u00f0" );
204         parser.defineEntityReplacementText( "ntilde", "\u00f1" );
205         parser.defineEntityReplacementText( "ograve", "\u00f2" );
206         parser.defineEntityReplacementText( "oacute", "\u00f3" );
207         parser.defineEntityReplacementText( "ocirc", "\u00f4" );
208         parser.defineEntityReplacementText( "otilde", "\u00f5" );
209         parser.defineEntityReplacementText( "ouml", "\u00f6" );
210         parser.defineEntityReplacementText( "divide", "\u00f7" );
211         parser.defineEntityReplacementText( "oslash", "\u00f8" );
212         parser.defineEntityReplacementText( "ugrave", "\u00f9" );
213         parser.defineEntityReplacementText( "uacute", "\u00fa" );
214         parser.defineEntityReplacementText( "ucirc", "\u00fb" );
215         parser.defineEntityReplacementText( "uuml", "\u00fc" );
216         parser.defineEntityReplacementText( "yacute", "\u00fd" );
217         parser.defineEntityReplacementText( "thorn", "\u00fe" );
218         parser.defineEntityReplacementText( "yuml", "\u00ff" );
219 
220         // ----------------------------------------------------------------------
221         // Special entities
222         // ----------------------------------------------------------------------
223 
224         parser.defineEntityReplacementText( "OElig", "\u0152" );
225         parser.defineEntityReplacementText( "oelig", "\u0153" );
226         parser.defineEntityReplacementText( "Scaron", "\u0160" );
227         parser.defineEntityReplacementText( "scaron", "\u0161" );
228         parser.defineEntityReplacementText( "Yuml", "\u0178" );
229         parser.defineEntityReplacementText( "circ", "\u02c6" );
230         parser.defineEntityReplacementText( "tilde", "\u02dc" );
231         parser.defineEntityReplacementText( "ensp", "\u2002" );
232         parser.defineEntityReplacementText( "emsp", "\u2003" );
233         parser.defineEntityReplacementText( "thinsp", "\u2009" );
234         parser.defineEntityReplacementText( "zwnj", "\u200c" );
235         parser.defineEntityReplacementText( "zwj", "\u200d" );
236         parser.defineEntityReplacementText( "lrm", "\u200e" );
237         parser.defineEntityReplacementText( "rlm", "\u200f" );
238         parser.defineEntityReplacementText( "ndash", "\u2013" );
239         parser.defineEntityReplacementText( "mdash", "\u2014" );
240         parser.defineEntityReplacementText( "lsquo", "\u2018" );
241         parser.defineEntityReplacementText( "rsquo", "\u2019" );
242         parser.defineEntityReplacementText( "sbquo", "\u201a" );
243         parser.defineEntityReplacementText( "ldquo", "\u201c" );
244         parser.defineEntityReplacementText( "rdquo", "\u201d" );
245         parser.defineEntityReplacementText( "bdquo", "\u201e" );
246         parser.defineEntityReplacementText( "dagger", "\u2020" );
247         parser.defineEntityReplacementText( "Dagger", "\u2021" );
248         parser.defineEntityReplacementText( "permil", "\u2030" );
249         parser.defineEntityReplacementText( "lsaquo", "\u2039" );
250         parser.defineEntityReplacementText( "rsaquo", "\u203a" );
251         parser.defineEntityReplacementText( "euro", "\u20ac" );
252 
253         // ----------------------------------------------------------------------
254         // Symbol entities
255         // ----------------------------------------------------------------------
256 
257         parser.defineEntityReplacementText( "fnof", "\u0192" );
258         parser.defineEntityReplacementText( "Alpha", "\u0391" );
259         parser.defineEntityReplacementText( "Beta", "\u0392" );
260         parser.defineEntityReplacementText( "Gamma", "\u0393" );
261         parser.defineEntityReplacementText( "Delta", "\u0394" );
262         parser.defineEntityReplacementText( "Epsilon", "\u0395" );
263         parser.defineEntityReplacementText( "Zeta", "\u0396" );
264         parser.defineEntityReplacementText( "Eta", "\u0397" );
265         parser.defineEntityReplacementText( "Theta", "\u0398" );
266         parser.defineEntityReplacementText( "Iota", "\u0399" );
267         parser.defineEntityReplacementText( "Kappa", "\u039a" );
268         parser.defineEntityReplacementText( "Lambda", "\u039b" );
269         parser.defineEntityReplacementText( "Mu", "\u039c" );
270         parser.defineEntityReplacementText( "Nu", "\u039d" );
271         parser.defineEntityReplacementText( "Xi", "\u039e" );
272         parser.defineEntityReplacementText( "Omicron", "\u039f" );
273         parser.defineEntityReplacementText( "Pi", "\u03a0" );
274         parser.defineEntityReplacementText( "Rho", "\u03a1" );
275         parser.defineEntityReplacementText( "Sigma", "\u03a3" );
276         parser.defineEntityReplacementText( "Tau", "\u03a4" );
277         parser.defineEntityReplacementText( "Upsilon", "\u03a5" );
278         parser.defineEntityReplacementText( "Phi", "\u03a6" );
279         parser.defineEntityReplacementText( "Chi", "\u03a7" );
280         parser.defineEntityReplacementText( "Psi", "\u03a8" );
281         parser.defineEntityReplacementText( "Omega", "\u03a9" );
282         parser.defineEntityReplacementText( "alpha", "\u03b1" );
283         parser.defineEntityReplacementText( "beta", "\u03b2" );
284         parser.defineEntityReplacementText( "gamma", "\u03b3" );
285         parser.defineEntityReplacementText( "delta", "\u03b4" );
286         parser.defineEntityReplacementText( "epsilon", "\u03b5" );
287         parser.defineEntityReplacementText( "zeta", "\u03b6" );
288         parser.defineEntityReplacementText( "eta", "\u03b7" );
289         parser.defineEntityReplacementText( "theta", "\u03b8" );
290         parser.defineEntityReplacementText( "iota", "\u03b9" );
291         parser.defineEntityReplacementText( "kappa", "\u03ba" );
292         parser.defineEntityReplacementText( "lambda", "\u03bb" );
293         parser.defineEntityReplacementText( "mu", "\u03bc" );
294         parser.defineEntityReplacementText( "nu", "\u03bd" );
295         parser.defineEntityReplacementText( "xi", "\u03be" );
296         parser.defineEntityReplacementText( "omicron", "\u03bf" );
297         parser.defineEntityReplacementText( "pi", "\u03c0" );
298         parser.defineEntityReplacementText( "rho", "\u03c1" );
299         parser.defineEntityReplacementText( "sigmaf", "\u03c2" );
300         parser.defineEntityReplacementText( "sigma", "\u03c3" );
301         parser.defineEntityReplacementText( "tau", "\u03c4" );
302         parser.defineEntityReplacementText( "upsilon", "\u03c5" );
303         parser.defineEntityReplacementText( "phi", "\u03c6" );
304         parser.defineEntityReplacementText( "chi", "\u03c7" );
305         parser.defineEntityReplacementText( "psi", "\u03c8" );
306         parser.defineEntityReplacementText( "omega", "\u03c9" );
307         parser.defineEntityReplacementText( "thetasym", "\u03d1" );
308         parser.defineEntityReplacementText( "upsih", "\u03d2" );
309         parser.defineEntityReplacementText( "piv", "\u03d6" );
310         parser.defineEntityReplacementText( "bull", "\u2022" );
311         parser.defineEntityReplacementText( "hellip", "\u2026" );
312         parser.defineEntityReplacementText( "prime", "\u2032" );
313         parser.defineEntityReplacementText( "Prime", "\u2033" );
314         parser.defineEntityReplacementText( "oline", "\u203e" );
315         parser.defineEntityReplacementText( "frasl", "\u2044" );
316         parser.defineEntityReplacementText( "weierp", "\u2118" );
317         parser.defineEntityReplacementText( "image", "\u2111" );
318         parser.defineEntityReplacementText( "real", "\u211c" );
319         parser.defineEntityReplacementText( "trade", "\u2122" );
320         parser.defineEntityReplacementText( "alefsym", "\u2135" );
321         parser.defineEntityReplacementText( "larr", "\u2190" );
322         parser.defineEntityReplacementText( "uarr", "\u2191" );
323         parser.defineEntityReplacementText( "rarr", "\u2192" );
324         parser.defineEntityReplacementText( "darr", "\u2193" );
325         parser.defineEntityReplacementText( "harr", "\u2194" );
326         parser.defineEntityReplacementText( "crarr", "\u21b5" );
327         parser.defineEntityReplacementText( "lArr", "\u21d0" );
328         parser.defineEntityReplacementText( "uArr", "\u21d1" );
329         parser.defineEntityReplacementText( "rArr", "\u21d2" );
330         parser.defineEntityReplacementText( "dArr", "\u21d3" );
331         parser.defineEntityReplacementText( "hArr", "\u21d4" );
332         parser.defineEntityReplacementText( "forall", "\u2200" );
333         parser.defineEntityReplacementText( "part", "\u2202" );
334         parser.defineEntityReplacementText( "exist", "\u2203" );
335         parser.defineEntityReplacementText( "empty", "\u2205" );
336         parser.defineEntityReplacementText( "nabla", "\u2207" );
337         parser.defineEntityReplacementText( "isin", "\u2208" );
338         parser.defineEntityReplacementText( "notin", "\u2209" );
339         parser.defineEntityReplacementText( "ni", "\u220b" );
340         parser.defineEntityReplacementText( "prod", "\u220f" );
341         parser.defineEntityReplacementText( "sum", "\u2211" );
342         parser.defineEntityReplacementText( "minus", "\u2212" );
343         parser.defineEntityReplacementText( "lowast", "\u2217" );
344         parser.defineEntityReplacementText( "radic", "\u221a" );
345         parser.defineEntityReplacementText( "prop", "\u221d" );
346         parser.defineEntityReplacementText( "infin", "\u221e" );
347         parser.defineEntityReplacementText( "ang", "\u2220" );
348         parser.defineEntityReplacementText( "and", "\u2227" );
349         parser.defineEntityReplacementText( "or", "\u2228" );
350         parser.defineEntityReplacementText( "cap", "\u2229" );
351         parser.defineEntityReplacementText( "cup", "\u222a" );
352         parser.defineEntityReplacementText( "int", "\u222b" );
353         parser.defineEntityReplacementText( "there4", "\u2234" );
354         parser.defineEntityReplacementText( "sim", "\u223c" );
355         parser.defineEntityReplacementText( "cong", "\u2245" );
356         parser.defineEntityReplacementText( "asymp", "\u2248" );
357         parser.defineEntityReplacementText( "ne", "\u2260" );
358         parser.defineEntityReplacementText( "equiv", "\u2261" );
359         parser.defineEntityReplacementText( "le", "\u2264" );
360         parser.defineEntityReplacementText( "ge", "\u2265" );
361         parser.defineEntityReplacementText( "sub", "\u2282" );
362         parser.defineEntityReplacementText( "sup", "\u2283" );
363         parser.defineEntityReplacementText( "nsub", "\u2284" );
364         parser.defineEntityReplacementText( "sube", "\u2286" );
365         parser.defineEntityReplacementText( "supe", "\u2287" );
366         parser.defineEntityReplacementText( "oplus", "\u2295" );
367         parser.defineEntityReplacementText( "otimes", "\u2297" );
368         parser.defineEntityReplacementText( "perp", "\u22a5" );
369         parser.defineEntityReplacementText( "sdot", "\u22c5" );
370         parser.defineEntityReplacementText( "lceil", "\u2308" );
371         parser.defineEntityReplacementText( "rceil", "\u2309" );
372         parser.defineEntityReplacementText( "lfloor", "\u230a" );
373         parser.defineEntityReplacementText( "rfloor", "\u230b" );
374         parser.defineEntityReplacementText( "lang", "\u2329" );
375         parser.defineEntityReplacementText( "rang", "\u232a" );
376         parser.defineEntityReplacementText( "loz", "\u25ca" );
377         parser.defineEntityReplacementText( "spades", "\u2660" );
378         parser.defineEntityReplacementText( "clubs", "\u2663" );
379         parser.defineEntityReplacementText( "hearts", "\u2665" );
380         parser.defineEntityReplacementText( "diams", "\u2666" );
381     }
382 
383     /**
384      * <p>
385      *   Maps the start tag the parser currently reports to the corresponding sink event. Only tags that can go
386      *   into the body of a xhtml document are covered, so the method should be re-usable by different
387      *   xhtml-based parsers.
388      * </p>
389      * <p>
390      *   The currently handled tags are:
391      *   <code>
392      *      &lt;h2&gt; to &lt;h6&gt;, &lt;u&gt;, &lt;s&gt;, &lt;strike&gt;, &lt;del&gt;, &lt;sub&gt;, &lt;sup&gt;,
393      *      &lt;p&gt;, &lt;div&gt;, &lt;pre&gt;, &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;,
394      *      &lt;b&gt;, &lt;strong&gt;, &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;,
395      *      &lt;table&gt;, &lt;tr&gt;, &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;,
396      *      &lt;script&gt;, &lt;style&gt;.
397      *   </code>
398      * </p>
399      *
400      * @param parser the parser whose current tag name and attributes are read.
401      * @param sink the sink to receive the events.
402      * @return True if the tag was recognized here (for &lt;div&gt;: whatever handleDivStart returns), false otherwise.
403      */
404     protected boolean baseStartTag( XmlPullParser parser, Sink sink )
405     {
406         SinkEventAttributeSet attribs = getAttributesFromParser( parser );
407         String tagName = parser.getName();
408         boolean handled = true;
409 
410         if ( tagName.equals( HtmlMarkup.H2.toString() ) )
411         {
412             handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
413         }
414         else if ( tagName.equals( HtmlMarkup.H3.toString() ) )
415         {
416             handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
417         }
418         else if ( tagName.equals( HtmlMarkup.H4.toString() ) )
419         {
420             handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
421         }
422         else if ( tagName.equals( HtmlMarkup.H5.toString() ) )
423         {
424             handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
425         }
426         else if ( tagName.equals( HtmlMarkup.H6.toString() ) )
427         {
428             handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
429         }
430         else if ( tagName.equals( HtmlMarkup.U.toString() ) )
431         {
432             attribs.addAttributes( SinkEventAttributeSet.Semantics.ANNOTATION );
433             sink.inline( attribs );
434         }
435         else if ( tagName.equals( HtmlMarkup.S.toString() ) || tagName.equals( HtmlMarkup.STRIKE.toString() )
436                 || tagName.equals( "del" ) )
437         {
438             // the three strike-through variants all end up as LINE_THROUGH
439             attribs.addAttributes( SinkEventAttributeSet.Semantics.LINE_THROUGH );
440             sink.inline( attribs );
441         }
442         else if ( tagName.equals( HtmlMarkup.SUB.toString() ) )
443         {
444             attribs.addAttributes( SinkEventAttributeSet.Semantics.SUBSCRIPT );
445             sink.inline( attribs );
446         }
447         else if ( tagName.equals( HtmlMarkup.SUP.toString() ) )
448         {
449             attribs.addAttributes( SinkEventAttributeSet.Semantics.SUPERSCRIPT );
450             sink.inline( attribs );
451         }
452         else if ( tagName.equals( HtmlMarkup.P.toString() ) )
453         {
454             handlePStart( sink, attribs );
455         }
456         else if ( tagName.equals( HtmlMarkup.DIV.toString() ) )
457         {
458             handled = handleDivStart( parser, attribs, sink );
459         }
460         else if ( tagName.equals( HtmlMarkup.PRE.toString() ) )
461         {
462             handlePreStart( attribs, sink );
463         }
464         else if ( tagName.equals( HtmlMarkup.UL.toString() ) )
465         {
466             sink.list( attribs );
467         }
468         else if ( tagName.equals( HtmlMarkup.OL.toString() ) )
469         {
470             handleOLStart( parser, sink, attribs );
471         }
472         else if ( tagName.equals( HtmlMarkup.LI.toString() ) )
473         {
474             handleLIStart( sink, attribs );
475         }
476         else if ( tagName.equals( HtmlMarkup.DL.toString() ) )
477         {
478             sink.definitionList( attribs );
479         }
480         else if ( tagName.equals( HtmlMarkup.DT.toString() ) )
481         {
482             if ( hasDefinitionListItem )
483             {
484                 // a list item is still open: close it before starting the next one
485                 sink.definitionListItem_();
486             }
487             sink.definitionListItem( attribs );
488             hasDefinitionListItem = true;
489             sink.definedTerm( attribs );
490         }
491         else if ( tagName.equals( HtmlMarkup.DD.toString() ) )
492         {
493             if ( !hasDefinitionListItem )
494             {
495                 sink.definitionListItem( attribs );
496             }
497             sink.definition( attribs );
498         }
499         else if ( tagName.equals( HtmlMarkup.B.toString() ) || tagName.equals( HtmlMarkup.STRONG.toString() ) )
500         {
501             // only the BOLD semantics are passed on, the attributes of the tag itself are not
502             sink.inline( SinkEventAttributeSet.Semantics.BOLD );
503         }
504         else if ( tagName.equals( HtmlMarkup.I.toString() ) || tagName.equals( HtmlMarkup.EM.toString() ) )
505         {
506             // NOTE(review): presumably a figure caption inside a figure, italic otherwise - confirm in the helper
507             handleFigureCaptionStart( sink, attribs );
508         }
509         else if ( tagName.equals( HtmlMarkup.CODE.toString() ) || tagName.equals( HtmlMarkup.SAMP.toString() )
510                 || tagName.equals( HtmlMarkup.TT.toString() ) )
511         {
512             // as for bold: only the MONOSPACED semantics are passed on
513             sink.inline( SinkEventAttributeSet.Semantics.MONOSPACED );
514         }
515         else if ( tagName.equals( HtmlMarkup.A.toString() ) )
516         {
517             handleAStart( parser, sink, attribs );
518         }
519         else if ( tagName.equals( HtmlMarkup.TABLE.toString() ) )
520         {
521             handleTableStart( sink, attribs, parser );
522         }
523         else if ( tagName.equals( HtmlMarkup.TR.toString() ) )
524         {
525             sink.tableRow( attribs );
526         }
527         else if ( tagName.equals( HtmlMarkup.TH.toString() ) )
528         {
529             sink.tableHeaderCell( attribs );
530         }
531         else if ( tagName.equals( HtmlMarkup.TD.toString() ) )
532         {
533             sink.tableCell( attribs );
534         }
535         else if ( tagName.equals( HtmlMarkup.CAPTION.toString() ) )
536         {
537             sink.tableCaption( attribs );
538         }
539         else if ( tagName.equals( HtmlMarkup.BR.toString() ) )
540         {
541             sink.lineBreak( attribs );
542         }
543         else if ( tagName.equals( HtmlMarkup.HR.toString() ) )
544         {
545             sink.horizontalRule( attribs );
546         }
547         else if ( tagName.equals( HtmlMarkup.IMG.toString() ) )
548         {
549             handleImgStart( parser, sink, attribs );
550         }
551         else if ( tagName.equals( HtmlMarkup.SCRIPT.toString() ) || tagName.equals( HtmlMarkup.STYLE.toString() ) )
552         {
553             handleUnknown( parser, sink, TAG_TYPE_START );
554             // remember that a script/style block has been entered
555             scriptBlock = true;
556         }
557         else
558         {
559             handled = false;
560         }
561 
562         return handled;
563     }
564 
565     /**
566      * <p>
567      *   Goes through a common list of possible html end tags.
568      *   These should be re-usable by different xhtml-based parsers.
569      *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
570      *   except for the empty elements ({@code<br/>, <hr/>, <img/>}).
571      * </p>
572      *
573      * @param parser A parser.
574      * @param sink the sink to receive the events.
575      * @return True if the event has been handled by this method, false otherwise.
576      */
577     protected boolean baseEndTag( XmlPullParser parser, Sink sink )
578     {
579         boolean visited = true;
580 
581         if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
582         {
583             if ( !inFigure )
584             {
585                 sink.paragraph_();
586             }
587         }
588         else if ( parser.getName().equals( HtmlMarkup.U.toString() )
589                 || parser.getName().equals( HtmlMarkup.S.toString() )
590                 || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
591                 || parser.getName().equals( "del" ) )
592         {
593             sink.inline_();
594         }
595         else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
596                 || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
597         {
598             sink.inline_();
599         }
600         else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
601         {
602             if ( inFigure )
603             {
604                 sink.figure_();
605                 this.inFigure = false;
606             }
607             else
608             {
609                 visited = false;
610             }
611         }
612         else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
613         {
614             verbatim_();
615 
616             sink.verbatim_();
617         }
618         else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
619         {
620             sink.list_();
621         }
622         else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
623         {
624             sink.numberedList_();
625             orderedListDepth--;
626         }
627         else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
628         {
629             handleListItemEnd( sink );
630         }
631         else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
632         {
633             if ( hasDefinitionListItem )
634             {
635                 sink.definitionListItem_();
636                 hasDefinitionListItem = false;
637             }
638             sink.definitionList_();
639         }
640         else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
641         {
642             sink.definedTerm_();
643         }
644         else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
645         {
646             sink.definition_();
647             sink.definitionListItem_();
648             hasDefinitionListItem = false;
649         }
650         else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
651                 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
652         {
653             sink.inline_();
654         }
655         else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
656                 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
657         {
658             handleFigureCaptionEnd( sink );
659         }
660         else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
661                 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
662                 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
663         {
664             sink.inline_();
665         }
666         else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
667         {
668             handleAEnd( sink );
669         }
670 
671         // ----------------------------------------------------------------------
672         // Tables
673         // ----------------------------------------------------------------------
674 
675         else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
676         {
677             sink.tableRows_();
678 
679             sink.table_();
680         }
681         else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
682         {
683             sink.tableRow_();
684         }
685         else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
686         {
687             sink.tableHeaderCell_();
688         }
689         else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
690         {
691             sink.tableCell_();
692         }
693         else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
694         {
695             sink.tableCaption_();
696         }
697         else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
698         {
699             sink.sectionTitle1_();
700         }
701         else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
702         {
703             sink.sectionTitle2_();
704         }
705         else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
706         {
707             sink.sectionTitle3_();
708         }
709         else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
710         {
711             sink.sectionTitle4_();
712         }
713         else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
714         {
715             sink.sectionTitle5_();
716         }
717         else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
718             || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
719         {
720             handleUnknown( parser, sink, TAG_TYPE_END );
721 
722             scriptBlock = false;
723         }
724         else
725         {
726             visited = false;
727         }
728 
729         return visited;
730     }
731 
732     /**
733      * {@inheritDoc}
734      *
735      * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
736      * overridden by implementing parsers to include additional tags.
737      */
738     protected void handleStartTag( XmlPullParser parser, Sink sink )
739         throws XmlPullParserException, MacroExecutionException
740     {
741         if ( !baseStartTag( parser, sink ) )
742         {
743             if ( getLog().isWarnEnabled() )
744             {
745                 String position = "[" + parser.getLineNumber() + ":"
746                     + parser.getColumnNumber() + "]";
747                 String tag = "<" + parser.getName() + ">";
748 
749                 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
750             }
751         }
752     }
753 
754     /**
755      * {@inheritDoc}
756      *
757      * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
758      * overridden by implementing parsers to include additional tags.
759      */
760     protected void handleEndTag( XmlPullParser parser, Sink sink )
761         throws XmlPullParserException, MacroExecutionException
762     {
763         if ( !baseEndTag( parser, sink ) )
764         {
765             // unrecognized tag is already logged in StartTag
766         }
767     }
768 
769     /** {@inheritDoc} */
770     @Override
771     protected void handleText( XmlPullParser parser, Sink sink )
772         throws XmlPullParserException
773     {
774         String text = getText( parser );
775 
776         /*
777          * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
778          * parser so any whitespace that makes it here is significant.
779          *
780          * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
781          */
782         if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
783         {
784             sink.text( text );
785         }
786     }
787 
788     /** {@inheritDoc} */
789     @Override
790     protected void handleComment( XmlPullParser parser, Sink sink )
791         throws XmlPullParserException
792     {
793         String text = getText( parser );
794 
795         if ( "PB".equals( text.trim() ) )
796         {
797             sink.pageBreak();
798         }
799         else
800         {
801             if ( isEmitComments() )
802             {
803                 sink.comment( text );
804             }
805         }
806     }
807 
808     /** {@inheritDoc} */
809     @Override
810     protected void handleCdsect( XmlPullParser parser, Sink sink )
811         throws XmlPullParserException
812     {
813         String text = getText( parser );
814 
815         if ( isScriptBlock() )
816         {
817             sink.unknown( CDATA, new Object[] { CDATA_TYPE, text }, null );
818         }
819         else
820         {
821             sink.text( text );
822         }
823     }
824 
825     /**
826      * Make sure sections are nested consecutively.
827      *
828      * <p>
829      * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
830      * open close any sections that are missing in between.
831      * </p>
832      *
833      * <p>
834      * For instance, if the following sequence is parsed:
835      * </p>
836      * <pre>
837      * &lt;h3&gt;&lt;/h3&gt;
838      * &lt;h6&gt;&lt;/h6&gt;
839      * </pre>
840      * <p>
841      * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
842      * In the following sequence
843      * </p>
844      * <pre>
845      * &lt;h6&gt;&lt;/h6&gt;
846      * &lt;h3&gt;&lt;/h3&gt;
847      * </pre>
848      * <p>
849      * we have to close two sections before we open the <code>&lt;h3&gt;</code>.
850      * </p>
851      *
852      * <p>The current level is set to newLevel afterwards.</p>
853      *
854      * @param newLevel the new section level, all upper levels have to be closed.
855      * @param sink the sink to receive the events.
856      */
857     protected void consecutiveSections( int newLevel, Sink sink )
858     {
859         closeOpenSections( newLevel, sink );
860         openMissingSections( newLevel, sink );
861 
862         this.sectionLevel = newLevel;
863     }
864 
865     /**
866      * Close open sections.
867      *
868      * @param newLevel the new section level, all upper levels have to be closed.
869      * @param sink the sink to receive the events.
870      */
871     private void closeOpenSections( int newLevel, Sink sink )
872     {
873         while ( this.sectionLevel >= newLevel )
874         {
875             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
876             {
877                 sink.section5_();
878             }
879             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
880             {
881                 sink.section4_();
882             }
883             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
884             {
885                 sink.section3_();
886             }
887             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
888             {
889                 sink.section2_();
890             }
891             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
892             {
893                 sink.section1_();
894             }
895 
896             this.sectionLevel--;
897         }
898     }
899 
900     /**
901      * Open missing sections.
902      *
903      * @param newLevel the new section level, all lower levels have to be opened.
904      * @param sink the sink to receive the events.
905      */
906     private void openMissingSections( int newLevel, Sink sink )
907     {
908         while ( this.sectionLevel < newLevel - 1 )
909         {
910             this.sectionLevel++;
911 
912             if ( sectionLevel == Sink.SECTION_LEVEL_5 )
913             {
914                 sink.section5();
915             }
916             else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
917             {
918                 sink.section4();
919             }
920             else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
921             {
922                 sink.section3();
923             }
924             else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
925             {
926                 sink.section2();
927             }
928             else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
929             {
930                 sink.section1();
931             }
932         }
933     }
934 
935     /**
936      * Return the current section level.
937      *
938      * @return the current section level.
939      */
940     protected int getSectionLevel()
941     {
942         return this.sectionLevel;
943     }
944 
945     /**
946      * Set the current section level.
947      *
948      * @param newLevel the new section level.
949      */
950     protected void setSectionLevel( int newLevel )
951     {
952         this.sectionLevel = newLevel;
953     }
954 
955     /**
956      * Stop verbatim mode.
957      */
958     protected void verbatim_()
959     {
960         this.inVerbatim = false;
961     }
962 
963     /**
964      * Start verbatim mode.
965      */
966     protected void verbatim()
967     {
968         this.inVerbatim = true;
969     }
970 
971     /**
972      * Checks if we are currently inside a &lt;pre&gt; tag.
973      *
974      * @return true if we are currently in verbatim mode.
975      */
976     protected boolean isVerbatim()
977     {
978         return this.inVerbatim;
979     }
980 
981     /**
982      * Checks if we are currently inside a &lt;script&gt; tag.
983      *
984      * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
985      *
986      * @since 1.1.1.
987      */
988     protected boolean isScriptBlock()
989     {
990         return this.scriptBlock;
991     }
992 
993     /**
994      * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
995      *
996      * @param id The id to validate.
997      * @return A transformed id or the original id if it was already valid.
998      * @see DoxiaUtils#encodeId(String)
999      */
1000     protected String validAnchor( String id )
1001     {
1002         if ( !DoxiaUtils.isValidId( id ) )
1003         {
1004             String linkAnchor = DoxiaUtils.encodeId( id, true );
1005 
1006             String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
1007             logMessage( "modifiedLink", msg );
1008 
1009             return linkAnchor;
1010         }
1011 
1012         return id;
1013     }
1014 
1015     /** {@inheritDoc} */
1016     @Override
1017     protected void init()
1018     {
1019         super.init();
1020 
1021         this.scriptBlock = false;
1022         this.isLink = false;
1023         this.isAnchor = false;
1024         this.orderedListDepth = 0;
1025         this.sectionLevel = 0;
1026         this.inVerbatim = false;
1027         this.inFigure = false;
1028         this.warnMessages = null;
1029     }
1030 
1031     private void handleAEnd( Sink sink )
1032     {
1033         if ( isLink )
1034         {
1035             sink.link_();
1036             isLink = false;
1037         }
1038         else if ( isAnchor )
1039         {
1040             sink.anchor_();
1041             isAnchor = false;
1042         }
1043     }
1044 
1045     private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1046     {
1047         String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
1048 
1049         if ( href != null )
1050         {
1051             int hashIndex = href.indexOf( '#' );
1052             if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
1053             {
1054                 String hash = href.substring( hashIndex + 1 );
1055 
1056                 if ( !DoxiaUtils.isValidId( hash ) )
1057                 {
1058                     href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
1059 
1060                     String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
1061                     logMessage( "modifiedLink", msg );
1062                 }
1063             }
1064             sink.link( href, attribs );
1065             isLink = true;
1066         }
1067         else
1068         {
1069             String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
1070 
1071             if ( name != null )
1072             {
1073                 sink.anchor( validAnchor( name ), attribs );
1074                 isAnchor = true;
1075             }
1076             else
1077             {
1078                 String id = parser.getAttributeValue( null, Attribute.ID.toString() );
1079                 if ( id != null )
1080                 {
1081                     sink.anchor( validAnchor( id ), attribs );
1082                     isAnchor = true;
1083                 }
1084             }
1085         }
1086     }
1087 
1088     private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
1089     {
1090         boolean visited = true;
1091 
1092         String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
1093 
1094         if ( "figure".equals( divclass ) )
1095         {
1096             this.inFigure = true;
1097             SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
1098             atts.removeAttribute( SinkEventAttributes.CLASS );
1099             sink.figure( atts );
1100         }
1101         else
1102         {
1103             visited = false;
1104         }
1105 
1106         return visited;
1107     }
1108 
1109     private void handleFigureCaptionEnd( Sink sink )
1110     {
1111         if ( inFigure )
1112         {
1113             sink.figureCaption_();
1114         }
1115         else
1116         {
1117             sink.inline_();
1118         }
1119     }
1120 
1121     private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
1122     {
1123         if ( inFigure )
1124         {
1125             sink.figureCaption( attribs );
1126         }
1127         else
1128         {
1129             sink.inline( SinkEventAttributeSet.Semantics.ITALIC );
1130         }
1131     }
1132 
1133     private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1134     {
1135         String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
1136 
1137         if ( src != null )
1138         {
1139             sink.figureGraphics( src, attribs );
1140         }
1141     }
1142 
1143     private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
1144     {
1145         if ( orderedListDepth == 0 )
1146         {
1147             sink.listItem( attribs );
1148         }
1149         else
1150         {
1151             sink.numberedListItem( attribs );
1152         }
1153     }
1154 
1155     private void handleListItemEnd( Sink sink )
1156     {
1157         if ( orderedListDepth == 0 )
1158         {
1159             sink.listItem_();
1160         }
1161         else
1162         {
1163             sink.numberedListItem_();
1164         }
1165     }
1166 
1167     private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1168     {
1169         int numbering = Sink.NUMBERING_DECIMAL;
1170         // this will have to be generalized if we handle styles
1171         String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
1172 
1173         if ( style != null )
1174         {
1175             switch ( style )
1176             {
1177                 case "list-style-type: upper-alpha":
1178                     numbering = Sink.NUMBERING_UPPER_ALPHA;
1179                     break;
1180                 case "list-style-type: lower-alpha":
1181                     numbering = Sink.NUMBERING_LOWER_ALPHA;
1182                     break;
1183                 case "list-style-type: upper-roman":
1184                     numbering = Sink.NUMBERING_UPPER_ROMAN;
1185                     break;
1186                 case "list-style-type: lower-roman":
1187                     numbering = Sink.NUMBERING_LOWER_ROMAN;
1188                     break;
1189                 case "list-style-type: decimal":
1190                     numbering = Sink.NUMBERING_DECIMAL;
1191                     break;
1192                 default:
1193                     // ignore all other
1194             }
1195         }
1196 
1197         sink.numberedList( numbering, attribs );
1198         orderedListDepth++;
1199     }
1200 
1201     private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
1202     {
1203         if ( !inFigure )
1204         {
1205             sink.paragraph( attribs );
1206         }
1207     }
1208 
1209     /*
1210      * The PRE element tells visual user agents that the enclosed text is
1211      * "preformatted". When handling preformatted text, visual user agents:
1212      * - May leave white space intact.
1213      * - May render text with a fixed-pitch font.
1214      * - May disable automatic word wrap.
1215      * - Must not disable bidirectional processing.
1216      * Non-visual user agents are not required to respect extra white space
1217      * in the content of a PRE element.
1218      */
1219     private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
1220     {
1221         verbatim();
1222         sink.verbatim( attribs );
1223     }
1224 
1225     private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
1226     {
1227         consecutiveSections( level, sink );
1228         sink.section( level, attribs );
1229         sink.sectionTitle( level, attribs );
1230     }
1231 
1232     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
1233     {
1234         sink.table( attribs );
1235         String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
1236         boolean grid = true;
1237 
1238         if ( border == null || "0".equals( border ) )
1239         {
1240             grid = false;
1241         }
1242 
1243         String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
1244         int[] justif = {Sink.JUSTIFY_LEFT};
1245 
1246         if ( "center".equals( align ) )
1247         {
1248             justif[0] = Sink.JUSTIFY_CENTER;
1249         }
1250         else if ( "right".equals( align ) )
1251         {
1252             justif[0] = Sink.JUSTIFY_RIGHT;
1253         }
1254 
1255         sink.tableRows( justif, grid );
1256     }
1257 
1258     /**
1259      * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
1260      *
1261      * @param key not null
1262      * @param msg not null
1263      * @see #parse(Reader, Sink)
1264      * @since 1.1.1
1265      */
1266     private void logMessage( String key, String msg )
1267     {
1268         final String log = "[XHTML Parser] " + msg;
1269         if ( getLog().isDebugEnabled() )
1270         {
1271             getLog().debug( log );
1272 
1273             return;
1274         }
1275 
1276         if ( warnMessages == null )
1277         {
1278             warnMessages = new HashMap<>();
1279         }
1280 
1281         Set<String> set = warnMessages.get( key );
1282         if ( set == null )
1283         {
1284             set = new TreeSet<>();
1285         }
1286         set.add( log );
1287         warnMessages.put( key, set );
1288     }
1289 
1290     /**
1291      * @since 1.1.1
1292      */
1293     private void logWarnings()
1294     {
1295         if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
1296         {
1297             for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
1298             {
1299                 for ( String msg : entry.getValue() )
1300                 {
1301                     getLog().warn( msg );
1302                 }
1303             }
1304 
1305             this.warnMessages = null;
1306         }
1307     }
1308 }