001package org.apache.maven.doxia.parser;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.io.Reader;
023import java.util.HashMap;
024import java.util.Map;
025import java.util.Set;
026import java.util.TreeSet;
027
028import javax.swing.text.html.HTML.Attribute;
029
030import org.apache.maven.doxia.macro.MacroExecutionException;
031import org.apache.maven.doxia.markup.HtmlMarkup;
032import org.apache.maven.doxia.sink.Sink;
033import org.apache.maven.doxia.sink.SinkEventAttributes;
034import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
035import org.apache.maven.doxia.util.DoxiaUtils;
036
037import org.codehaus.plexus.util.StringUtils;
038import org.codehaus.plexus.util.xml.pull.XmlPullParser;
039import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
040
041/**
042 * Common base parser for xhtml events.
043 *
044 * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
045 * @author ltheussl
046 * @version $Id$
047 * @since 1.1
048 */
049public class XhtmlBaseParser
050    extends AbstractXmlParser
051        implements HtmlMarkup
052{
    /**
     * True if a &lt;script&gt;&lt;/script&gt; or &lt;style&gt;&lt;/style&gt; block is read. CDATA sections within are
     * handled as rawText. Set when baseStartTag sees a script or style start tag.
     */
    private boolean scriptBlock;

    /**
     * Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;.
     * NOTE(review): presumably true while inside the href (link) form - confirm against the anchor handling code.
     */
    private boolean isLink;

    /**
     * Used to distinguish &lt;a href=""&gt; from &lt;a name=""&gt;.
     * NOTE(review): presumably true while inside the name (anchor) form - confirm against the anchor handling code.
     */
    private boolean isAnchor;

    /** Used for nested lists; decremented in baseEndTag each time an &lt;ol&gt; element is closed. */
    private int orderedListDepth = 0;

    /** Counts section level. */
    private int sectionLevel;

    /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */
    private boolean inVerbatim;

    /**
     * Used to recognize the case of img inside figure. While it is set, baseEndTag does not emit
     * paragraph_() for a closing &lt;p&gt; and treats a closing &lt;div&gt; as the end of the figure.
     */
    private boolean inFigure;

    /**
     * Used to wrap the definedTerm with its definition, even when one is omitted.
     * Set to true when a &lt;dt&gt; opens a definition list item, and reset to false once that item
     * has been closed by the end of a &lt;dd&gt; or of the enclosing &lt;dl&gt;.
     */
    boolean hasDefinitionListItem = false;

    /**
     * Decoration properties, eg for texts. baseStartTag adds an entry for underline, line-through,
     * sub and sup markup, and baseEndTag removes it again when the element is closed.
     */
    private final SinkEventAttributeSet decoration = new SinkEventAttributeSet();

    /**
     * Map of warn messages with a String as key to describe the error type and a Set as value.
     * Used to reduce the number of warn messages.
     */
    private Map<String, Set<String>> warnMessages;
086
    /**
     * {@inheritDoc}
     *
     * Calls <code>init()</code> before delegating to the inherited parse method. When parsing ends,
     * whether normally or with an exception, the collected warnings are logged, the second-parsing
     * flag is cleared and <code>init()</code> is called once more.
     */
    @Override
    public void parse( Reader source, Sink sink )
        throws ParseException
    {
        // NOTE(review): init() is defined outside this view; presumably it resets the per-parse state - confirm
        init();

        try
        {
            super.parse( source, sink );
        }
        finally
        {
            // the finally block makes sure the clean-up below also runs when super.parse() throws

            // NOTE(review): logging presumably has to come before the second init(), which may clear
            // the collected warnings - confirm
            logWarnings();

            setSecondParsing( false );
            init();
        }
    }
106
107    /**
108     * {@inheritDoc}
109     *
110     * Adds all XHTML (HTML 4.0) entities to the parser so that they can be recognized and resolved
111     * without additional DTD.
112     */
113    @Override
114    protected void initXmlParser( XmlPullParser parser )
115        throws XmlPullParserException
116    {
117        super.initXmlParser( parser );
118
119        // the entities taken from org.apache.maven.doxia.document.io.xpp3.DocumentXpp3Reader,
120        // which is generated automatically
121
122        // ----------------------------------------------------------------------
123        // Latin 1 entities
124        // ----------------------------------------------------------------------
125
126        parser.defineEntityReplacementText( "nbsp", "\u00a0" );
127        parser.defineEntityReplacementText( "iexcl", "\u00a1" );
128        parser.defineEntityReplacementText( "cent", "\u00a2" );
129        parser.defineEntityReplacementText( "pound", "\u00a3" );
130        parser.defineEntityReplacementText( "curren", "\u00a4" );
131        parser.defineEntityReplacementText( "yen", "\u00a5" );
132        parser.defineEntityReplacementText( "brvbar", "\u00a6" );
133        parser.defineEntityReplacementText( "sect", "\u00a7" );
134        parser.defineEntityReplacementText( "uml", "\u00a8" );
135        parser.defineEntityReplacementText( "copy", "\u00a9" );
136        parser.defineEntityReplacementText( "ordf", "\u00aa" );
137        parser.defineEntityReplacementText( "laquo", "\u00ab" );
138        parser.defineEntityReplacementText( "not", "\u00ac" );
139        parser.defineEntityReplacementText( "shy", "\u00ad" );
140        parser.defineEntityReplacementText( "reg", "\u00ae" );
141        parser.defineEntityReplacementText( "macr", "\u00af" );
142        parser.defineEntityReplacementText( "deg", "\u00b0" );
143        parser.defineEntityReplacementText( "plusmn", "\u00b1" );
144        parser.defineEntityReplacementText( "sup2", "\u00b2" );
145        parser.defineEntityReplacementText( "sup3", "\u00b3" );
146        parser.defineEntityReplacementText( "acute", "\u00b4" );
147        parser.defineEntityReplacementText( "micro", "\u00b5" );
148        parser.defineEntityReplacementText( "para", "\u00b6" );
149        parser.defineEntityReplacementText( "middot", "\u00b7" );
150        parser.defineEntityReplacementText( "cedil", "\u00b8" );
151        parser.defineEntityReplacementText( "sup1", "\u00b9" );
152        parser.defineEntityReplacementText( "ordm", "\u00ba" );
153        parser.defineEntityReplacementText( "raquo", "\u00bb" );
154        parser.defineEntityReplacementText( "frac14", "\u00bc" );
155        parser.defineEntityReplacementText( "frac12", "\u00bd" );
156        parser.defineEntityReplacementText( "frac34", "\u00be" );
157        parser.defineEntityReplacementText( "iquest", "\u00bf" );
158        parser.defineEntityReplacementText( "Agrave", "\u00c0" );
159        parser.defineEntityReplacementText( "Aacute", "\u00c1" );
160        parser.defineEntityReplacementText( "Acirc", "\u00c2" );
161        parser.defineEntityReplacementText( "Atilde", "\u00c3" );
162        parser.defineEntityReplacementText( "Auml", "\u00c4" );
163        parser.defineEntityReplacementText( "Aring", "\u00c5" );
164        parser.defineEntityReplacementText( "AElig", "\u00c6" );
165        parser.defineEntityReplacementText( "Ccedil", "\u00c7" );
166        parser.defineEntityReplacementText( "Egrave", "\u00c8" );
167        parser.defineEntityReplacementText( "Eacute", "\u00c9" );
168        parser.defineEntityReplacementText( "Ecirc", "\u00ca" );
169        parser.defineEntityReplacementText( "Euml", "\u00cb" );
170        parser.defineEntityReplacementText( "Igrave", "\u00cc" );
171        parser.defineEntityReplacementText( "Iacute", "\u00cd" );
172        parser.defineEntityReplacementText( "Icirc", "\u00ce" );
173        parser.defineEntityReplacementText( "Iuml", "\u00cf" );
174        parser.defineEntityReplacementText( "ETH", "\u00d0" );
175        parser.defineEntityReplacementText( "Ntilde", "\u00d1" );
176        parser.defineEntityReplacementText( "Ograve", "\u00d2" );
177        parser.defineEntityReplacementText( "Oacute", "\u00d3" );
178        parser.defineEntityReplacementText( "Ocirc", "\u00d4" );
179        parser.defineEntityReplacementText( "Otilde", "\u00d5" );
180        parser.defineEntityReplacementText( "Ouml", "\u00d6" );
181        parser.defineEntityReplacementText( "times", "\u00d7" );
182        parser.defineEntityReplacementText( "Oslash", "\u00d8" );
183        parser.defineEntityReplacementText( "Ugrave", "\u00d9" );
184        parser.defineEntityReplacementText( "Uacute", "\u00da" );
185        parser.defineEntityReplacementText( "Ucirc", "\u00db" );
186        parser.defineEntityReplacementText( "Uuml", "\u00dc" );
187        parser.defineEntityReplacementText( "Yacute", "\u00dd" );
188        parser.defineEntityReplacementText( "THORN", "\u00de" );
189        parser.defineEntityReplacementText( "szlig", "\u00df" );
190        parser.defineEntityReplacementText( "agrave", "\u00e0" );
191        parser.defineEntityReplacementText( "aacute", "\u00e1" );
192        parser.defineEntityReplacementText( "acirc", "\u00e2" );
193        parser.defineEntityReplacementText( "atilde", "\u00e3" );
194        parser.defineEntityReplacementText( "auml", "\u00e4" );
195        parser.defineEntityReplacementText( "aring", "\u00e5" );
196        parser.defineEntityReplacementText( "aelig", "\u00e6" );
197        parser.defineEntityReplacementText( "ccedil", "\u00e7" );
198        parser.defineEntityReplacementText( "egrave", "\u00e8" );
199        parser.defineEntityReplacementText( "eacute", "\u00e9" );
200        parser.defineEntityReplacementText( "ecirc", "\u00ea" );
201        parser.defineEntityReplacementText( "euml", "\u00eb" );
202        parser.defineEntityReplacementText( "igrave", "\u00ec" );
203        parser.defineEntityReplacementText( "iacute", "\u00ed" );
204        parser.defineEntityReplacementText( "icirc", "\u00ee" );
205        parser.defineEntityReplacementText( "iuml", "\u00ef" );
206        parser.defineEntityReplacementText( "eth", "\u00f0" );
207        parser.defineEntityReplacementText( "ntilde", "\u00f1" );
208        parser.defineEntityReplacementText( "ograve", "\u00f2" );
209        parser.defineEntityReplacementText( "oacute", "\u00f3" );
210        parser.defineEntityReplacementText( "ocirc", "\u00f4" );
211        parser.defineEntityReplacementText( "otilde", "\u00f5" );
212        parser.defineEntityReplacementText( "ouml", "\u00f6" );
213        parser.defineEntityReplacementText( "divide", "\u00f7" );
214        parser.defineEntityReplacementText( "oslash", "\u00f8" );
215        parser.defineEntityReplacementText( "ugrave", "\u00f9" );
216        parser.defineEntityReplacementText( "uacute", "\u00fa" );
217        parser.defineEntityReplacementText( "ucirc", "\u00fb" );
218        parser.defineEntityReplacementText( "uuml", "\u00fc" );
219        parser.defineEntityReplacementText( "yacute", "\u00fd" );
220        parser.defineEntityReplacementText( "thorn", "\u00fe" );
221        parser.defineEntityReplacementText( "yuml", "\u00ff" );
222
223        // ----------------------------------------------------------------------
224        // Special entities
225        // ----------------------------------------------------------------------
226
227        parser.defineEntityReplacementText( "OElig", "\u0152" );
228        parser.defineEntityReplacementText( "oelig", "\u0153" );
229        parser.defineEntityReplacementText( "Scaron", "\u0160" );
230        parser.defineEntityReplacementText( "scaron", "\u0161" );
231        parser.defineEntityReplacementText( "Yuml", "\u0178" );
232        parser.defineEntityReplacementText( "circ", "\u02c6" );
233        parser.defineEntityReplacementText( "tilde", "\u02dc" );
234        parser.defineEntityReplacementText( "ensp", "\u2002" );
235        parser.defineEntityReplacementText( "emsp", "\u2003" );
236        parser.defineEntityReplacementText( "thinsp", "\u2009" );
237        parser.defineEntityReplacementText( "zwnj", "\u200c" );
238        parser.defineEntityReplacementText( "zwj", "\u200d" );
239        parser.defineEntityReplacementText( "lrm", "\u200e" );
240        parser.defineEntityReplacementText( "rlm", "\u200f" );
241        parser.defineEntityReplacementText( "ndash", "\u2013" );
242        parser.defineEntityReplacementText( "mdash", "\u2014" );
243        parser.defineEntityReplacementText( "lsquo", "\u2018" );
244        parser.defineEntityReplacementText( "rsquo", "\u2019" );
245        parser.defineEntityReplacementText( "sbquo", "\u201a" );
246        parser.defineEntityReplacementText( "ldquo", "\u201c" );
247        parser.defineEntityReplacementText( "rdquo", "\u201d" );
248        parser.defineEntityReplacementText( "bdquo", "\u201e" );
249        parser.defineEntityReplacementText( "dagger", "\u2020" );
250        parser.defineEntityReplacementText( "Dagger", "\u2021" );
251        parser.defineEntityReplacementText( "permil", "\u2030" );
252        parser.defineEntityReplacementText( "lsaquo", "\u2039" );
253        parser.defineEntityReplacementText( "rsaquo", "\u203a" );
254        parser.defineEntityReplacementText( "euro", "\u20ac" );
255
256        // ----------------------------------------------------------------------
257        // Symbol entities
258        // ----------------------------------------------------------------------
259
260        parser.defineEntityReplacementText( "fnof", "\u0192" );
261        parser.defineEntityReplacementText( "Alpha", "\u0391" );
262        parser.defineEntityReplacementText( "Beta", "\u0392" );
263        parser.defineEntityReplacementText( "Gamma", "\u0393" );
264        parser.defineEntityReplacementText( "Delta", "\u0394" );
265        parser.defineEntityReplacementText( "Epsilon", "\u0395" );
266        parser.defineEntityReplacementText( "Zeta", "\u0396" );
267        parser.defineEntityReplacementText( "Eta", "\u0397" );
268        parser.defineEntityReplacementText( "Theta", "\u0398" );
269        parser.defineEntityReplacementText( "Iota", "\u0399" );
270        parser.defineEntityReplacementText( "Kappa", "\u039a" );
271        parser.defineEntityReplacementText( "Lambda", "\u039b" );
272        parser.defineEntityReplacementText( "Mu", "\u039c" );
273        parser.defineEntityReplacementText( "Nu", "\u039d" );
274        parser.defineEntityReplacementText( "Xi", "\u039e" );
275        parser.defineEntityReplacementText( "Omicron", "\u039f" );
276        parser.defineEntityReplacementText( "Pi", "\u03a0" );
277        parser.defineEntityReplacementText( "Rho", "\u03a1" );
278        parser.defineEntityReplacementText( "Sigma", "\u03a3" );
279        parser.defineEntityReplacementText( "Tau", "\u03a4" );
280        parser.defineEntityReplacementText( "Upsilon", "\u03a5" );
281        parser.defineEntityReplacementText( "Phi", "\u03a6" );
282        parser.defineEntityReplacementText( "Chi", "\u03a7" );
283        parser.defineEntityReplacementText( "Psi", "\u03a8" );
284        parser.defineEntityReplacementText( "Omega", "\u03a9" );
285        parser.defineEntityReplacementText( "alpha", "\u03b1" );
286        parser.defineEntityReplacementText( "beta", "\u03b2" );
287        parser.defineEntityReplacementText( "gamma", "\u03b3" );
288        parser.defineEntityReplacementText( "delta", "\u03b4" );
289        parser.defineEntityReplacementText( "epsilon", "\u03b5" );
290        parser.defineEntityReplacementText( "zeta", "\u03b6" );
291        parser.defineEntityReplacementText( "eta", "\u03b7" );
292        parser.defineEntityReplacementText( "theta", "\u03b8" );
293        parser.defineEntityReplacementText( "iota", "\u03b9" );
294        parser.defineEntityReplacementText( "kappa", "\u03ba" );
295        parser.defineEntityReplacementText( "lambda", "\u03bb" );
296        parser.defineEntityReplacementText( "mu", "\u03bc" );
297        parser.defineEntityReplacementText( "nu", "\u03bd" );
298        parser.defineEntityReplacementText( "xi", "\u03be" );
299        parser.defineEntityReplacementText( "omicron", "\u03bf" );
300        parser.defineEntityReplacementText( "pi", "\u03c0" );
301        parser.defineEntityReplacementText( "rho", "\u03c1" );
302        parser.defineEntityReplacementText( "sigmaf", "\u03c2" );
303        parser.defineEntityReplacementText( "sigma", "\u03c3" );
304        parser.defineEntityReplacementText( "tau", "\u03c4" );
305        parser.defineEntityReplacementText( "upsilon", "\u03c5" );
306        parser.defineEntityReplacementText( "phi", "\u03c6" );
307        parser.defineEntityReplacementText( "chi", "\u03c7" );
308        parser.defineEntityReplacementText( "psi", "\u03c8" );
309        parser.defineEntityReplacementText( "omega", "\u03c9" );
310        parser.defineEntityReplacementText( "thetasym", "\u03d1" );
311        parser.defineEntityReplacementText( "upsih", "\u03d2" );
312        parser.defineEntityReplacementText( "piv", "\u03d6" );
313        parser.defineEntityReplacementText( "bull", "\u2022" );
314        parser.defineEntityReplacementText( "hellip", "\u2026" );
315        parser.defineEntityReplacementText( "prime", "\u2032" );
316        parser.defineEntityReplacementText( "Prime", "\u2033" );
317        parser.defineEntityReplacementText( "oline", "\u203e" );
318        parser.defineEntityReplacementText( "frasl", "\u2044" );
319        parser.defineEntityReplacementText( "weierp", "\u2118" );
320        parser.defineEntityReplacementText( "image", "\u2111" );
321        parser.defineEntityReplacementText( "real", "\u211c" );
322        parser.defineEntityReplacementText( "trade", "\u2122" );
323        parser.defineEntityReplacementText( "alefsym", "\u2135" );
324        parser.defineEntityReplacementText( "larr", "\u2190" );
325        parser.defineEntityReplacementText( "uarr", "\u2191" );
326        parser.defineEntityReplacementText( "rarr", "\u2192" );
327        parser.defineEntityReplacementText( "darr", "\u2193" );
328        parser.defineEntityReplacementText( "harr", "\u2194" );
329        parser.defineEntityReplacementText( "crarr", "\u21b5" );
330        parser.defineEntityReplacementText( "lArr", "\u21d0" );
331        parser.defineEntityReplacementText( "uArr", "\u21d1" );
332        parser.defineEntityReplacementText( "rArr", "\u21d2" );
333        parser.defineEntityReplacementText( "dArr", "\u21d3" );
334        parser.defineEntityReplacementText( "hArr", "\u21d4" );
335        parser.defineEntityReplacementText( "forall", "\u2200" );
336        parser.defineEntityReplacementText( "part", "\u2202" );
337        parser.defineEntityReplacementText( "exist", "\u2203" );
338        parser.defineEntityReplacementText( "empty", "\u2205" );
339        parser.defineEntityReplacementText( "nabla", "\u2207" );
340        parser.defineEntityReplacementText( "isin", "\u2208" );
341        parser.defineEntityReplacementText( "notin", "\u2209" );
342        parser.defineEntityReplacementText( "ni", "\u220b" );
343        parser.defineEntityReplacementText( "prod", "\u220f" );
344        parser.defineEntityReplacementText( "sum", "\u2211" );
345        parser.defineEntityReplacementText( "minus", "\u2212" );
346        parser.defineEntityReplacementText( "lowast", "\u2217" );
347        parser.defineEntityReplacementText( "radic", "\u221a" );
348        parser.defineEntityReplacementText( "prop", "\u221d" );
349        parser.defineEntityReplacementText( "infin", "\u221e" );
350        parser.defineEntityReplacementText( "ang", "\u2220" );
351        parser.defineEntityReplacementText( "and", "\u2227" );
352        parser.defineEntityReplacementText( "or", "\u2228" );
353        parser.defineEntityReplacementText( "cap", "\u2229" );
354        parser.defineEntityReplacementText( "cup", "\u222a" );
355        parser.defineEntityReplacementText( "int", "\u222b" );
356        parser.defineEntityReplacementText( "there4", "\u2234" );
357        parser.defineEntityReplacementText( "sim", "\u223c" );
358        parser.defineEntityReplacementText( "cong", "\u2245" );
359        parser.defineEntityReplacementText( "asymp", "\u2248" );
360        parser.defineEntityReplacementText( "ne", "\u2260" );
361        parser.defineEntityReplacementText( "equiv", "\u2261" );
362        parser.defineEntityReplacementText( "le", "\u2264" );
363        parser.defineEntityReplacementText( "ge", "\u2265" );
364        parser.defineEntityReplacementText( "sub", "\u2282" );
365        parser.defineEntityReplacementText( "sup", "\u2283" );
366        parser.defineEntityReplacementText( "nsub", "\u2284" );
367        parser.defineEntityReplacementText( "sube", "\u2286" );
368        parser.defineEntityReplacementText( "supe", "\u2287" );
369        parser.defineEntityReplacementText( "oplus", "\u2295" );
370        parser.defineEntityReplacementText( "otimes", "\u2297" );
371        parser.defineEntityReplacementText( "perp", "\u22a5" );
372        parser.defineEntityReplacementText( "sdot", "\u22c5" );
373        parser.defineEntityReplacementText( "lceil", "\u2308" );
374        parser.defineEntityReplacementText( "rceil", "\u2309" );
375        parser.defineEntityReplacementText( "lfloor", "\u230a" );
376        parser.defineEntityReplacementText( "rfloor", "\u230b" );
377        parser.defineEntityReplacementText( "lang", "\u2329" );
378        parser.defineEntityReplacementText( "rang", "\u232a" );
379        parser.defineEntityReplacementText( "loz", "\u25ca" );
380        parser.defineEntityReplacementText( "spades", "\u2660" );
381        parser.defineEntityReplacementText( "clubs", "\u2663" );
382        parser.defineEntityReplacementText( "hearts", "\u2665" );
383        parser.defineEntityReplacementText( "diams", "\u2666" );
384    }
385
386    /**
387     * <p>
388     *   Goes through a common list of possible html start tags. These include only tags that can go into
389     *   the body of a xhtml document and so should be re-usable by different xhtml-based parsers.
390     * </p>
391     * <p>
392     *   The currently handled tags are:
393     * </p>
394     * <p>
395     *   <code>
396     *      &lt;h2&gt;, &lt;h3&gt;, &lt;h4&gt;, &lt;h5&gt;, &lt;h6&gt;, &lt;p&gt;, &lt;pre&gt;,
397     *      &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;, &lt;dl&gt;, &lt;dt&gt;, &lt;dd&gt;, &lt;b&gt;, &lt;strong&gt;,
398     *      &lt;i&gt;, &lt;em&gt;, &lt;code&gt;, &lt;samp&gt;, &lt;tt&gt;, &lt;a&gt;, &lt;table&gt;, &lt;tr&gt;,
399     *      &lt;th&gt;, &lt;td&gt;, &lt;caption&gt;, &lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;.
400     *   </code>
401     * </p>
402     *
403     * @param parser A parser.
404     * @param sink the sink to receive the events.
405     * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise.
406     */
407    protected boolean baseStartTag( XmlPullParser parser, Sink sink )
408    {
409        boolean visited = true;
410
411        SinkEventAttributeSet attribs = getAttributesFromParser( parser );
412
413        if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
414        {
415            handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs );
416        }
417        else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
418        {
419            handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs );
420        }
421        else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
422        {
423            handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs );
424        }
425        else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
426        {
427            handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs );
428        }
429        else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
430        {
431            handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs );
432        }
433        else if ( parser.getName().equals( HtmlMarkup.U.toString() ) )
434        {
435            decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" );
436        }
437        else if ( parser.getName().equals( HtmlMarkup.S.toString() )
438                || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
439                || parser.getName().equals( "del" ) )
440        {
441            decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" );
442        }
443        else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) )
444        {
445            decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" );
446        }
447        else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) )
448        {
449            decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" );
450        }
451        else if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
452        {
453            handlePStart( sink, attribs );
454        }
455        else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
456        {
457            visited = handleDivStart( parser, attribs, sink );
458        }
459        else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
460        {
461            handlePreStart( attribs, sink );
462        }
463        else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
464        {
465            sink.list( attribs );
466        }
467        else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
468        {
469            handleOLStart( parser, sink, attribs );
470        }
471        else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
472        {
473            handleLIStart( sink, attribs );
474        }
475        else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
476        {
477            sink.definitionList( attribs );
478        }
479        else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
480        {
481            if ( hasDefinitionListItem )
482            {
483                // close previous listItem
484                sink.definitionListItem_();
485            }
486            sink.definitionListItem( attribs );
487            hasDefinitionListItem = true;
488            sink.definedTerm( attribs );
489        }
490        else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
491        {
492            if ( !hasDefinitionListItem )
493            {
494                sink.definitionListItem( attribs );
495            }
496            sink.definition( attribs );
497        }
498        else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
499                || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
500        {
501            sink.bold();
502        }
503        else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
504                || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
505        {
506            handleFigureCaptionStart( sink, attribs );
507        }
508        else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
509                || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
510                || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
511        {
512            sink.monospaced();
513        }
514        else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
515        {
516            handleAStart( parser, sink, attribs );
517        }
518        else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
519        {
520            handleTableStart( sink, attribs, parser );
521        }
522        else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
523        {
524            sink.tableRow( attribs );
525        }
526        else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
527        {
528            sink.tableHeaderCell( attribs );
529        }
530        else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
531        {
532            sink.tableCell( attribs );
533        }
534        else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
535        {
536            sink.tableCaption( attribs );
537        }
538        else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) )
539        {
540            sink.lineBreak( attribs );
541        }
542        else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) )
543        {
544            sink.horizontalRule( attribs );
545        }
546        else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) )
547        {
548            handleImgStart( parser, sink, attribs );
549        }
550        else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
551            || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
552        {
553            handleUnknown( parser, sink, TAG_TYPE_START );
554            scriptBlock = true;
555        }
556        else
557        {
558            visited = false;
559        }
560
561        return visited;
562    }
563
564    /**
565     * <p>
566     *   Goes through a common list of possible html end tags.
567     *   These should be re-usable by different xhtml-based parsers.
568     *   The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)},
     *   except for the empty elements (<code>&lt;br/&gt;, &lt;hr/&gt;, &lt;img/&gt;</code>).
570     * </p>
571     *
572     * @param parser A parser.
573     * @param sink the sink to receive the events.
574     * @return True if the event has been handled by this method, false otherwise.
575     */
576    protected boolean baseEndTag( XmlPullParser parser, Sink sink )
577    {
578        boolean visited = true;
579
580        if ( parser.getName().equals( HtmlMarkup.P.toString() ) )
581        {
582            if ( !inFigure )
583            {
584                sink.paragraph_();
585            }
586        }
587        else if ( parser.getName().equals( HtmlMarkup.U.toString() )
588                || parser.getName().equals( HtmlMarkup.S.toString() )
589                || parser.getName().equals( HtmlMarkup.STRIKE.toString() )
590                || parser.getName().equals( "del" ) )
591        {
592            decoration.removeAttribute( SinkEventAttributes.DECORATION );
593        }
594        else if ( parser.getName().equals( HtmlMarkup.SUB.toString() )
595                || parser.getName().equals( HtmlMarkup.SUP.toString() ) )
596        {
597            decoration.removeAttribute( SinkEventAttributes.VALIGN );
598        }
599        else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
600        {
601            if ( inFigure )
602            {
603                sink.figure_();
604                this.inFigure = false;
605            }
606            else
607            {
608                visited = false;
609            }
610        }
611        else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) )
612        {
613            verbatim_();
614
615            sink.verbatim_();
616        }
617        else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) )
618        {
619            sink.list_();
620        }
621        else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) )
622        {
623            sink.numberedList_();
624            orderedListDepth--;
625        }
626        else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) )
627        {
628            handleListItemEnd( sink );
629        }
630        else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) )
631        {
632            if ( hasDefinitionListItem )
633            {
634                sink.definitionListItem_();
635                hasDefinitionListItem = false;
636            }
637            sink.definitionList_();
638        }
639        else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) )
640        {
641            sink.definedTerm_();
642        }
643        else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) )
644        {
645            sink.definition_();
646            sink.definitionListItem_();
647            hasDefinitionListItem = false;
648        }
649        else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) )
650                || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) )
651        {
652            sink.bold_();
653        }
654        else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) )
655                || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) )
656        {
657            handleFigureCaptionEnd( sink );
658        }
659        else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) )
660                || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) )
661                || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) )
662        {
663            sink.monospaced_();
664        }
665        else if ( parser.getName().equals( HtmlMarkup.A.toString() ) )
666        {
667            handleAEnd( sink );
668        }
669
670        // ----------------------------------------------------------------------
671        // Tables
672        // ----------------------------------------------------------------------
673
674        else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) )
675        {
676            sink.tableRows_();
677
678            sink.table_();
679        }
680        else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) )
681        {
682            sink.tableRow_();
683        }
684        else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) )
685        {
686            sink.tableHeaderCell_();
687        }
688        else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) )
689        {
690            sink.tableCell_();
691        }
692        else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) )
693        {
694            sink.tableCaption_();
695        }
696        else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) )
697        {
698            sink.sectionTitle1_();
699        }
700        else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) )
701        {
702            sink.sectionTitle2_();
703        }
704        else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) )
705        {
706            sink.sectionTitle3_();
707        }
708        else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) )
709        {
710            sink.sectionTitle4_();
711        }
712        else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) )
713        {
714            sink.sectionTitle5_();
715        }
716        else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() )
717            || parser.getName().equals( HtmlMarkup.STYLE.toString() ) )
718        {
719            handleUnknown( parser, sink, TAG_TYPE_END );
720
721            scriptBlock = false;
722        }
723        else
724        {
725            visited = false;
726        }
727
728        return visited;
729    }
730
731    /**
732     * {@inheritDoc}
733     *
734     * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be
735     * overridden by implementing parsers to include additional tags.
736     */
737    protected void handleStartTag( XmlPullParser parser, Sink sink )
738        throws XmlPullParserException, MacroExecutionException
739    {
740        if ( !baseStartTag( parser, sink ) )
741        {
742            if ( getLog().isWarnEnabled() )
743            {
744                String position = "[" + parser.getLineNumber() + ":"
745                    + parser.getColumnNumber() + "]";
746                String tag = "<" + parser.getName() + ">";
747
748                getLog().warn( "Unrecognized xml tag: " + tag + " at " + position );
749            }
750        }
751    }
752
753    /**
754     * {@inheritDoc}
755     *
756     * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be
757     * overridden by implementing parsers to include additional tags.
758     */
759    protected void handleEndTag( XmlPullParser parser, Sink sink )
760        throws XmlPullParserException, MacroExecutionException
761    {
762        if ( !baseEndTag( parser, sink ) )
763        {
764            // unrecognized tag is already logged in StartTag
765        }
766    }
767
768    /** {@inheritDoc} */
769    @Override
770    protected void handleText( XmlPullParser parser, Sink sink )
771        throws XmlPullParserException
772    {
773        String text = getText( parser );
774
775        /*
776         * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the
777         * parser so any whitespace that makes it here is significant.
778         *
779         * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA.
780         */
781        if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() )
782        {
783            sink.text( text, decoration );
784        }
785    }
786
787    /** {@inheritDoc} */
788    @Override
789    protected void handleComment( XmlPullParser parser, Sink sink )
790        throws XmlPullParserException
791    {
792        String text = getText( parser );
793
794        if ( "PB".equals( text.trim() ) )
795        {
796            sink.pageBreak();
797        }
798        else
799        {
800            if ( isEmitComments() )
801            {
802                sink.comment( text );
803            }
804        }
805    }
806
807    /** {@inheritDoc} */
808    @Override
809    protected void handleCdsect( XmlPullParser parser, Sink sink )
810        throws XmlPullParserException
811    {
812        String text = getText( parser );
813
814        if ( isScriptBlock() )
815        {
816            sink.unknown( CDATA, new Object[] { Integer.valueOf( CDATA_TYPE ), text}, null );
817        }
818        else
819        {
820            sink.text( text );
821        }
822    }
823
824    /**
825     * Make sure sections are nested consecutively.
826     *
827     * <p>
828     * HTML doesn't have any sections, only sectionTitles (&lt;h2&gt; etc), that means we have to
829     * open close any sections that are missing in between.
830     * </p>
831     *
832     * <p>
833     * For instance, if the following sequence is parsed:
834     * <pre>
835     * &lt;h3&gt;&lt;/h3&gt;
836     * &lt;h6&gt;&lt;/h6&gt;
837     * </pre>
838     * we have to insert two section starts before we open the <code>&lt;h6&gt;</code>.
839     * In the following sequence
840     * <pre>
841     * &lt;h6&gt;&lt;/h6&gt;
842     * &lt;h3&gt;&lt;/h3&gt;
843     * </pre>
844     * we have to close two sections before we open the <code>&lt;h3&gt;</code>.
845     * </p>
846     *
847     * <p>The current level is set to newLevel afterwards.</p>
848     *
849     * @param newLevel the new section level, all upper levels have to be closed.
850     * @param sink the sink to receive the events.
851     */
852    protected void consecutiveSections( int newLevel, Sink sink )
853    {
854        closeOpenSections( newLevel, sink );
855        openMissingSections( newLevel, sink );
856
857        this.sectionLevel = newLevel;
858    }
859
860    /**
861     * Close open sections.
862     *
863     * @param newLevel the new section level, all upper levels have to be closed.
864     * @param sink the sink to receive the events.
865     */
866    private void closeOpenSections( int newLevel, Sink sink )
867    {
868        while ( this.sectionLevel >= newLevel )
869        {
870            if ( sectionLevel == Sink.SECTION_LEVEL_5 )
871            {
872                sink.section5_();
873            }
874            else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
875            {
876                sink.section4_();
877            }
878            else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
879            {
880                sink.section3_();
881            }
882            else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
883            {
884                sink.section2_();
885            }
886            else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
887            {
888                sink.section1_();
889            }
890
891            this.sectionLevel--;
892        }
893    }
894
895    /**
896     * Open missing sections.
897     *
898     * @param newLevel the new section level, all lower levels have to be opened.
899     * @param sink the sink to receive the events.
900     */
901    private void openMissingSections( int newLevel, Sink sink )
902    {
903        while ( this.sectionLevel < newLevel - 1 )
904        {
905            this.sectionLevel++;
906
907            if ( sectionLevel == Sink.SECTION_LEVEL_5 )
908            {
909                sink.section5();
910            }
911            else if ( sectionLevel == Sink.SECTION_LEVEL_4 )
912            {
913                sink.section4();
914            }
915            else if ( sectionLevel == Sink.SECTION_LEVEL_3 )
916            {
917                sink.section3();
918            }
919            else if ( sectionLevel == Sink.SECTION_LEVEL_2 )
920            {
921                sink.section2();
922            }
923            else if ( sectionLevel == Sink.SECTION_LEVEL_1 )
924            {
925                sink.section1();
926            }
927        }
928    }
929
930    /**
931     * Return the current section level.
932     *
933     * @return the current section level.
934     */
935    protected int getSectionLevel()
936    {
937        return this.sectionLevel;
938    }
939
940    /**
941     * Set the current section level.
942     *
943     * @param newLevel the new section level.
944     */
945    protected void setSectionLevel( int newLevel )
946    {
947        this.sectionLevel = newLevel;
948    }
949
950    /**
951     * Stop verbatim mode.
952     */
953    protected void verbatim_()
954    {
955        this.inVerbatim = false;
956    }
957
958    /**
959     * Start verbatim mode.
960     */
961    protected void verbatim()
962    {
963        this.inVerbatim = true;
964    }
965
966    /**
967     * Checks if we are currently inside a &lt;pre&gt; tag.
968     *
969     * @return true if we are currently in verbatim mode.
970     */
971    protected boolean isVerbatim()
972    {
973        return this.inVerbatim;
974    }
975
976    /**
977     * Checks if we are currently inside a &lt;script&gt; tag.
978     *
979     * @return true if we are currently inside <code>&lt;script&gt;</code> tags.
980     *
981     * @since 1.1.1.
982     */
983    protected boolean isScriptBlock()
984    {
985        return this.scriptBlock;
986    }
987
988    /**
989     * Checks if the given id is a valid Doxia id and if not, returns a transformed one.
990     *
991     * @param id The id to validate.
992     * @return A transformed id or the original id if it was already valid.
993     * @see DoxiaUtils#encodeId(String)
994     */
995    protected String validAnchor( String id )
996    {
997        if ( !DoxiaUtils.isValidId( id ) )
998        {
999            String linkAnchor = DoxiaUtils.encodeId( id, true );
1000
1001            String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'";
1002            logMessage( "modifiedLink", msg );
1003
1004            return linkAnchor;
1005        }
1006
1007        return id;
1008    }
1009
1010    /** {@inheritDoc} */
1011    @Override
1012    protected void init()
1013    {
1014        super.init();
1015
1016        this.scriptBlock = false;
1017        this.isLink = false;
1018        this.isAnchor = false;
1019        this.orderedListDepth = 0;
1020        this.sectionLevel = 0;
1021        this.inVerbatim = false;
1022        this.inFigure = false;
1023        while ( this.decoration.getAttributeNames().hasMoreElements() )
1024        {
1025            this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() );
1026        }
1027        this.warnMessages = null;
1028    }
1029
1030    private void handleAEnd( Sink sink )
1031    {
1032        if ( isLink )
1033        {
1034            sink.link_();
1035            isLink = false;
1036        }
1037        else if ( isAnchor )
1038        {
1039            sink.anchor_();
1040            isAnchor = false;
1041        }
1042    }
1043
1044    private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1045    {
1046        String href = parser.getAttributeValue( null, Attribute.HREF.toString() );
1047
1048        if ( href != null )
1049        {
1050            int hashIndex = href.indexOf( '#' );
1051            if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) )
1052            {
1053                String hash = href.substring( hashIndex + 1 );
1054
1055                if ( !DoxiaUtils.isValidId( hash ) )
1056                {
1057                    href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true );
1058
1059                    String msg = "Modified invalid link: '" + hash + "' to '" + href + "'";
1060                    logMessage( "modifiedLink", msg );
1061                }
1062            }
1063            sink.link( href, attribs );
1064            isLink = true;
1065        }
1066        else
1067        {
1068            String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
1069
1070            if ( name != null )
1071            {
1072                sink.anchor( validAnchor( name ), attribs );
1073                isAnchor = true;
1074            }
1075            else
1076            {
1077                String id = parser.getAttributeValue( null, Attribute.ID.toString() );
1078                if ( id != null )
1079                {
1080                    sink.anchor( validAnchor( id ), attribs );
1081                    isAnchor = true;
1082                }
1083            }
1084        }
1085    }
1086
1087    private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink )
1088    {
1089        boolean visited = true;
1090
1091        String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
1092
1093        if ( "figure".equals( divclass ) )
1094        {
1095            this.inFigure = true;
1096            SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs );
1097            atts.removeAttribute( SinkEventAttributes.CLASS );
1098            sink.figure( atts );
1099        }
1100        else
1101        {
1102            visited = false;
1103        }
1104
1105        return visited;
1106    }
1107
1108    private void handleFigureCaptionEnd( Sink sink )
1109    {
1110        if ( inFigure )
1111        {
1112            sink.figureCaption_();
1113        }
1114        else
1115        {
1116            sink.italic_();
1117        }
1118    }
1119
1120    private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs )
1121    {
1122        if ( inFigure )
1123        {
1124            sink.figureCaption( attribs );
1125        }
1126        else
1127        {
1128            sink.italic();
1129        }
1130    }
1131
1132    private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1133    {
1134        String src = parser.getAttributeValue( null, Attribute.SRC.toString() );
1135
1136        if ( src != null )
1137        {
1138            sink.figureGraphics( src, attribs );
1139        }
1140    }
1141
1142    private void handleLIStart( Sink sink, SinkEventAttributeSet attribs )
1143    {
1144        if ( orderedListDepth == 0 )
1145        {
1146            sink.listItem( attribs );
1147        }
1148        else
1149        {
1150            sink.numberedListItem( attribs );
1151        }
1152    }
1153
1154    private void handleListItemEnd( Sink sink )
1155    {
1156        if ( orderedListDepth == 0 )
1157        {
1158            sink.listItem_();
1159        }
1160        else
1161        {
1162            sink.numberedListItem_();
1163        }
1164    }
1165
1166    private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
1167    {
1168        int numbering = Sink.NUMBERING_DECIMAL;
1169        // this will have to be generalized if we handle styles
1170        String style = parser.getAttributeValue( null, Attribute.STYLE.toString() );
1171
1172        if ( style != null )
1173        {
1174            if ( "list-style-type: upper-alpha".equals( style ) )
1175            {
1176                numbering = Sink.NUMBERING_UPPER_ALPHA;
1177            }
1178            else if ( "list-style-type: lower-alpha".equals( style ) )
1179            {
1180                numbering = Sink.NUMBERING_LOWER_ALPHA;
1181            }
1182            else if ( "list-style-type: upper-roman".equals( style ) )
1183            {
1184                numbering = Sink.NUMBERING_UPPER_ROMAN;
1185            }
1186            else if ( "list-style-type: lower-roman".equals( style ) )
1187            {
1188                numbering = Sink.NUMBERING_LOWER_ROMAN;
1189            }
1190            else if ( "list-style-type: decimal".equals( style ) )
1191            {
1192                numbering = Sink.NUMBERING_DECIMAL;
1193            }
1194        }
1195
1196        sink.numberedList( numbering, attribs );
1197        orderedListDepth++;
1198    }
1199
1200    private void handlePStart( Sink sink, SinkEventAttributeSet attribs )
1201    {
1202        if ( !inFigure )
1203        {
1204            sink.paragraph( attribs );
1205        }
1206    }
1207
1208    /*
1209     * The PRE element tells visual user agents that the enclosed text is
1210     * "preformatted". When handling preformatted text, visual user agents:
1211     * - May leave white space intact.
1212     * - May render text with a fixed-pitch font.
1213     * - May disable automatic word wrap.
1214     * - Must not disable bidirectional processing.
1215     * Non-visual user agents are not required to respect extra white space
1216     * in the content of a PRE element.
1217     */
1218    private void handlePreStart( SinkEventAttributeSet attribs, Sink sink )
1219    {
1220        verbatim();
1221        attribs.removeAttribute( SinkEventAttributes.DECORATION );
1222        sink.verbatim( attribs );
1223    }
1224
1225    private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs )
1226    {
1227        consecutiveSections( level, sink );
1228        sink.section( level, attribs );
1229        sink.sectionTitle( level, attribs );
1230    }
1231
1232    private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser )
1233    {
1234        sink.table( attribs );
1235        String border = parser.getAttributeValue( null, Attribute.BORDER.toString() );
1236        boolean grid = true;
1237
1238        if ( border == null || "0".equals( border ) )
1239        {
1240            grid = false;
1241        }
1242
1243        String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() );
1244        int[] justif = {Sink.JUSTIFY_LEFT};
1245
1246        if ( "center".equals( align ) )
1247        {
1248            justif[0] = Sink.JUSTIFY_CENTER;
1249        }
1250        else if ( "right".equals( align ) )
1251        {
1252            justif[0] = Sink.JUSTIFY_RIGHT;
1253        }
1254
1255        sink.tableRows( justif, grid );
1256    }
1257
1258    /**
1259     * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>.
1260     *
1261     * @param key not null
1262     * @param msg not null
1263     * @see #parse(Reader, Sink)
1264     * @since 1.1.1
1265     */
1266    private void logMessage( String key, String msg )
1267    {
1268        final String log = "[XHTML Parser] " + msg;
1269        if ( getLog().isDebugEnabled() )
1270        {
1271            getLog().debug( log );
1272
1273            return;
1274        }
1275
1276        if ( warnMessages == null )
1277        {
1278            warnMessages = new HashMap<String, Set<String>>();
1279        }
1280
1281        Set<String> set = warnMessages.get( key );
1282        if ( set == null )
1283        {
1284            set = new TreeSet<String>();
1285        }
1286        set.add( log );
1287        warnMessages.put( key, set );
1288    }
1289
1290    /**
1291     * @since 1.1.1
1292     */
1293    private void logWarnings()
1294    {
1295        if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() )
1296        {
1297            for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() )
1298            {
1299                for ( String msg : entry.getValue() )
1300                {
1301                    getLog().warn( msg );
1302                }
1303            }
1304
1305            this.warnMessages = null;
1306        }
1307    }
1308}