001package org.apache.maven.doxia.parser; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022import java.io.Reader; 023import java.util.HashMap; 024import java.util.Map; 025import java.util.Set; 026import java.util.TreeSet; 027 028import javax.swing.text.html.HTML.Attribute; 029 030import org.apache.maven.doxia.macro.MacroExecutionException; 031import org.apache.maven.doxia.markup.HtmlMarkup; 032import org.apache.maven.doxia.sink.Sink; 033import org.apache.maven.doxia.sink.SinkEventAttributes; 034import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 035import org.apache.maven.doxia.util.DoxiaUtils; 036 037import org.codehaus.plexus.util.StringUtils; 038import org.codehaus.plexus.util.xml.pull.XmlPullParser; 039import org.codehaus.plexus.util.xml.pull.XmlPullParserException; 040 041/** 042 * Common base parser for xhtml events. 043 * 044 * @author <a href="mailto:jason@maven.org">Jason van Zyl</a> 045 * @author ltheussl 046 * @version $Id$ 047 * @since 1.1 048 */ 049public class XhtmlBaseParser 050 extends AbstractXmlParser 051 implements HtmlMarkup 052{ 053 /** 054 * True if a <script></script> or <style></style> block is read. CDATA sections within are 055 * handled as rawText. 056 */ 057 private boolean scriptBlock; 058 059 /** Used to distinguish <a href=""> from <a name="">. */ 060 private boolean isLink; 061 062 /** Used to distinguish <a href=""> from <a name="">. */ 063 private boolean isAnchor; 064 065 /** Used for nested lists. */ 066 private int orderedListDepth = 0; 067 068 /** Counts section level. */ 069 private int sectionLevel; 070 071 /** Verbatim flag, true whenever we are inside a <pre> tag. */ 072 private boolean inVerbatim; 073 074 /** Used to recognize the case of img inside figure. */ 075 private boolean inFigure; 076 077 /** Used to wrap the definedTerm with its definition, even when one is omitted */ 078 boolean hasDefinitionListItem = false; 079 080 /** Decoration properties, eg for texts. */ 081 private final SinkEventAttributeSet decoration = new SinkEventAttributeSet(); 082 083 /** Map of warn messages with a String as key to describe the error type and a Set as value. 084 * Using to reduce warn messages. */ 085 private Map<String, Set<String>> warnMessages; 086 087 /** {@inheritDoc} */ 088 @Override 089 public void parse( Reader source, Sink sink ) 090 throws ParseException 091 { 092 init(); 093 094 try 095 { 096 super.parse( source, sink ); 097 } 098 finally 099 { 100 logWarnings(); 101 102 setSecondParsing( false ); 103 init(); 104 } 105 } 106 107 /** 108 * {@inheritDoc} 109 * 110 * Adds all XHTML (HTML 4.0) entities to the parser so that they can be recognized and resolved 111 * without additional DTD. 112 */ 113 @Override 114 protected void initXmlParser( XmlPullParser parser ) 115 throws XmlPullParserException 116 { 117 super.initXmlParser( parser ); 118 119 // the entities taken from org.apache.maven.doxia.document.io.xpp3.DocumentXpp3Reader, 120 // which is generated automatically 121 122 // ---------------------------------------------------------------------- 123 // Latin 1 entities 124 // ---------------------------------------------------------------------- 125 126 parser.defineEntityReplacementText( "nbsp", "\u00a0" ); 127 parser.defineEntityReplacementText( "iexcl", "\u00a1" ); 128 parser.defineEntityReplacementText( "cent", "\u00a2" ); 129 parser.defineEntityReplacementText( "pound", "\u00a3" ); 130 parser.defineEntityReplacementText( "curren", "\u00a4" ); 131 parser.defineEntityReplacementText( "yen", "\u00a5" ); 132 parser.defineEntityReplacementText( "brvbar", "\u00a6" ); 133 parser.defineEntityReplacementText( "sect", "\u00a7" ); 134 parser.defineEntityReplacementText( "uml", "\u00a8" ); 135 parser.defineEntityReplacementText( "copy", "\u00a9" ); 136 parser.defineEntityReplacementText( "ordf", "\u00aa" ); 137 parser.defineEntityReplacementText( "laquo", "\u00ab" ); 138 parser.defineEntityReplacementText( "not", "\u00ac" ); 139 parser.defineEntityReplacementText( "shy", "\u00ad" ); 140 parser.defineEntityReplacementText( "reg", "\u00ae" ); 141 parser.defineEntityReplacementText( "macr", "\u00af" ); 142 parser.defineEntityReplacementText( "deg", "\u00b0" ); 143 parser.defineEntityReplacementText( "plusmn", "\u00b1" ); 144 parser.defineEntityReplacementText( "sup2", "\u00b2" ); 145 parser.defineEntityReplacementText( "sup3", "\u00b3" ); 146 parser.defineEntityReplacementText( "acute", "\u00b4" ); 147 parser.defineEntityReplacementText( "micro", "\u00b5" ); 148 parser.defineEntityReplacementText( "para", "\u00b6" ); 149 parser.defineEntityReplacementText( "middot", "\u00b7" ); 150 parser.defineEntityReplacementText( "cedil", "\u00b8" ); 151 parser.defineEntityReplacementText( "sup1", "\u00b9" ); 152 parser.defineEntityReplacementText( "ordm", "\u00ba" ); 153 parser.defineEntityReplacementText( "raquo", "\u00bb" ); 154 parser.defineEntityReplacementText( "frac14", "\u00bc" ); 155 parser.defineEntityReplacementText( "frac12", "\u00bd" ); 156 parser.defineEntityReplacementText( "frac34", "\u00be" ); 157 parser.defineEntityReplacementText( "iquest", "\u00bf" ); 158 parser.defineEntityReplacementText( "Agrave", "\u00c0" ); 159 parser.defineEntityReplacementText( "Aacute", "\u00c1" ); 160 parser.defineEntityReplacementText( "Acirc", "\u00c2" ); 161 parser.defineEntityReplacementText( "Atilde", "\u00c3" ); 162 parser.defineEntityReplacementText( "Auml", "\u00c4" ); 163 parser.defineEntityReplacementText( "Aring", "\u00c5" ); 164 parser.defineEntityReplacementText( "AElig", "\u00c6" ); 165 parser.defineEntityReplacementText( "Ccedil", "\u00c7" ); 166 parser.defineEntityReplacementText( "Egrave", "\u00c8" ); 167 parser.defineEntityReplacementText( "Eacute", "\u00c9" ); 168 parser.defineEntityReplacementText( "Ecirc", "\u00ca" ); 169 parser.defineEntityReplacementText( "Euml", "\u00cb" ); 170 parser.defineEntityReplacementText( "Igrave", "\u00cc" ); 171 parser.defineEntityReplacementText( "Iacute", "\u00cd" ); 172 parser.defineEntityReplacementText( "Icirc", "\u00ce" ); 173 parser.defineEntityReplacementText( "Iuml", "\u00cf" ); 174 parser.defineEntityReplacementText( "ETH", "\u00d0" ); 175 parser.defineEntityReplacementText( "Ntilde", "\u00d1" ); 176 parser.defineEntityReplacementText( "Ograve", "\u00d2" ); 177 parser.defineEntityReplacementText( "Oacute", "\u00d3" ); 178 parser.defineEntityReplacementText( "Ocirc", "\u00d4" ); 179 parser.defineEntityReplacementText( "Otilde", "\u00d5" ); 180 parser.defineEntityReplacementText( "Ouml", "\u00d6" ); 181 parser.defineEntityReplacementText( "times", "\u00d7" ); 182 parser.defineEntityReplacementText( "Oslash", "\u00d8" ); 183 parser.defineEntityReplacementText( "Ugrave", "\u00d9" ); 184 parser.defineEntityReplacementText( "Uacute", "\u00da" ); 185 parser.defineEntityReplacementText( "Ucirc", "\u00db" ); 186 parser.defineEntityReplacementText( "Uuml", "\u00dc" ); 187 parser.defineEntityReplacementText( "Yacute", "\u00dd" ); 188 parser.defineEntityReplacementText( "THORN", "\u00de" ); 189 parser.defineEntityReplacementText( "szlig", "\u00df" ); 190 parser.defineEntityReplacementText( "agrave", "\u00e0" ); 191 parser.defineEntityReplacementText( "aacute", "\u00e1" ); 192 parser.defineEntityReplacementText( "acirc", "\u00e2" ); 193 parser.defineEntityReplacementText( "atilde", "\u00e3" ); 194 parser.defineEntityReplacementText( "auml", "\u00e4" ); 195 parser.defineEntityReplacementText( "aring", "\u00e5" ); 196 parser.defineEntityReplacementText( "aelig", "\u00e6" ); 197 parser.defineEntityReplacementText( "ccedil", "\u00e7" ); 198 parser.defineEntityReplacementText( "egrave", "\u00e8" ); 199 parser.defineEntityReplacementText( "eacute", "\u00e9" ); 200 parser.defineEntityReplacementText( "ecirc", "\u00ea" ); 201 parser.defineEntityReplacementText( "euml", "\u00eb" ); 202 parser.defineEntityReplacementText( "igrave", "\u00ec" ); 203 parser.defineEntityReplacementText( "iacute", "\u00ed" ); 204 parser.defineEntityReplacementText( "icirc", "\u00ee" ); 205 parser.defineEntityReplacementText( "iuml", "\u00ef" ); 206 parser.defineEntityReplacementText( "eth", "\u00f0" ); 207 parser.defineEntityReplacementText( "ntilde", "\u00f1" ); 208 parser.defineEntityReplacementText( "ograve", "\u00f2" ); 209 parser.defineEntityReplacementText( "oacute", "\u00f3" ); 210 parser.defineEntityReplacementText( "ocirc", "\u00f4" ); 211 parser.defineEntityReplacementText( "otilde", "\u00f5" ); 212 parser.defineEntityReplacementText( "ouml", "\u00f6" ); 213 parser.defineEntityReplacementText( "divide", "\u00f7" ); 214 parser.defineEntityReplacementText( "oslash", "\u00f8" ); 215 parser.defineEntityReplacementText( "ugrave", "\u00f9" ); 216 parser.defineEntityReplacementText( "uacute", "\u00fa" ); 217 parser.defineEntityReplacementText( "ucirc", "\u00fb" ); 218 parser.defineEntityReplacementText( "uuml", "\u00fc" ); 219 parser.defineEntityReplacementText( "yacute", "\u00fd" ); 220 parser.defineEntityReplacementText( "thorn", "\u00fe" ); 221 parser.defineEntityReplacementText( "yuml", "\u00ff" ); 222 223 // ---------------------------------------------------------------------- 224 // Special entities 225 // ---------------------------------------------------------------------- 226 227 parser.defineEntityReplacementText( "OElig", "\u0152" ); 228 parser.defineEntityReplacementText( "oelig", "\u0153" ); 229 parser.defineEntityReplacementText( "Scaron", "\u0160" ); 230 parser.defineEntityReplacementText( "scaron", "\u0161" ); 231 parser.defineEntityReplacementText( "Yuml", "\u0178" ); 232 parser.defineEntityReplacementText( "circ", "\u02c6" ); 233 parser.defineEntityReplacementText( "tilde", "\u02dc" ); 234 parser.defineEntityReplacementText( "ensp", "\u2002" ); 235 parser.defineEntityReplacementText( "emsp", "\u2003" ); 236 parser.defineEntityReplacementText( "thinsp", "\u2009" ); 237 parser.defineEntityReplacementText( "zwnj", "\u200c" ); 238 parser.defineEntityReplacementText( "zwj", "\u200d" ); 239 parser.defineEntityReplacementText( "lrm", "\u200e" ); 240 parser.defineEntityReplacementText( "rlm", "\u200f" ); 241 parser.defineEntityReplacementText( "ndash", "\u2013" ); 242 parser.defineEntityReplacementText( "mdash", "\u2014" ); 243 parser.defineEntityReplacementText( "lsquo", "\u2018" ); 244 parser.defineEntityReplacementText( "rsquo", "\u2019" ); 245 parser.defineEntityReplacementText( "sbquo", "\u201a" ); 246 parser.defineEntityReplacementText( "ldquo", "\u201c" ); 247 parser.defineEntityReplacementText( "rdquo", "\u201d" ); 248 parser.defineEntityReplacementText( "bdquo", "\u201e" ); 249 parser.defineEntityReplacementText( "dagger", "\u2020" ); 250 parser.defineEntityReplacementText( "Dagger", "\u2021" ); 251 parser.defineEntityReplacementText( "permil", "\u2030" ); 252 parser.defineEntityReplacementText( "lsaquo", "\u2039" ); 253 parser.defineEntityReplacementText( "rsaquo", "\u203a" ); 254 parser.defineEntityReplacementText( "euro", "\u20ac" ); 255 256 // ---------------------------------------------------------------------- 257 // Symbol entities 258 // ---------------------------------------------------------------------- 259 260 parser.defineEntityReplacementText( "fnof", "\u0192" ); 261 parser.defineEntityReplacementText( "Alpha", "\u0391" ); 262 parser.defineEntityReplacementText( "Beta", "\u0392" ); 263 parser.defineEntityReplacementText( "Gamma", "\u0393" ); 264 parser.defineEntityReplacementText( "Delta", "\u0394" ); 265 parser.defineEntityReplacementText( "Epsilon", "\u0395" ); 266 parser.defineEntityReplacementText( "Zeta", "\u0396" ); 267 parser.defineEntityReplacementText( "Eta", "\u0397" ); 268 parser.defineEntityReplacementText( "Theta", "\u0398" ); 269 parser.defineEntityReplacementText( "Iota", "\u0399" ); 270 parser.defineEntityReplacementText( "Kappa", "\u039a" ); 271 parser.defineEntityReplacementText( "Lambda", "\u039b" ); 272 parser.defineEntityReplacementText( "Mu", "\u039c" ); 273 parser.defineEntityReplacementText( "Nu", "\u039d" ); 274 parser.defineEntityReplacementText( "Xi", "\u039e" ); 275 parser.defineEntityReplacementText( "Omicron", "\u039f" ); 276 parser.defineEntityReplacementText( "Pi", "\u03a0" ); 277 parser.defineEntityReplacementText( "Rho", "\u03a1" ); 278 parser.defineEntityReplacementText( "Sigma", "\u03a3" ); 279 parser.defineEntityReplacementText( "Tau", "\u03a4" ); 280 parser.defineEntityReplacementText( "Upsilon", "\u03a5" ); 281 parser.defineEntityReplacementText( "Phi", "\u03a6" ); 282 parser.defineEntityReplacementText( "Chi", "\u03a7" ); 283 parser.defineEntityReplacementText( "Psi", "\u03a8" ); 284 parser.defineEntityReplacementText( "Omega", "\u03a9" ); 285 parser.defineEntityReplacementText( "alpha", "\u03b1" ); 286 parser.defineEntityReplacementText( "beta", "\u03b2" ); 287 parser.defineEntityReplacementText( "gamma", "\u03b3" ); 288 parser.defineEntityReplacementText( "delta", "\u03b4" ); 289 parser.defineEntityReplacementText( "epsilon", "\u03b5" ); 290 parser.defineEntityReplacementText( "zeta", "\u03b6" ); 291 parser.defineEntityReplacementText( "eta", "\u03b7" ); 292 parser.defineEntityReplacementText( "theta", "\u03b8" ); 293 parser.defineEntityReplacementText( "iota", "\u03b9" ); 294 parser.defineEntityReplacementText( "kappa", "\u03ba" ); 295 parser.defineEntityReplacementText( "lambda", "\u03bb" ); 296 parser.defineEntityReplacementText( "mu", "\u03bc" ); 297 parser.defineEntityReplacementText( "nu", "\u03bd" ); 298 parser.defineEntityReplacementText( "xi", "\u03be" ); 299 parser.defineEntityReplacementText( "omicron", "\u03bf" ); 300 parser.defineEntityReplacementText( "pi", "\u03c0" ); 301 parser.defineEntityReplacementText( "rho", "\u03c1" ); 302 parser.defineEntityReplacementText( "sigmaf", "\u03c2" ); 303 parser.defineEntityReplacementText( "sigma", "\u03c3" ); 304 parser.defineEntityReplacementText( "tau", "\u03c4" ); 305 parser.defineEntityReplacementText( "upsilon", "\u03c5" ); 306 parser.defineEntityReplacementText( "phi", "\u03c6" ); 307 parser.defineEntityReplacementText( "chi", "\u03c7" ); 308 parser.defineEntityReplacementText( "psi", "\u03c8" ); 309 parser.defineEntityReplacementText( "omega", "\u03c9" ); 310 parser.defineEntityReplacementText( "thetasym", "\u03d1" ); 311 parser.defineEntityReplacementText( "upsih", "\u03d2" ); 312 parser.defineEntityReplacementText( "piv", "\u03d6" ); 313 parser.defineEntityReplacementText( "bull", "\u2022" ); 314 parser.defineEntityReplacementText( "hellip", "\u2026" ); 315 parser.defineEntityReplacementText( "prime", "\u2032" ); 316 parser.defineEntityReplacementText( "Prime", "\u2033" ); 317 parser.defineEntityReplacementText( "oline", "\u203e" ); 318 parser.defineEntityReplacementText( "frasl", "\u2044" ); 319 parser.defineEntityReplacementText( "weierp", "\u2118" ); 320 parser.defineEntityReplacementText( "image", "\u2111" ); 321 parser.defineEntityReplacementText( "real", "\u211c" ); 322 parser.defineEntityReplacementText( "trade", "\u2122" ); 323 parser.defineEntityReplacementText( "alefsym", "\u2135" ); 324 parser.defineEntityReplacementText( "larr", "\u2190" ); 325 parser.defineEntityReplacementText( "uarr", "\u2191" ); 326 parser.defineEntityReplacementText( "rarr", "\u2192" ); 327 parser.defineEntityReplacementText( "darr", "\u2193" ); 328 parser.defineEntityReplacementText( "harr", "\u2194" ); 329 parser.defineEntityReplacementText( "crarr", "\u21b5" ); 330 parser.defineEntityReplacementText( "lArr", "\u21d0" ); 331 parser.defineEntityReplacementText( "uArr", "\u21d1" ); 332 parser.defineEntityReplacementText( "rArr", "\u21d2" ); 333 parser.defineEntityReplacementText( "dArr", "\u21d3" ); 334 parser.defineEntityReplacementText( "hArr", "\u21d4" ); 335 parser.defineEntityReplacementText( "forall", "\u2200" ); 336 parser.defineEntityReplacementText( "part", "\u2202" ); 337 parser.defineEntityReplacementText( "exist", "\u2203" ); 338 parser.defineEntityReplacementText( "empty", "\u2205" ); 339 parser.defineEntityReplacementText( "nabla", "\u2207" ); 340 parser.defineEntityReplacementText( "isin", "\u2208" ); 341 parser.defineEntityReplacementText( "notin", "\u2209" ); 342 parser.defineEntityReplacementText( "ni", "\u220b" ); 343 parser.defineEntityReplacementText( "prod", "\u220f" ); 344 parser.defineEntityReplacementText( "sum", "\u2211" ); 345 parser.defineEntityReplacementText( "minus", "\u2212" ); 346 parser.defineEntityReplacementText( "lowast", "\u2217" ); 347 parser.defineEntityReplacementText( "radic", "\u221a" ); 348 parser.defineEntityReplacementText( "prop", "\u221d" ); 349 parser.defineEntityReplacementText( "infin", "\u221e" ); 350 parser.defineEntityReplacementText( "ang", "\u2220" ); 351 parser.defineEntityReplacementText( "and", "\u2227" ); 352 parser.defineEntityReplacementText( "or", "\u2228" ); 353 parser.defineEntityReplacementText( "cap", "\u2229" ); 354 parser.defineEntityReplacementText( "cup", "\u222a" ); 355 parser.defineEntityReplacementText( "int", "\u222b" ); 356 parser.defineEntityReplacementText( "there4", "\u2234" ); 357 parser.defineEntityReplacementText( "sim", "\u223c" ); 358 parser.defineEntityReplacementText( "cong", "\u2245" ); 359 parser.defineEntityReplacementText( "asymp", "\u2248" ); 360 parser.defineEntityReplacementText( "ne", "\u2260" ); 361 parser.defineEntityReplacementText( "equiv", "\u2261" ); 362 parser.defineEntityReplacementText( "le", "\u2264" ); 363 parser.defineEntityReplacementText( "ge", "\u2265" ); 364 parser.defineEntityReplacementText( "sub", "\u2282" ); 365 parser.defineEntityReplacementText( "sup", "\u2283" ); 366 parser.defineEntityReplacementText( "nsub", "\u2284" ); 367 parser.defineEntityReplacementText( "sube", "\u2286" ); 368 parser.defineEntityReplacementText( "supe", "\u2287" ); 369 parser.defineEntityReplacementText( "oplus", "\u2295" ); 370 parser.defineEntityReplacementText( "otimes", "\u2297" ); 371 parser.defineEntityReplacementText( "perp", "\u22a5" ); 372 parser.defineEntityReplacementText( "sdot", "\u22c5" ); 373 parser.defineEntityReplacementText( "lceil", "\u2308" ); 374 parser.defineEntityReplacementText( "rceil", "\u2309" ); 375 parser.defineEntityReplacementText( "lfloor", "\u230a" ); 376 parser.defineEntityReplacementText( "rfloor", "\u230b" ); 377 parser.defineEntityReplacementText( "lang", "\u2329" ); 378 parser.defineEntityReplacementText( "rang", "\u232a" ); 379 parser.defineEntityReplacementText( "loz", "\u25ca" ); 380 parser.defineEntityReplacementText( "spades", "\u2660" ); 381 parser.defineEntityReplacementText( "clubs", "\u2663" ); 382 parser.defineEntityReplacementText( "hearts", "\u2665" ); 383 parser.defineEntityReplacementText( "diams", "\u2666" ); 384 } 385 386 /** 387 * <p> 388 * Goes through a common list of possible html start tags. These include only tags that can go into 389 * the body of a xhtml document and so should be re-usable by different xhtml-based parsers. 390 * </p> 391 * <p> 392 * The currently handled tags are: 393 * </p> 394 * <p> 395 * <code> 396 * <h2>, <h3>, <h4>, <h5>, <h6>, <p>, <pre>, 397 * <ul>, <ol>, <li>, <dl>, <dt>, <dd>, <b>, <strong>, 398 * <i>, <em>, <code>, <samp>, <tt>, <a>, <table>, <tr>, 399 * <th>, <td>, <caption>, <br/>, <hr/>, <img/>. 400 * </code> 401 * </p> 402 * 403 * @param parser A parser. 404 * @param sink the sink to receive the events. 405 * @return True if the event has been handled by this method, i.e. the tag was recognized, false otherwise. 406 */ 407 protected boolean baseStartTag( XmlPullParser parser, Sink sink ) 408 { 409 boolean visited = true; 410 411 SinkEventAttributeSet attribs = getAttributesFromParser( parser ); 412 413 if ( parser.getName().equals( HtmlMarkup.H2.toString() ) ) 414 { 415 handleSectionStart( sink, Sink.SECTION_LEVEL_1, attribs ); 416 } 417 else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) ) 418 { 419 handleSectionStart( sink, Sink.SECTION_LEVEL_2, attribs ); 420 } 421 else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) ) 422 { 423 handleSectionStart( sink, Sink.SECTION_LEVEL_3, attribs ); 424 } 425 else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) ) 426 { 427 handleSectionStart( sink, Sink.SECTION_LEVEL_4, attribs ); 428 } 429 else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) ) 430 { 431 handleSectionStart( sink, Sink.SECTION_LEVEL_5, attribs ); 432 } 433 else if ( parser.getName().equals( HtmlMarkup.U.toString() ) ) 434 { 435 decoration.addAttribute( SinkEventAttributes.DECORATION, "underline" ); 436 } 437 else if ( parser.getName().equals( HtmlMarkup.S.toString() ) 438 || parser.getName().equals( HtmlMarkup.STRIKE.toString() ) 439 || parser.getName().equals( "del" ) ) 440 { 441 decoration.addAttribute( SinkEventAttributes.DECORATION, "line-through" ); 442 } 443 else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) ) 444 { 445 decoration.addAttribute( SinkEventAttributes.VALIGN, "sub" ); 446 } 447 else if ( parser.getName().equals( HtmlMarkup.SUP.toString() ) ) 448 { 449 decoration.addAttribute( SinkEventAttributes.VALIGN, "sup" ); 450 } 451 else if ( parser.getName().equals( HtmlMarkup.P.toString() ) ) 452 { 453 handlePStart( sink, attribs ); 454 } 455 else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) ) 456 { 457 visited = handleDivStart( parser, attribs, sink ); 458 } 459 else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) ) 460 { 461 handlePreStart( attribs, sink ); 462 } 463 else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) ) 464 { 465 sink.list( attribs ); 466 } 467 else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) ) 468 { 469 handleOLStart( parser, sink, attribs ); 470 } 471 else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) ) 472 { 473 handleLIStart( sink, attribs ); 474 } 475 else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) ) 476 { 477 sink.definitionList( attribs ); 478 } 479 else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) ) 480 { 481 if ( hasDefinitionListItem ) 482 { 483 // close previous listItem 484 sink.definitionListItem_(); 485 } 486 sink.definitionListItem( attribs ); 487 hasDefinitionListItem = true; 488 sink.definedTerm( attribs ); 489 } 490 else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) ) 491 { 492 if ( !hasDefinitionListItem ) 493 { 494 sink.definitionListItem( attribs ); 495 } 496 sink.definition( attribs ); 497 } 498 else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) ) 499 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) ) 500 { 501 sink.bold(); 502 } 503 else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) ) 504 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) ) 505 { 506 handleFigureCaptionStart( sink, attribs ); 507 } 508 else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) ) 509 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) ) 510 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) ) 511 { 512 sink.monospaced(); 513 } 514 else if ( parser.getName().equals( HtmlMarkup.A.toString() ) ) 515 { 516 handleAStart( parser, sink, attribs ); 517 } 518 else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) ) 519 { 520 handleTableStart( sink, attribs, parser ); 521 } 522 else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) ) 523 { 524 sink.tableRow( attribs ); 525 } 526 else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) ) 527 { 528 sink.tableHeaderCell( attribs ); 529 } 530 else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) ) 531 { 532 sink.tableCell( attribs ); 533 } 534 else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) ) 535 { 536 sink.tableCaption( attribs ); 537 } 538 else if ( parser.getName().equals( HtmlMarkup.BR.toString() ) ) 539 { 540 sink.lineBreak( attribs ); 541 } 542 else if ( parser.getName().equals( HtmlMarkup.HR.toString() ) ) 543 { 544 sink.horizontalRule( attribs ); 545 } 546 else if ( parser.getName().equals( HtmlMarkup.IMG.toString() ) ) 547 { 548 handleImgStart( parser, sink, attribs ); 549 } 550 else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) 551 || parser.getName().equals( HtmlMarkup.STYLE.toString() ) ) 552 { 553 handleUnknown( parser, sink, TAG_TYPE_START ); 554 scriptBlock = true; 555 } 556 else 557 { 558 visited = false; 559 } 560 561 return visited; 562 } 563 564 /** 565 * <p> 566 * Goes through a common list of possible html end tags. 567 * These should be re-usable by different xhtml-based parsers. 568 * The tags handled here are the same as for {@link #baseStartTag(XmlPullParser,Sink)}, 569 * except for the empty elements (<code><br/>, <hr/>, <img/><code>). 570 * </p> 571 * 572 * @param parser A parser. 573 * @param sink the sink to receive the events. 574 * @return True if the event has been handled by this method, false otherwise. 575 */ 576 protected boolean baseEndTag( XmlPullParser parser, Sink sink ) 577 { 578 boolean visited = true; 579 580 if ( parser.getName().equals( HtmlMarkup.P.toString() ) ) 581 { 582 if ( !inFigure ) 583 { 584 sink.paragraph_(); 585 } 586 } 587 else if ( parser.getName().equals( HtmlMarkup.U.toString() ) 588 || parser.getName().equals( HtmlMarkup.S.toString() ) 589 || parser.getName().equals( HtmlMarkup.STRIKE.toString() ) 590 || parser.getName().equals( "del" ) ) 591 { 592 decoration.removeAttribute( SinkEventAttributes.DECORATION ); 593 } 594 else if ( parser.getName().equals( HtmlMarkup.SUB.toString() ) 595 || parser.getName().equals( HtmlMarkup.SUP.toString() ) ) 596 { 597 decoration.removeAttribute( SinkEventAttributes.VALIGN ); 598 } 599 else if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) ) 600 { 601 if ( inFigure ) 602 { 603 sink.figure_(); 604 this.inFigure = false; 605 } 606 else 607 { 608 visited = false; 609 } 610 } 611 else if ( parser.getName().equals( HtmlMarkup.PRE.toString() ) ) 612 { 613 verbatim_(); 614 615 sink.verbatim_(); 616 } 617 else if ( parser.getName().equals( HtmlMarkup.UL.toString() ) ) 618 { 619 sink.list_(); 620 } 621 else if ( parser.getName().equals( HtmlMarkup.OL.toString() ) ) 622 { 623 sink.numberedList_(); 624 orderedListDepth--; 625 } 626 else if ( parser.getName().equals( HtmlMarkup.LI.toString() ) ) 627 { 628 handleListItemEnd( sink ); 629 } 630 else if ( parser.getName().equals( HtmlMarkup.DL.toString() ) ) 631 { 632 if ( hasDefinitionListItem ) 633 { 634 sink.definitionListItem_(); 635 hasDefinitionListItem = false; 636 } 637 sink.definitionList_(); 638 } 639 else if ( parser.getName().equals( HtmlMarkup.DT.toString() ) ) 640 { 641 sink.definedTerm_(); 642 } 643 else if ( parser.getName().equals( HtmlMarkup.DD.toString() ) ) 644 { 645 sink.definition_(); 646 sink.definitionListItem_(); 647 hasDefinitionListItem = false; 648 } 649 else if ( ( parser.getName().equals( HtmlMarkup.B.toString() ) ) 650 || ( parser.getName().equals( HtmlMarkup.STRONG.toString() ) ) ) 651 { 652 sink.bold_(); 653 } 654 else if ( ( parser.getName().equals( HtmlMarkup.I.toString() ) ) 655 || ( parser.getName().equals( HtmlMarkup.EM.toString() ) ) ) 656 { 657 handleFigureCaptionEnd( sink ); 658 } 659 else if ( ( parser.getName().equals( HtmlMarkup.CODE.toString() ) ) 660 || ( parser.getName().equals( HtmlMarkup.SAMP.toString() ) ) 661 || ( parser.getName().equals( HtmlMarkup.TT.toString() ) ) ) 662 { 663 sink.monospaced_(); 664 } 665 else if ( parser.getName().equals( HtmlMarkup.A.toString() ) ) 666 { 667 handleAEnd( sink ); 668 } 669 670 // ---------------------------------------------------------------------- 671 // Tables 672 // ---------------------------------------------------------------------- 673 674 else if ( parser.getName().equals( HtmlMarkup.TABLE.toString() ) ) 675 { 676 sink.tableRows_(); 677 678 sink.table_(); 679 } 680 else if ( parser.getName().equals( HtmlMarkup.TR.toString() ) ) 681 { 682 sink.tableRow_(); 683 } 684 else if ( parser.getName().equals( HtmlMarkup.TH.toString() ) ) 685 { 686 sink.tableHeaderCell_(); 687 } 688 else if ( parser.getName().equals( HtmlMarkup.TD.toString() ) ) 689 { 690 sink.tableCell_(); 691 } 692 else if ( parser.getName().equals( HtmlMarkup.CAPTION.toString() ) ) 693 { 694 sink.tableCaption_(); 695 } 696 else if ( parser.getName().equals( HtmlMarkup.H2.toString() ) ) 697 { 698 sink.sectionTitle1_(); 699 } 700 else if ( parser.getName().equals( HtmlMarkup.H3.toString() ) ) 701 { 702 sink.sectionTitle2_(); 703 } 704 else if ( parser.getName().equals( HtmlMarkup.H4.toString() ) ) 705 { 706 sink.sectionTitle3_(); 707 } 708 else if ( parser.getName().equals( HtmlMarkup.H5.toString() ) ) 709 { 710 sink.sectionTitle4_(); 711 } 712 else if ( parser.getName().equals( HtmlMarkup.H6.toString() ) ) 713 { 714 sink.sectionTitle5_(); 715 } 716 else if ( parser.getName().equals( HtmlMarkup.SCRIPT.toString() ) 717 || parser.getName().equals( HtmlMarkup.STYLE.toString() ) ) 718 { 719 handleUnknown( parser, sink, TAG_TYPE_END ); 720 721 scriptBlock = false; 722 } 723 else 724 { 725 visited = false; 726 } 727 728 return visited; 729 } 730 731 /** 732 * {@inheritDoc} 733 * 734 * Just calls {@link #baseStartTag(XmlPullParser,Sink)}, this should be 735 * overridden by implementing parsers to include additional tags. 736 */ 737 protected void handleStartTag( XmlPullParser parser, Sink sink ) 738 throws XmlPullParserException, MacroExecutionException 739 { 740 if ( !baseStartTag( parser, sink ) ) 741 { 742 if ( getLog().isWarnEnabled() ) 743 { 744 String position = "[" + parser.getLineNumber() + ":" 745 + parser.getColumnNumber() + "]"; 746 String tag = "<" + parser.getName() + ">"; 747 748 getLog().warn( "Unrecognized xml tag: " + tag + " at " + position ); 749 } 750 } 751 } 752 753 /** 754 * {@inheritDoc} 755 * 756 * Just calls {@link #baseEndTag(XmlPullParser,Sink)}, this should be 757 * overridden by implementing parsers to include additional tags. 758 */ 759 protected void handleEndTag( XmlPullParser parser, Sink sink ) 760 throws XmlPullParserException, MacroExecutionException 761 { 762 if ( !baseEndTag( parser, sink ) ) 763 { 764 // unrecognized tag is already logged in StartTag 765 } 766 } 767 768 /** {@inheritDoc} */ 769 @Override 770 protected void handleText( XmlPullParser parser, Sink sink ) 771 throws XmlPullParserException 772 { 773 String text = getText( parser ); 774 775 /* 776 * NOTE: Don't do any whitespace trimming here. Whitespace normalization has already been performed by the 777 * parser so any whitespace that makes it here is significant. 778 * 779 * NOTE: text within script tags is ignored, scripting code should be embedded in CDATA. 780 */ 781 if ( StringUtils.isNotEmpty( text ) && !isScriptBlock() ) 782 { 783 sink.text( text, decoration ); 784 } 785 } 786 787 /** {@inheritDoc} */ 788 @Override 789 protected void handleComment( XmlPullParser parser, Sink sink ) 790 throws XmlPullParserException 791 { 792 String text = getText( parser ); 793 794 if ( "PB".equals( text.trim() ) ) 795 { 796 sink.pageBreak(); 797 } 798 else 799 { 800 if ( isEmitComments() ) 801 { 802 sink.comment( text ); 803 } 804 } 805 } 806 807 /** {@inheritDoc} */ 808 @Override 809 protected void handleCdsect( XmlPullParser parser, Sink sink ) 810 throws XmlPullParserException 811 { 812 String text = getText( parser ); 813 814 if ( isScriptBlock() ) 815 { 816 sink.unknown( CDATA, new Object[] { Integer.valueOf( CDATA_TYPE ), text}, null ); 817 } 818 else 819 { 820 sink.text( text ); 821 } 822 } 823 824 /** 825 * Make sure sections are nested consecutively. 826 * 827 * <p> 828 * HTML doesn't have any sections, only sectionTitles (<h2> etc), that means we have to 829 * open close any sections that are missing in between. 830 * </p> 831 * 832 * <p> 833 * For instance, if the following sequence is parsed: 834 * <pre> 835 * <h3></h3> 836 * <h6></h6> 837 * </pre> 838 * we have to insert two section starts before we open the <code><h6></code>. 839 * In the following sequence 840 * <pre> 841 * <h6></h6> 842 * <h3></h3> 843 * </pre> 844 * we have to close two sections before we open the <code><h3></code>. 845 * </p> 846 * 847 * <p>The current level is set to newLevel afterwards.</p> 848 * 849 * @param newLevel the new section level, all upper levels have to be closed. 850 * @param sink the sink to receive the events. 851 */ 852 protected void consecutiveSections( int newLevel, Sink sink ) 853 { 854 closeOpenSections( newLevel, sink ); 855 openMissingSections( newLevel, sink ); 856 857 this.sectionLevel = newLevel; 858 } 859 860 /** 861 * Close open sections. 862 * 863 * @param newLevel the new section level, all upper levels have to be closed. 864 * @param sink the sink to receive the events. 865 */ 866 private void closeOpenSections( int newLevel, Sink sink ) 867 { 868 while ( this.sectionLevel >= newLevel ) 869 { 870 if ( sectionLevel == Sink.SECTION_LEVEL_5 ) 871 { 872 sink.section5_(); 873 } 874 else if ( sectionLevel == Sink.SECTION_LEVEL_4 ) 875 { 876 sink.section4_(); 877 } 878 else if ( sectionLevel == Sink.SECTION_LEVEL_3 ) 879 { 880 sink.section3_(); 881 } 882 else if ( sectionLevel == Sink.SECTION_LEVEL_2 ) 883 { 884 sink.section2_(); 885 } 886 else if ( sectionLevel == Sink.SECTION_LEVEL_1 ) 887 { 888 sink.section1_(); 889 } 890 891 this.sectionLevel--; 892 } 893 } 894 895 /** 896 * Open missing sections. 897 * 898 * @param newLevel the new section level, all lower levels have to be opened. 899 * @param sink the sink to receive the events. 900 */ 901 private void openMissingSections( int newLevel, Sink sink ) 902 { 903 while ( this.sectionLevel < newLevel - 1 ) 904 { 905 this.sectionLevel++; 906 907 if ( sectionLevel == Sink.SECTION_LEVEL_5 ) 908 { 909 sink.section5(); 910 } 911 else if ( sectionLevel == Sink.SECTION_LEVEL_4 ) 912 { 913 sink.section4(); 914 } 915 else if ( sectionLevel == Sink.SECTION_LEVEL_3 ) 916 { 917 sink.section3(); 918 } 919 else if ( sectionLevel == Sink.SECTION_LEVEL_2 ) 920 { 921 sink.section2(); 922 } 923 else if ( sectionLevel == Sink.SECTION_LEVEL_1 ) 924 { 925 sink.section1(); 926 } 927 } 928 } 929 930 /** 931 * Return the current section level. 932 * 933 * @return the current section level. 934 */ 935 protected int getSectionLevel() 936 { 937 return this.sectionLevel; 938 } 939 940 /** 941 * Set the current section level. 942 * 943 * @param newLevel the new section level. 944 */ 945 protected void setSectionLevel( int newLevel ) 946 { 947 this.sectionLevel = newLevel; 948 } 949 950 /** 951 * Stop verbatim mode. 952 */ 953 protected void verbatim_() 954 { 955 this.inVerbatim = false; 956 } 957 958 /** 959 * Start verbatim mode. 960 */ 961 protected void verbatim() 962 { 963 this.inVerbatim = true; 964 } 965 966 /** 967 * Checks if we are currently inside a <pre> tag. 968 * 969 * @return true if we are currently in verbatim mode. 970 */ 971 protected boolean isVerbatim() 972 { 973 return this.inVerbatim; 974 } 975 976 /** 977 * Checks if we are currently inside a <script> tag. 978 * 979 * @return true if we are currently inside <code><script></code> tags. 980 * 981 * @since 1.1.1. 982 */ 983 protected boolean isScriptBlock() 984 { 985 return this.scriptBlock; 986 } 987 988 /** 989 * Checks if the given id is a valid Doxia id and if not, returns a transformed one. 990 * 991 * @param id The id to validate. 992 * @return A transformed id or the original id if it was already valid. 993 * @see DoxiaUtils#encodeId(String) 994 */ 995 protected String validAnchor( String id ) 996 { 997 if ( !DoxiaUtils.isValidId( id ) ) 998 { 999 String linkAnchor = DoxiaUtils.encodeId( id, true ); 1000 1001 String msg = "Modified invalid link: '" + id + "' to '" + linkAnchor + "'"; 1002 logMessage( "modifiedLink", msg ); 1003 1004 return linkAnchor; 1005 } 1006 1007 return id; 1008 } 1009 1010 /** {@inheritDoc} */ 1011 @Override 1012 protected void init() 1013 { 1014 super.init(); 1015 1016 this.scriptBlock = false; 1017 this.isLink = false; 1018 this.isAnchor = false; 1019 this.orderedListDepth = 0; 1020 this.sectionLevel = 0; 1021 this.inVerbatim = false; 1022 this.inFigure = false; 1023 while ( this.decoration.getAttributeNames().hasMoreElements() ) 1024 { 1025 this.decoration.removeAttribute( this.decoration.getAttributeNames().nextElement() ); 1026 } 1027 this.warnMessages = null; 1028 } 1029 1030 private void handleAEnd( Sink sink ) 1031 { 1032 if ( isLink ) 1033 { 1034 sink.link_(); 1035 isLink = false; 1036 } 1037 else if ( isAnchor ) 1038 { 1039 sink.anchor_(); 1040 isAnchor = false; 1041 } 1042 } 1043 1044 private void handleAStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs ) 1045 { 1046 String href = parser.getAttributeValue( null, Attribute.HREF.toString() ); 1047 1048 if ( href != null ) 1049 { 1050 int hashIndex = href.indexOf( '#' ); 1051 if ( hashIndex != -1 && !DoxiaUtils.isExternalLink( href ) ) 1052 { 1053 String hash = href.substring( hashIndex + 1 ); 1054 1055 if ( !DoxiaUtils.isValidId( hash ) ) 1056 { 1057 href = href.substring( 0, hashIndex ) + "#" + DoxiaUtils.encodeId( hash, true ); 1058 1059 String msg = "Modified invalid link: '" + hash + "' to '" + href + "'"; 1060 logMessage( "modifiedLink", msg ); 1061 } 1062 } 1063 sink.link( href, attribs ); 1064 isLink = true; 1065 } 1066 else 1067 { 1068 String name = parser.getAttributeValue( null, Attribute.NAME.toString() ); 1069 1070 if ( name != null ) 1071 { 1072 sink.anchor( validAnchor( name ), attribs ); 1073 isAnchor = true; 1074 } 1075 else 1076 { 1077 String id = parser.getAttributeValue( null, Attribute.ID.toString() ); 1078 if ( id != null ) 1079 { 1080 sink.anchor( validAnchor( id ), attribs ); 1081 isAnchor = true; 1082 } 1083 } 1084 } 1085 } 1086 1087 private boolean handleDivStart( XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink ) 1088 { 1089 boolean visited = true; 1090 1091 String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() ); 1092 1093 if ( "figure".equals( divclass ) ) 1094 { 1095 this.inFigure = true; 1096 SinkEventAttributeSet atts = new SinkEventAttributeSet( attribs ); 1097 atts.removeAttribute( SinkEventAttributes.CLASS ); 1098 sink.figure( atts ); 1099 } 1100 else 1101 { 1102 visited = false; 1103 } 1104 1105 return visited; 1106 } 1107 1108 private void handleFigureCaptionEnd( Sink sink ) 1109 { 1110 if ( inFigure ) 1111 { 1112 sink.figureCaption_(); 1113 } 1114 else 1115 { 1116 sink.italic_(); 1117 } 1118 } 1119 1120 private void handleFigureCaptionStart( Sink sink, SinkEventAttributeSet attribs ) 1121 { 1122 if ( inFigure ) 1123 { 1124 sink.figureCaption( attribs ); 1125 } 1126 else 1127 { 1128 sink.italic(); 1129 } 1130 } 1131 1132 private void handleImgStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs ) 1133 { 1134 String src = parser.getAttributeValue( null, Attribute.SRC.toString() ); 1135 1136 if ( src != null ) 1137 { 1138 sink.figureGraphics( src, attribs ); 1139 } 1140 } 1141 1142 private void handleLIStart( Sink sink, SinkEventAttributeSet attribs ) 1143 { 1144 if ( orderedListDepth == 0 ) 1145 { 1146 sink.listItem( attribs ); 1147 } 1148 else 1149 { 1150 sink.numberedListItem( attribs ); 1151 } 1152 } 1153 1154 private void handleListItemEnd( Sink sink ) 1155 { 1156 if ( orderedListDepth == 0 ) 1157 { 1158 sink.listItem_(); 1159 } 1160 else 1161 { 1162 sink.numberedListItem_(); 1163 } 1164 } 1165 1166 private void handleOLStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs ) 1167 { 1168 int numbering = Sink.NUMBERING_DECIMAL; 1169 // this will have to be generalized if we handle styles 1170 String style = parser.getAttributeValue( null, Attribute.STYLE.toString() ); 1171 1172 if ( style != null ) 1173 { 1174 if ( "list-style-type: upper-alpha".equals( style ) ) 1175 { 1176 numbering = Sink.NUMBERING_UPPER_ALPHA; 1177 } 1178 else if ( "list-style-type: lower-alpha".equals( style ) ) 1179 { 1180 numbering = Sink.NUMBERING_LOWER_ALPHA; 1181 } 1182 else if ( "list-style-type: upper-roman".equals( style ) ) 1183 { 1184 numbering = Sink.NUMBERING_UPPER_ROMAN; 1185 } 1186 else if ( "list-style-type: lower-roman".equals( style ) ) 1187 { 1188 numbering = Sink.NUMBERING_LOWER_ROMAN; 1189 } 1190 else if ( "list-style-type: decimal".equals( style ) ) 1191 { 1192 numbering = Sink.NUMBERING_DECIMAL; 1193 } 1194 } 1195 1196 sink.numberedList( numbering, attribs ); 1197 orderedListDepth++; 1198 } 1199 1200 private void handlePStart( Sink sink, SinkEventAttributeSet attribs ) 1201 { 1202 if ( !inFigure ) 1203 { 1204 sink.paragraph( attribs ); 1205 } 1206 } 1207 1208 /* 1209 * The PRE element tells visual user agents that the enclosed text is 1210 * "preformatted". When handling preformatted text, visual user agents: 1211 * - May leave white space intact. 1212 * - May render text with a fixed-pitch font. 1213 * - May disable automatic word wrap. 1214 * - Must not disable bidirectional processing. 1215 * Non-visual user agents are not required to respect extra white space 1216 * in the content of a PRE element. 1217 */ 1218 private void handlePreStart( SinkEventAttributeSet attribs, Sink sink ) 1219 { 1220 verbatim(); 1221 attribs.removeAttribute( SinkEventAttributes.DECORATION ); 1222 sink.verbatim( attribs ); 1223 } 1224 1225 private void handleSectionStart( Sink sink, int level, SinkEventAttributeSet attribs ) 1226 { 1227 consecutiveSections( level, sink ); 1228 sink.section( level, attribs ); 1229 sink.sectionTitle( level, attribs ); 1230 } 1231 1232 private void handleTableStart( Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser ) 1233 { 1234 sink.table( attribs ); 1235 String border = parser.getAttributeValue( null, Attribute.BORDER.toString() ); 1236 boolean grid = true; 1237 1238 if ( border == null || "0".equals( border ) ) 1239 { 1240 grid = false; 1241 } 1242 1243 String align = parser.getAttributeValue( null, Attribute.ALIGN.toString() ); 1244 int[] justif = {Sink.JUSTIFY_LEFT}; 1245 1246 if ( "center".equals( align ) ) 1247 { 1248 justif[0] = Sink.JUSTIFY_CENTER; 1249 } 1250 else if ( "right".equals( align ) ) 1251 { 1252 justif[0] = Sink.JUSTIFY_RIGHT; 1253 } 1254 1255 sink.tableRows( justif, grid ); 1256 } 1257 1258 /** 1259 * If debug mode is enabled, log the <code>msg</code> as is, otherwise add unique msg in <code>warnMessages</code>. 1260 * 1261 * @param key not null 1262 * @param msg not null 1263 * @see #parse(Reader, Sink) 1264 * @since 1.1.1 1265 */ 1266 private void logMessage( String key, String msg ) 1267 { 1268 final String log = "[XHTML Parser] " + msg; 1269 if ( getLog().isDebugEnabled() ) 1270 { 1271 getLog().debug( log ); 1272 1273 return; 1274 } 1275 1276 if ( warnMessages == null ) 1277 { 1278 warnMessages = new HashMap<String, Set<String>>(); 1279 } 1280 1281 Set<String> set = warnMessages.get( key ); 1282 if ( set == null ) 1283 { 1284 set = new TreeSet<String>(); 1285 } 1286 set.add( log ); 1287 warnMessages.put( key, set ); 1288 } 1289 1290 /** 1291 * @since 1.1.1 1292 */ 1293 private void logWarnings() 1294 { 1295 if ( getLog().isWarnEnabled() && this.warnMessages != null && !isSecondParsing() ) 1296 { 1297 for ( Map.Entry<String, Set<String>> entry : this.warnMessages.entrySet() ) 1298 { 1299 for ( String msg : entry.getValue() ) 1300 { 1301 getLog().warn( msg ); 1302 } 1303 } 1304 1305 this.warnMessages = null; 1306 } 1307 } 1308}