001package org.apache.maven.doxia.module.xhtml; 002 003/* 004 * Licensed to the Apache Software Foundation (ASF) under one 005 * or more contributor license agreements. See the NOTICE file 006 * distributed with this work for additional information 007 * regarding copyright ownership. The ASF licenses this file 008 * to you under the Apache License, Version 2.0 (the 009 * "License"); you may not use this file except in compliance 010 * with the License. You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, 015 * software distributed under the License is distributed on an 016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 * KIND, either express or implied. See the License for the 018 * specific language governing permissions and limitations 019 * under the License. 020 */ 021 022import java.io.IOException; 023import java.io.Reader; 024import java.io.StringReader; 025import java.io.StringWriter; 026import java.util.HashMap; 027import java.util.Map; 028import javax.swing.text.html.HTML.Attribute; 029 030import org.apache.maven.doxia.macro.MacroExecutionException; 031import org.apache.maven.doxia.macro.manager.MacroNotFoundException; 032import org.apache.maven.doxia.macro.MacroRequest; 033import org.apache.maven.doxia.parser.ParseException; 034import org.apache.maven.doxia.parser.Parser; 035import org.apache.maven.doxia.parser.XhtmlBaseParser; 036import org.apache.maven.doxia.sink.Sink; 037import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; 038import org.codehaus.plexus.component.annotations.Component; 039import org.codehaus.plexus.util.IOUtil; 040import org.codehaus.plexus.util.StringUtils; 041import org.codehaus.plexus.util.xml.pull.XmlPullParser; 042import org.codehaus.plexus.util.xml.pull.XmlPullParserException; 043 044/** 045 * Parse an xhtml model and emit events into a Doxia Sink. 046 * 047 * @author <a href="mailto:jason@maven.org">Jason van Zyl</a> 048 * @version $Id$ 049 * @since 1.0 050 */ 051@Component( role = Parser.class, hint = "xhtml" ) 052public class XhtmlParser 053 extends XhtmlBaseParser 054 implements XhtmlMarkup 055{ 056 /** For boxed verbatim. */ 057 private boolean boxed; 058 059 /** Empty elements don't write a closing tag. */ 060 private boolean isEmptyElement; 061 062 /** 063 * The source content of the input reader. Used to pass into macros. 064 */ 065 private String sourceContent; 066 067 /** {@inheritDoc} */ 068 protected void handleStartTag( XmlPullParser parser, Sink sink ) 069 throws XmlPullParserException, MacroExecutionException 070 { 071 isEmptyElement = parser.isEmptyElementTag(); 072 073 SinkEventAttributeSet attribs = getAttributesFromParser( parser ); 074 075 if ( parser.getName().equals( HTML.toString() ) ) 076 { 077 //Do nothing 078 return; 079 } 080 else if ( parser.getName().equals( HEAD.toString() ) ) 081 { 082 sink.head( attribs ); 083 } 084 else if ( parser.getName().equals( TITLE.toString() ) ) 085 { 086 sink.title( attribs ); 087 } 088 else if ( parser.getName().equals( META.toString() ) ) 089 { 090 String name = parser.getAttributeValue( null, Attribute.NAME.toString() ); 091 String content = parser.getAttributeValue( null, Attribute.CONTENT.toString() ); 092 093 if ( "author".equals( name ) ) 094 { 095 sink.author( null ); 096 097 sink.text( content ); 098 099 sink.author_(); 100 } 101 else if ( "date".equals( name ) ) 102 { 103 sink.date( null ); 104 105 sink.text( content ); 106 107 sink.date_(); 108 } 109 else 110 { 111 sink.unknown( "meta", new Object[] { Integer.valueOf( TAG_TYPE_SIMPLE ) }, attribs ); 112 } 113 } 114 /* 115 * The ADDRESS element may be used by authors to supply contact information 116 * for a model or a major part of a model such as a form. This element 117 * often appears at the beginning or end of a model. 118 */ 119 else if ( parser.getName().equals( ADDRESS.toString() ) ) 120 { 121 sink.author( attribs ); 122 } 123 else if ( parser.getName().equals( BODY.toString() ) ) 124 { 125 sink.body( attribs ); 126 } 127 else if ( parser.getName().equals( DIV.toString() ) ) 128 { 129 String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() ); 130 131 if ( "source".equals( divclass ) ) 132 { 133 this.boxed = true; 134 } 135 136 baseStartTag( parser, sink ); // pick up other divs 137 } 138 /* 139 * The PRE element tells visual user agents that the enclosed text is 140 * "preformatted". When handling preformatted text, visual user agents: 141 * - May leave white space intact. 142 * - May render text with a fixed-pitch font. 143 * - May disable automatic word wrap. 144 * - Must not disable bidirectional processing. 145 * Non-visual user agents are not required to respect extra white space 146 * in the content of a PRE element. 147 */ 148 else if ( parser.getName().equals( PRE.toString() ) ) 149 { 150 if ( boxed ) 151 { 152 attribs.addAttributes( SinkEventAttributeSet.BOXED ); 153 } 154 155 verbatim(); 156 157 sink.verbatim( attribs ); 158 } 159 else if ( !baseStartTag( parser, sink ) ) 160 { 161 if ( isEmptyElement ) 162 { 163 handleUnknown( parser, sink, TAG_TYPE_SIMPLE ); 164 } 165 else 166 { 167 handleUnknown( parser, sink, TAG_TYPE_START ); 168 } 169 170 if ( getLog().isDebugEnabled() ) 171 { 172 String position = "[" + parser.getLineNumber() + ":" 173 + parser.getColumnNumber() + "]"; 174 String tag = "<" + parser.getName() + ">"; 175 176 getLog().debug( "Unrecognized xhtml tag: " + tag + " at " + position ); 177 } 178 } 179 } 180 181 /** {@inheritDoc} */ 182 protected void handleEndTag( XmlPullParser parser, Sink sink ) 183 throws XmlPullParserException, MacroExecutionException 184 { 185 if ( parser.getName().equals( HTML.toString() ) ) 186 { 187 //Do nothing 188 return; 189 } 190 else if ( parser.getName().equals( HEAD.toString() ) ) 191 { 192 sink.head_(); 193 } 194 else if ( parser.getName().equals( TITLE.toString() ) ) 195 { 196 sink.title_(); 197 } 198 else if ( parser.getName().equals( BODY.toString() ) ) 199 { 200 consecutiveSections( 0, sink ); 201 202 sink.body_(); 203 } 204 else if ( parser.getName().equals( ADDRESS.toString() ) ) 205 { 206 sink.author_(); 207 } 208 else if ( parser.getName().equals( DIV.toString() ) ) 209 { 210 this.boxed = false; 211 baseEndTag( parser, sink ); 212 } 213 else if ( !baseEndTag( parser, sink ) ) 214 { 215 if ( !isEmptyElement ) 216 { 217 handleUnknown( parser, sink, TAG_TYPE_END ); 218 } 219 } 220 221 isEmptyElement = false; 222 } 223 224 /** {@inheritDoc} */ 225 @Override 226 protected void handleComment( XmlPullParser parser, Sink sink ) 227 throws XmlPullParserException 228 { 229 String text = getText( parser ).trim(); 230 231 if ( text.startsWith( "MACRO" ) && !isSecondParsing() ) 232 { 233 processMacro( text, sink ); 234 } 235 else 236 { 237 super.handleComment( parser, sink ); 238 } 239 } 240 241 /** process macro embedded in XHTML commment */ 242 private void processMacro( String text, Sink sink ) 243 throws XmlPullParserException 244 { 245 String s = text.substring( text.indexOf( '{' ) + 1, text.indexOf( '}' ) ); 246 s = escapeForMacro( s ); 247 String[] params = StringUtils.split( s, "|" ); 248 String macroName = params[0]; 249 250 Map<String, Object> parameters = new HashMap<String, Object>(); 251 for ( int i = 1; i < params.length; i++ ) 252 { 253 String[] param = StringUtils.split( params[i], "=" ); 254 if ( param.length == 1 ) 255 { 256 throw new XmlPullParserException( "Missing 'key=value' pair for macro parameter: " + params[i] ); 257 } 258 259 String key = unescapeForMacro( param[0] ); 260 String value = unescapeForMacro( param[1] ); 261 parameters.put( key, value ); 262 } 263 264 MacroRequest request = new MacroRequest( sourceContent, new XhtmlParser(), parameters, getBasedir() ); 265 266 try 267 { 268 executeMacro( macroName, request, sink ); 269 } 270 catch ( MacroExecutionException e ) 271 { 272 throw new XmlPullParserException( "Unable to execute macro in the document: " + macroName ); 273 } 274 catch ( MacroNotFoundException me ) 275 { 276 throw new XmlPullParserException( "Macro not found: " + macroName ); 277 } 278 } 279 280 /** 281 * escapeForMacro 282 * 283 * @param s String 284 * @return String 285 */ 286 private String escapeForMacro( String s ) 287 { 288 if ( s == null || s.length() < 1 ) 289 { 290 return s; 291 } 292 293 String result = s; 294 295 // use some outrageously out-of-place chars for text 296 // (these are device control one/two in unicode) 297 result = StringUtils.replace( result, "\\=", "\u0011" ); 298 result = StringUtils.replace( result, "\\|", "\u0012" ); 299 300 return result; 301 } 302 303 /** 304 * unescapeForMacro 305 * 306 * @param s String 307 * @return String 308 */ 309 private String unescapeForMacro( String s ) 310 { 311 if ( s == null || s.length() < 1 ) 312 { 313 return s; 314 } 315 316 String result = s; 317 318 result = StringUtils.replace( result, "\u0011", "=" ); 319 result = StringUtils.replace( result, "\u0012", "|" ); 320 321 return result; 322 } 323 324 /** {@inheritDoc} */ 325 protected void init() 326 { 327 super.init(); 328 329 this.boxed = false; 330 this.isEmptyElement = false; 331 } 332 333 /** {@inheritDoc} */ 334 public void parse( Reader source, Sink sink ) 335 throws ParseException 336 { 337 this.sourceContent = null; 338 339 try 340 { 341 StringWriter contentWriter = new StringWriter(); 342 IOUtil.copy( source, contentWriter ); 343 sourceContent = contentWriter.toString(); 344 } 345 catch ( IOException ex ) 346 { 347 throw new ParseException( "Error reading the input source: " + ex.getMessage(), ex ); 348 } 349 finally 350 { 351 IOUtil.close( source ); 352 } 353 354 try 355 { 356 super.parse( new StringReader( sourceContent ), sink ); 357 } 358 finally 359 { 360 this.sourceContent = null; 361 } 362 } 363}