001package org.apache.maven.doxia.module.xhtml;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.io.IOException;
023import java.io.Reader;
024import java.io.StringReader;
025import java.io.StringWriter;
026import java.util.HashMap;
027import java.util.Map;
028import javax.swing.text.html.HTML.Attribute;
029
030import org.apache.maven.doxia.macro.MacroExecutionException;
031import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
032import org.apache.maven.doxia.macro.MacroRequest;
033import org.apache.maven.doxia.parser.ParseException;
034import org.apache.maven.doxia.parser.Parser;
035import org.apache.maven.doxia.parser.XhtmlBaseParser;
036import org.apache.maven.doxia.sink.Sink;
037import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
038import org.codehaus.plexus.component.annotations.Component;
039import org.codehaus.plexus.util.IOUtil;
040import org.codehaus.plexus.util.StringUtils;
041import org.codehaus.plexus.util.xml.pull.XmlPullParser;
042import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
043
044/**
045 * Parse an xhtml model and emit events into a Doxia Sink.
046 *
047 * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
048 * @version $Id$
049 * @since 1.0
050 */
051@Component( role = Parser.class, hint = "xhtml" )
052public class XhtmlParser
053    extends XhtmlBaseParser
054    implements XhtmlMarkup
055{
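    /**
     * For boxed verbatim: set when a <code>div</code> with <code>class="source"</code> is opened,
     * cleared when a <code>div</code> is closed, and checked when a <code>pre</code> starts.
     */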
057    private boolean boxed;
058
059    /** Empty elements don't write a closing tag. */
060    private boolean isEmptyElement;
061
062    /**
063     * The source content of the input reader. Used to pass into macros.
064     */
065    private String sourceContent;
066
067    /** {@inheritDoc} */
068    protected void handleStartTag( XmlPullParser parser, Sink sink )
069        throws XmlPullParserException, MacroExecutionException
070    {
071        isEmptyElement = parser.isEmptyElementTag();
072
073        SinkEventAttributeSet attribs = getAttributesFromParser( parser );
074
075        if ( parser.getName().equals( HTML.toString() ) )
076        {
077            //Do nothing
078            return;
079        }
080        else if ( parser.getName().equals( HEAD.toString() ) )
081        {
082            sink.head( attribs );
083        }
084        else if ( parser.getName().equals( TITLE.toString() ) )
085        {
086            sink.title( attribs );
087        }
088        else if ( parser.getName().equals( META.toString() ) )
089        {
090            String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
091            String content = parser.getAttributeValue( null, Attribute.CONTENT.toString() );
092
093            if ( "author".equals( name ) )
094            {
095                sink.author( null );
096
097                sink.text( content );
098
099                sink.author_();
100            }
101            else if ( "date".equals( name ) )
102            {
103                sink.date( null );
104
105                sink.text( content );
106
107                sink.date_();
108            }
109            else
110            {
111                sink.unknown( "meta", new Object[] { Integer.valueOf( TAG_TYPE_SIMPLE ) }, attribs );
112            }
113        }
114        /*
115         * The ADDRESS element may be used by authors to supply contact information
116         * for a model or a major part of a model such as a form. This element
117         *  often appears at the beginning or end of a model.
118         */
119        else if ( parser.getName().equals( ADDRESS.toString() ) )
120        {
121            sink.author( attribs );
122        }
123        else if ( parser.getName().equals( BODY.toString() ) )
124        {
125            sink.body( attribs );
126        }
127        else if ( parser.getName().equals( DIV.toString() ) )
128        {
129            String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
130
131            if ( "source".equals( divclass ) )
132            {
133                this.boxed = true;
134            }
135
136            baseStartTag( parser, sink ); // pick up other divs
137        }
138        /*
139         * The PRE element tells visual user agents that the enclosed text is
140         * "preformatted". When handling preformatted text, visual user agents:
141         * - May leave white space intact.
142         * - May render text with a fixed-pitch font.
143         * - May disable automatic word wrap.
144         * - Must not disable bidirectional processing.
145         * Non-visual user agents are not required to respect extra white space
146         * in the content of a PRE element.
147         */
148        else if ( parser.getName().equals( PRE.toString() ) )
149        {
150            if ( boxed )
151            {
152                attribs.addAttributes( SinkEventAttributeSet.BOXED );
153            }
154
155            verbatim();
156
157            sink.verbatim( attribs );
158        }
159        else if ( !baseStartTag( parser, sink ) )
160        {
161            if ( isEmptyElement )
162            {
163                handleUnknown( parser, sink, TAG_TYPE_SIMPLE );
164            }
165            else
166            {
167                handleUnknown( parser, sink, TAG_TYPE_START );
168            }
169
170            if ( getLog().isDebugEnabled() )
171            {
172                String position = "[" + parser.getLineNumber() + ":"
173                    + parser.getColumnNumber() + "]";
174                String tag = "<" + parser.getName() + ">";
175
176                getLog().debug( "Unrecognized xhtml tag: " + tag + " at " + position );
177            }
178        }
179    }
180
181    /** {@inheritDoc} */
182    protected void handleEndTag( XmlPullParser parser, Sink sink )
183        throws XmlPullParserException, MacroExecutionException
184    {
185        if ( parser.getName().equals( HTML.toString() ) )
186        {
187            //Do nothing
188            return;
189        }
190        else if ( parser.getName().equals( HEAD.toString() ) )
191        {
192            sink.head_();
193        }
194        else if ( parser.getName().equals( TITLE.toString() ) )
195        {
196            sink.title_();
197        }
198        else if ( parser.getName().equals( BODY.toString() ) )
199        {
200            consecutiveSections( 0, sink );
201
202            sink.body_();
203        }
204        else if ( parser.getName().equals( ADDRESS.toString() ) )
205        {
206            sink.author_();
207        }
208        else if ( parser.getName().equals( DIV.toString() ) )
209        {
210            this.boxed = false;
211            baseEndTag( parser, sink );
212        }
213        else if ( !baseEndTag( parser, sink ) )
214        {
215            if ( !isEmptyElement )
216            {
217                handleUnknown( parser, sink, TAG_TYPE_END );
218            }
219        }
220
221        isEmptyElement = false;
222    }
223
224    /** {@inheritDoc} */
225    @Override
226    protected void handleComment( XmlPullParser parser, Sink sink )
227        throws XmlPullParserException
228    {
229        String text = getText( parser ).trim();
230
231        if ( text.startsWith( "MACRO" ) && !isSecondParsing() )
232        {
233            processMacro( text, sink );
234        }
235        else
236        {
237            super.handleComment( parser, sink );
238        }
239    }
240
241    /** process macro embedded in XHTML commment */
242    private void processMacro( String text, Sink sink )
243        throws XmlPullParserException
244    {
245        String s = text.substring( text.indexOf( '{' ) + 1, text.indexOf( '}' ) );
246        s = escapeForMacro( s );
247        String[] params = StringUtils.split( s, "|" );
248        String macroName = params[0];
249
250        Map<String, Object> parameters = new HashMap<String, Object>();
251        for ( int i = 1; i < params.length; i++ )
252        {
253            String[] param = StringUtils.split( params[i], "=" );
254            if ( param.length == 1 )
255            {
256                throw new XmlPullParserException( "Missing 'key=value' pair for macro parameter: " + params[i] );
257            }
258
259            String key = unescapeForMacro( param[0] );
260            String value = unescapeForMacro( param[1] );
261            parameters.put( key, value );
262        }
263
264        MacroRequest request = new MacroRequest( sourceContent, new XhtmlParser(), parameters, getBasedir() );
265
266        try
267        {
268            executeMacro( macroName, request, sink );
269        }
270        catch ( MacroExecutionException e )
271        {
272            throw new XmlPullParserException( "Unable to execute macro in the document: " + macroName );
273        }
274        catch ( MacroNotFoundException me )
275        {
276            throw new XmlPullParserException( "Macro not found: " + macroName );
277        }
278    }
279
280    /**
281     * escapeForMacro
282     *
283     * @param s String
284     * @return String
285     */
286    private String escapeForMacro( String s )
287    {
288        if ( s == null || s.length() < 1 )
289        {
290            return s;
291        }
292
293        String result = s;
294
295        // use some outrageously out-of-place chars for text
296        // (these are device control one/two in unicode)
297        result = StringUtils.replace( result, "\\=", "\u0011" );
298        result = StringUtils.replace( result, "\\|", "\u0012" );
299
300        return result;
301    }
302
303    /**
304     * unescapeForMacro
305     *
306     * @param s String
307     * @return String
308     */
309    private String unescapeForMacro( String s )
310    {
311        if ( s == null || s.length() < 1 )
312        {
313            return s;
314        }
315
316        String result = s;
317
318        result = StringUtils.replace( result, "\u0011", "=" );
319        result = StringUtils.replace( result, "\u0012", "|" );
320
321        return result;
322    }
323
324    /** {@inheritDoc} */
325    protected void init()
326    {
327        super.init();
328
329        this.boxed = false;
330        this.isEmptyElement = false;
331    }
332
333    /** {@inheritDoc} */
334    public void parse( Reader source, Sink sink )
335        throws ParseException
336    {
337        this.sourceContent = null;
338
339        try
340        {
341            StringWriter contentWriter = new StringWriter();
342            IOUtil.copy( source, contentWriter );
343            sourceContent = contentWriter.toString();
344        }
345        catch ( IOException ex )
346        {
347            throw new ParseException( "Error reading the input source: " + ex.getMessage(), ex );
348        }
349        finally
350        {
351            IOUtil.close( source );
352        }
353
354        try
355        {
356            super.parse( new StringReader( sourceContent ), sink );
357        }
358        finally
359        {
360            this.sourceContent = null;
361        }
362    }
363}