View Javadoc
1   package org.apache.maven.doxia.module.xhtml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.io.StringReader;
25  import java.io.StringWriter;
26  import java.util.HashMap;
27  import java.util.Map;
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
32  import org.apache.maven.doxia.macro.MacroRequest;
33  import org.apache.maven.doxia.parser.ParseException;
34  import org.apache.maven.doxia.parser.Parser;
35  import org.apache.maven.doxia.parser.XhtmlBaseParser;
36  import org.apache.maven.doxia.sink.Sink;
37  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
38  import org.codehaus.plexus.component.annotations.Component;
39  import org.codehaus.plexus.util.IOUtil;
40  import org.codehaus.plexus.util.StringUtils;
41  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
42  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
43  
44  /**
45   * Parse an xhtml model and emit events into a Doxia Sink.
46   *
47   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
48   * @since 1.0
49   */
50  @Component( role = Parser.class, hint = "xhtml" )
51  public class XhtmlParser
52      extends XhtmlBaseParser
53      implements XhtmlMarkup
54  {
55      /** For boxed verbatim. */
56      protected boolean boxed;
57  
58      /** Empty elements don't write a closing tag. */
59      private boolean isEmptyElement;
60  
61      /**
62       * The source content of the input reader. Used to pass into macros.
63       */
64      private String sourceContent;
65  
66      /** {@inheritDoc} */
67      protected void handleStartTag( XmlPullParser parser, Sink sink )
68          throws XmlPullParserException, MacroExecutionException
69      {
70          isEmptyElement = parser.isEmptyElementTag();
71  
72          SinkEventAttributeSet attribs = getAttributesFromParser( parser );
73  
74          if ( parser.getName().equals( HTML.toString() ) )
75          {
76              //Do nothing
77              return;
78          }
79          else if ( parser.getName().equals( HEAD.toString() ) )
80          {
81              sink.head( attribs );
82          }
83          else if ( parser.getName().equals( TITLE.toString() ) )
84          {
85              sink.title( attribs );
86          }
87          else if ( parser.getName().equals( META.toString() ) )
88          {
89              String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
90              String content = parser.getAttributeValue( null, Attribute.CONTENT.toString() );
91  
92              if ( "author".equals( name ) )
93              {
94                  sink.author( null );
95  
96                  sink.text( content );
97  
98                  sink.author_();
99              }
100             else if ( "date".equals( name ) )
101             {
102                 sink.date( null );
103 
104                 sink.text( content );
105 
106                 sink.date_();
107             }
108             else
109             {
110                 sink.unknown( "meta", new Object[] { TAG_TYPE_SIMPLE }, attribs );
111             }
112         }
113         /*
114          * The ADDRESS element may be used by authors to supply contact information
115          * for a model or a major part of a model such as a form. This element
116          *  often appears at the beginning or end of a model.
117          */
118         else if ( parser.getName().equals( ADDRESS.toString() ) )
119         {
120             sink.address( attribs );
121         }
122         else if ( parser.getName().equals( BODY.toString() ) )
123         {
124             sink.body( attribs );
125         }
126         else if ( parser.getName().equals( DIV.toString() ) )
127         {
128             String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
129 
130             if ( "source".equals( divclass ) )
131             {
132                 this.boxed = true;
133             }
134 
135             baseStartTag( parser, sink ); // pick up other divs
136         }
137         /*
138          * The PRE element tells visual user agents that the enclosed text is
139          * "preformatted". When handling preformatted text, visual user agents:
140          * - May leave white space intact.
141          * - May render text with a fixed-pitch font.
142          * - May disable automatic word wrap.
143          * - Must not disable bidirectional processing.
144          * Non-visual user agents are not required to respect extra white space
145          * in the content of a PRE element.
146          */
147         else if ( parser.getName().equals( PRE.toString() ) )
148         {
149             if ( boxed )
150             {
151                 attribs.addAttributes( SinkEventAttributeSet.BOXED );
152             }
153 
154             verbatim();
155 
156             sink.verbatim( attribs );
157         }
158         else if ( !baseStartTag( parser, sink ) )
159         {
160             if ( isEmptyElement )
161             {
162                 handleUnknown( parser, sink, TAG_TYPE_SIMPLE );
163             }
164             else
165             {
166                 handleUnknown( parser, sink, TAG_TYPE_START );
167             }
168 
169             if ( getLog().isDebugEnabled() )
170             {
171                 String position = "[" + parser.getLineNumber() + ":"
172                     + parser.getColumnNumber() + "]";
173                 String tag = "<" + parser.getName() + ">";
174 
175                 getLog().debug( "Unrecognized xhtml tag: " + tag + " at " + position );
176             }
177         }
178     }
179 
180     /** {@inheritDoc} */
181     protected void handleEndTag( XmlPullParser parser, Sink sink )
182         throws XmlPullParserException, MacroExecutionException
183     {
184         if ( parser.getName().equals( HTML.toString() ) )
185         {
186             //Do nothing
187             return;
188         }
189         else if ( parser.getName().equals( HEAD.toString() ) )
190         {
191             sink.head_();
192         }
193         else if ( parser.getName().equals( TITLE.toString() ) )
194         {
195             sink.title_();
196         }
197         else if ( parser.getName().equals( BODY.toString() ) )
198         {
199             consecutiveSections( 0, sink );
200 
201             sink.body_();
202         }
203         else if ( parser.getName().equals( ADDRESS.toString() ) )
204         {
205             sink.address_();
206         }
207         else if ( parser.getName().equals( DIV.toString() ) )
208         {
209             this.boxed = false;
210             baseEndTag( parser, sink );
211         }
212         else if ( !baseEndTag( parser, sink ) )
213         {
214             if ( !isEmptyElement )
215             {
216                 handleUnknown( parser, sink, TAG_TYPE_END );
217             }
218         }
219 
220         isEmptyElement = false;
221     }
222 
223     /** {@inheritDoc} */
224     @Override
225     protected void handleComment( XmlPullParser parser, Sink sink )
226         throws XmlPullParserException
227     {
228         String text = getText( parser ).trim();
229 
230         if ( text.startsWith( "MACRO" ) && !isSecondParsing() )
231         {
232             processMacro( parser, text, sink );
233         }
234         else
235         {
236             super.handleComment( parser, sink );
237         }
238     }
239 
240     /** process macro embedded in XHTML commment */
241     private void processMacro( XmlPullParser parser, String text, Sink sink )
242         throws XmlPullParserException
243     {
244         String s = text.substring( text.indexOf( '{' ) + 1, text.indexOf( '}' ) );
245         s = escapeForMacro( s );
246         String[] params = StringUtils.split( s, "|" );
247         String macroName = params[0];
248 
249         Map<String, Object> parameters = new HashMap<>();
250         for ( int i = 1; i < params.length; i++ )
251         {
252             String[] param = StringUtils.split( params[i], "=" );
253             if ( param.length == 1 )
254             {
255                 throw new XmlPullParserException( "Invalid 'key=value' pair for macro " + macroName + " parameter: "
256                     + params[i], parser, null );
257             }
258 
259             String key = unescapeForMacro( param[0] );
260             String value = unescapeForMacro( param[1] );
261             parameters.put( key, value );
262         }
263 
264         MacroRequest request = new MacroRequest( sourceContent, new XhtmlParser(), parameters, getBasedir() );
265 
266         try
267         {
268             executeMacro( macroName, request, sink );
269         }
270         catch ( MacroExecutionException e )
271         {
272             throw new XmlPullParserException( "Unable to execute macro in the document: " + macroName, parser, e );
273         }
274         catch ( MacroNotFoundException me )
275         {
276             throw new XmlPullParserException( "Macro not found: " + macroName, parser, null );
277         }
278     }
279 
280     /**
281      * escapeForMacro
282      *
283      * @param s String
284      * @return String
285      */
286     private String escapeForMacro( String s )
287     {
288         if ( s == null || s.length() < 1 )
289         {
290             return s;
291         }
292 
293         String result = s;
294 
295         // use some outrageously out-of-place chars for text
296         // (these are device control one/two in unicode)
297         result = StringUtils.replace( result, "\\=", "\u0011" );
298         result = StringUtils.replace( result, "\\|", "\u0012" );
299 
300         return result;
301     }
302 
303     /**
304      * unescapeForMacro
305      *
306      * @param s String
307      * @return String
308      */
309     private String unescapeForMacro( String s )
310     {
311         if ( s == null || s.length() < 1 )
312         {
313             return s;
314         }
315 
316         String result = s;
317 
318         result = StringUtils.replace( result, "\u0011", "=" );
319         result = StringUtils.replace( result, "\u0012", "|" );
320 
321         return result;
322     }
323 
324     /**
325      * {@inheritDoc}
326      */
327     protected void init()
328     {
329         super.init();
330 
331         this.boxed = false;
332         this.isEmptyElement = false;
333     }
334 
335     /** {@inheritDoc} */
336     public void parse( Reader source, Sink sink, String reference )
337         throws ParseException
338     {
339         this.sourceContent = null;
340 
341         try
342         {
343             StringWriter contentWriter = new StringWriter();
344             IOUtil.copy( source, contentWriter );
345             sourceContent = contentWriter.toString();
346         }
347         catch ( IOException ex )
348         {
349             throw new ParseException( "Error reading the input source: " + ex.getMessage(), ex );
350         }
351         finally
352         {
353             IOUtil.close( source );
354         }
355 
356         try
357         {
358             super.parse( new StringReader( sourceContent ), sink, reference );
359         }
360         finally
361         {
362             this.sourceContent = null;
363         }
364     }
365 }