View Javadoc
1   package org.apache.maven.doxia.module.xhtml5;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.io.StringReader;
25  import java.io.StringWriter;
26  import java.util.HashMap;
27  import java.util.Map;
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
32  import org.apache.maven.doxia.macro.MacroRequest;
33  import org.apache.maven.doxia.parser.ParseException;
34  import org.apache.maven.doxia.parser.Parser;
35  import org.apache.maven.doxia.parser.Xhtml5BaseParser;
36  import org.apache.maven.doxia.sink.Sink;
37  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
38  import org.codehaus.plexus.component.annotations.Component;
39  import org.codehaus.plexus.util.IOUtil;
40  import org.codehaus.plexus.util.StringUtils;
41  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
42  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
43  
44  /**
45   * Parse an xhtml model and emit events into a Doxia Sink.
46   */
47  @Component( role = Parser.class, hint = Xhtml5Parser.ROLE_HINT )
48  public class Xhtml5Parser
49      extends Xhtml5BaseParser
50      implements Xhtml5Markup
51  {
52      /**
53       * The role hint for the {@link Xhtml5Parser} Plexus component.
54       */
55      public static final String ROLE_HINT = "xhtml5";
56  
57      /** For boxed verbatim. */
58      private boolean boxed;
59  
60      /** Empty elements don't write a closing tag. */
61      private boolean isEmptyElement;
62  
63      /**
64       * The source content of the input reader. Used to pass into macros.
65       */
66      private String sourceContent;
67  
68      /** {@inheritDoc} */
69      protected void handleStartTag( XmlPullParser parser, Sink sink )
70          throws XmlPullParserException, MacroExecutionException
71      {
72          isEmptyElement = parser.isEmptyElementTag();
73  
74          SinkEventAttributeSet attribs = getAttributesFromParser( parser );
75  
76          if ( parser.getName().equals( HTML.toString() ) )
77          {
78              //Do nothing
79              return;
80          }
81          else if ( parser.getName().equals( HEAD.toString() ) )
82          {
83              sink.head( attribs );
84          }
85          else if ( parser.getName().equals( TITLE.toString() ) )
86          {
87              sink.title( attribs );
88          }
89          else if ( parser.getName().equals( META.toString() ) )
90          {
91              String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
92              String content = parser.getAttributeValue( null, Attribute.CONTENT.toString() );
93  
94              if ( "author".equals( name ) )
95              {
96                  sink.author( null );
97  
98                  sink.text( content );
99  
100                 sink.author_();
101             }
102             else if ( "date".equals( name ) )
103             {
104                 sink.date( null );
105 
106                 sink.text( content );
107 
108                 sink.date_();
109             }
110             else
111             {
112                 sink.unknown( "meta", new Object[] { TAG_TYPE_SIMPLE }, attribs );
113             }
114         }
115         /*
116          * The ADDRESS element may be used by authors to supply contact information
117          * for a model or a major part of a model such as a form. This element
118          *  often appears at the beginning or end of a model.
119          */
120         else if ( parser.getName().equals( ADDRESS.toString() ) )
121         {
122             sink.address( attribs );
123         }
124         else if ( parser.getName().equals( BODY.toString() ) )
125         {
126             sink.body( attribs );
127         }
128         else if ( parser.getName().equals( DIV.toString() ) )
129         {
130             String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
131 
132             if ( "source".equals( divclass ) )
133             {
134                 this.boxed = true;
135             }
136 
137             baseStartTag( parser, sink ); // pick up other divs
138         }
139         /*
140          * The PRE element tells visual user agents that the enclosed text is
141          * "preformatted". When handling preformatted text, visual user agents:
142          * - May leave white space intact.
143          * - May render text with a fixed-pitch font.
144          * - May disable automatic word wrap.
145          * - Must not disable bidirectional processing.
146          * Non-visual user agents are not required to respect extra white space
147          * in the content of a PRE element.
148          */
149         else if ( parser.getName().equals( PRE.toString() ) )
150         {
151             if ( boxed )
152             {
153                 attribs.addAttributes( SinkEventAttributeSet.BOXED );
154             }
155 
156             verbatim();
157 
158             sink.verbatim( attribs );
159         }
160         else if ( !baseStartTag( parser, sink ) )
161         {
162             if ( isEmptyElement )
163             {
164                 handleUnknown( parser, sink, TAG_TYPE_SIMPLE );
165             }
166             else
167             {
168                 handleUnknown( parser, sink, TAG_TYPE_START );
169             }
170 
171             if ( getLog().isDebugEnabled() )
172             {
173                 String position = "[" + parser.getLineNumber() + ":"
174                     + parser.getColumnNumber() + "]";
175                 String tag = "<" + parser.getName() + ">";
176 
177                 getLog().debug( "Unrecognized xhtml5 tag: " + tag + " at " + position );
178             }
179         }
180     }
181 
182     /** {@inheritDoc} */
183     protected void handleEndTag( XmlPullParser parser, Sink sink )
184         throws XmlPullParserException, MacroExecutionException
185     {
186         if ( parser.getName().equals( HTML.toString() ) )
187         {
188             //Do nothing
189             return;
190         }
191         else if ( parser.getName().equals( HEAD.toString() ) )
192         {
193             sink.head_();
194         }
195         else if ( parser.getName().equals( TITLE.toString() ) )
196         {
197             sink.title_();
198         }
199         else if ( parser.getName().equals( BODY.toString() ) )
200         {
201             consecutiveSections( 0, sink, null );
202 
203             sink.body_();
204         }
205         else if ( parser.getName().equals( ADDRESS.toString() ) )
206         {
207             sink.address_();
208         }
209         else if ( parser.getName().equals( DIV.toString() ) )
210         {
211             this.boxed = false;
212             baseEndTag( parser, sink );
213         }
214         else if ( !baseEndTag( parser, sink ) )
215         {
216             if ( !isEmptyElement )
217             {
218                 handleUnknown( parser, sink, TAG_TYPE_END );
219             }
220         }
221 
222         isEmptyElement = false;
223     }
224 
225     /** {@inheritDoc} */
226     @Override
227     protected void handleComment( XmlPullParser parser, Sink sink )
228         throws XmlPullParserException
229     {
230         String text = getText( parser ).trim();
231 
232         if ( text.startsWith( "MACRO" ) && !isSecondParsing() )
233         {
234             processMacro( text, sink );
235         }
236         else
237         {
238             super.handleComment( parser, sink );
239         }
240     }
241 
242     /** process macro embedded in XHTML commment */
243     private void processMacro( String text, Sink sink )
244         throws XmlPullParserException
245     {
246         String s = text.substring( text.indexOf( '{' ) + 1, text.indexOf( '}' ) );
247         s = escapeForMacro( s );
248         String[] params = StringUtils.split( s, "|" );
249         String macroName = params[0];
250 
251         Map<String, Object> parameters = new HashMap<>();
252         for ( int i = 1; i < params.length; i++ )
253         {
254             String[] param = StringUtils.split( params[i], "=" );
255             if ( param.length == 1 )
256             {
257                 throw new XmlPullParserException( "Missing 'key=value' pair for macro parameter: " + params[i] );
258             }
259 
260             String key = unescapeForMacro( param[0] );
261             String value = unescapeForMacro( param[1] );
262             parameters.put( key, value );
263         }
264 
265         MacroRequest request = new MacroRequest( sourceContent, new Xhtml5Parser(), parameters, getBasedir() );
266 
267         try
268         {
269             executeMacro( macroName, request, sink );
270         }
271         catch ( MacroExecutionException e )
272         {
273             throw new XmlPullParserException( "Unable to execute macro in the document: " + macroName );
274         }
275         catch ( MacroNotFoundException me )
276         {
277             throw new XmlPullParserException( "Macro not found: " + macroName );
278         }
279     }
280 
281     /**
282      * escapeForMacro
283      *
284      * @param s String
285      * @return String
286      */
287     private String escapeForMacro( String s )
288     {
289         if ( s == null || s.length() < 1 )
290         {
291             return s;
292         }
293 
294         String result = s;
295 
296         // use some outrageously out-of-place chars for text
297         // (these are device control one/two in unicode)
298         result = StringUtils.replace( result, "\\=", "\u0011" );
299         result = StringUtils.replace( result, "\\|", "\u0012" );
300 
301         return result;
302     }
303 
304     /**
305      * unescapeForMacro
306      *
307      * @param s String
308      * @return String
309      */
310     private String unescapeForMacro( String s )
311     {
312         if ( s == null || s.length() < 1 )
313         {
314             return s;
315         }
316 
317         String result = s;
318 
319         result = StringUtils.replace( result, "\u0011", "=" );
320         result = StringUtils.replace( result, "\u0012", "|" );
321 
322         return result;
323     }
324 
325     /**
326      * {@inheritDoc}
327      */
328     protected void init()
329     {
330         super.init();
331 
332         this.boxed = false;
333         this.isEmptyElement = false;
334     }
335 
336     /** {@inheritDoc} */
337     public void parse( Reader source, Sink sink, String reference )
338         throws ParseException
339     {
340         this.sourceContent = null;
341 
342         try
343         {
344             StringWriter contentWriter = new StringWriter();
345             IOUtil.copy( source, contentWriter );
346             sourceContent = contentWriter.toString();
347         }
348         catch ( IOException ex )
349         {
350             throw new ParseException( "Error reading the input source: " + ex.getMessage(), ex );
351         }
352         finally
353         {
354             IOUtil.close( source );
355         }
356 
357         try
358         {
359             super.parse( new StringReader( sourceContent ), sink, reference );
360         }
361         finally
362         {
363             this.sourceContent = null;
364         }
365     }
366 }