View Javadoc

1   package org.apache.maven.doxia;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedInputStream;
23  import java.io.CharArrayWriter;
24  import java.io.File;
25  import java.io.FileInputStream;
26  import java.io.FileNotFoundException;
27  import java.io.FileOutputStream;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.io.OutputStream;
31  import java.io.Reader;
32  import java.io.Writer;
33  import java.util.HashMap;
34  import java.util.Iterator;
35  import java.util.List;
36  import java.util.Locale;
37  import java.util.Map;
38  import java.util.regex.Matcher;
39  import java.util.regex.Pattern;
40  
41  import org.apache.maven.doxia.logging.Log;
42  import org.apache.maven.doxia.logging.SystemStreamLog;
43  import org.apache.maven.doxia.parser.ParseException;
44  import org.apache.maven.doxia.parser.Parser;
45  import org.apache.maven.doxia.sink.Sink;
46  import org.apache.maven.doxia.sink.SinkFactory;
47  import org.apache.maven.doxia.util.ConverterUtil;
48  import org.apache.maven.doxia.wrapper.InputFileWrapper;
49  import org.apache.maven.doxia.wrapper.InputReaderWrapper;
50  import org.apache.maven.doxia.wrapper.OutputFileWrapper;
51  import org.apache.maven.doxia.wrapper.OutputStreamWrapper;
52  import org.codehaus.plexus.ContainerConfiguration;
53  import org.codehaus.plexus.DefaultContainerConfiguration;
54  import org.codehaus.plexus.DefaultPlexusContainer;
55  import org.codehaus.plexus.PlexusContainer;
56  import org.codehaus.plexus.PlexusContainerException;
57  import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
58  import org.codehaus.plexus.util.FileUtils;
59  import org.codehaus.plexus.util.IOUtil;
60  import org.codehaus.plexus.util.ReaderFactory;
61  import org.codehaus.plexus.util.SelectorUtils;
62  import org.codehaus.plexus.util.StringUtils;
63  import org.codehaus.plexus.util.WriterFactory;
64  import org.codehaus.plexus.util.xml.XmlStreamReader;
65  import org.codehaus.plexus.util.xml.XmlUtil;
66  import org.codehaus.plexus.util.xml.pull.MXParser;
67  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
68  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
69  
70  import com.ibm.icu.text.CharsetDetector;
71  import com.ibm.icu.text.CharsetMatch;
72  
73  /**
74   * Default implementation of <code>Converter</code>
75   *
76   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
77   * @version $Id: DefaultConverter.java 784074 2009-06-12 11:14:35Z ltheussl $
78   */
79  public class DefaultConverter
80      implements Converter
81  {
82      private static final String APT_PARSER = "apt";
83  
84      private static final String CONFLUENCE_PARSER = "confluence";
85  
86      private static final String DOCBOOK_PARSER = "docbook";
87  
88      private static final String FML_PARSER = "fml";
89  
90      private static final String TWIKI_PARSER = "twiki";
91  
92      private static final String XDOC_PARSER = "xdoc";
93  
94      private static final String XHTML_PARSER = "xhtml";
95  
96      /** Supported input format, i.e. supported Doxia parser */
97      public static final String[] SUPPORTED_FROM_FORMAT =
98          { APT_PARSER, CONFLUENCE_PARSER, DOCBOOK_PARSER, FML_PARSER, TWIKI_PARSER, XDOC_PARSER, XHTML_PARSER };
99  
100     private static final String APT_SINK = "apt";
101 
102     private static final String CONFLUENCE_SINK = "confluence";
103 
104     private static final String DOCBOOK_SINK = "docbook";
105 
106     private static final String FO_SINK = "fo";
107 
108     private static final String ITEXT_SINK = "itext";
109 
110     private static final String LATEX_SINK = "latex";
111 
112     private static final String RTF_SINK = "rtf";
113 
114     private static final String TWIKI_SINK = "twiki";
115 
116     private static final String XDOC_SINK = "xdoc";
117 
118     private static final String XHTML_SINK = "xhtml";
119 
120     /** Supported output format, i.e. supported Doxia Sink */
121     public static final String[] SUPPORTED_TO_FORMAT =
122         { APT_SINK, CONFLUENCE_SINK, DOCBOOK_SINK, FO_SINK, ITEXT_SINK, LATEX_SINK, RTF_SINK, TWIKI_SINK, XDOC_SINK,
123             XHTML_SINK };
124 
125     /** Flag to format the generated files, actually only for XML based sinks. */
126     private boolean formatOutput;
127 
128     /** Plexus container */
129     private PlexusContainer plexus;
130 
131     /** Doxia logger */
132     private Log log;
133 
134     /** {@inheritDoc} */
135     public void enableLogging( Log log )
136     {
137         this.log = log;
138     }
139 
140     /**
141      * Returns a logger for this sink.
142      * If no logger has been configured, a new SystemStreamLog is returned.
143      *
144      * @return Log
145      */
146     protected Log getLog()
147     {
148         if ( log == null )
149         {
150             log = new SystemStreamLog();
151         }
152 
153         return log;
154     }
155 
156     /** {@inheritDoc} */
157     public String[] getInputFormats()
158     {
159         return SUPPORTED_FROM_FORMAT;
160     }
161 
162     /** {@inheritDoc} */
163     public String[] getOutputFormats()
164     {
165         return SUPPORTED_TO_FORMAT;
166     }
167 
168     /** {@inheritDoc} */
169     public void convert( InputFileWrapper input, OutputFileWrapper output )
170         throws UnsupportedFormatException, ConverterException
171     {
172         if ( input == null )
173         {
174             throw new IllegalArgumentException( "input is required" );
175         }
176         if ( output == null )
177         {
178             throw new IllegalArgumentException( "output is required" );
179         }
180 
181         try
182         {
183             startPlexusContainer();
184         }
185         catch ( PlexusContainerException e )
186         {
187             throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
188         }
189 
190         try
191         {
192             if ( input.getFile().isFile() )
193             {
194                 parse( input.getFile(), input.getEncoding(), input.getFormat(), output );
195             }
196             else
197             {
198                 List files;
199                 try
200                 {
201                     files =
202                         FileUtils.getFiles( input.getFile(), "**/*." + input.getFormat(),
203                                             StringUtils.join( FileUtils.getDefaultExcludes(), ", " ) );
204                 }
205                 catch ( IOException e )
206                 {
207                     throw new ConverterException( "IOException: " + e.getMessage(), e );
208                 }
209                 catch ( IllegalStateException e )
210                 {
211                     throw new ConverterException( "IllegalStateException: " + e.getMessage(), e );
212                 }
213 
214                 for ( Iterator it = files.iterator(); it.hasNext(); )
215                 {
216                     File f = (File) it.next();
217 
218                     parse( f, input.getEncoding(), input.getFormat(), output );
219                 }
220             }
221         }
222         finally
223         {
224             stopPlexusContainer();
225         }
226     }
227 
228     /** {@inheritDoc} */
229     public void convert( InputReaderWrapper input, OutputStreamWrapper output )
230         throws UnsupportedFormatException, ConverterException
231     {
232         if ( input == null )
233         {
234             throw new IllegalArgumentException( "input is required" );
235         }
236         if ( output == null )
237         {
238             throw new IllegalArgumentException( "output is required" );
239         }
240 
241         try
242         {
243             startPlexusContainer();
244         }
245         catch ( PlexusContainerException e )
246         {
247             throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
248         }
249 
250         try
251         {
252             Parser parser;
253             try
254             {
255                 parser = ConverterUtil.getParser( plexus, input.getFormat(), SUPPORTED_FROM_FORMAT );
256                 parser.enableLogging( log );
257             }
258             catch ( ComponentLookupException e )
259             {
260                 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
261             }
262 
263             if ( getLog().isDebugEnabled() )
264             {
265                 getLog().debug( "Parser used: " + parser.getClass().getName() );
266             }
267 
268             SinkFactory sinkFactory;
269             try
270             {
271                 sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
272             }
273             catch ( ComponentLookupException e )
274             {
275                 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
276             }
277 
278             Sink sink;
279             try
280             {
281                 sink = sinkFactory.createSink( output.getOutputStream(), output.getEncoding() );
282             }
283             catch ( IOException e )
284             {
285                 throw new ConverterException( "IOException: " + e.getMessage(), e );
286             }
287             sink.enableLogging( log );
288 
289             if ( getLog().isDebugEnabled() )
290             {
291                 getLog().debug( "Sink used: " + sink.getClass().getName() );
292             }
293 
294             parse( parser, input.getReader(), sink );
295         }
296         finally
297         {
298             stopPlexusContainer();
299         }
300     }
301 
302     /** {@inheritDoc} */
303     public void setFormatOutput( boolean formatOutput )
304     {
305         this.formatOutput = formatOutput;
306     }
307 
308     // ----------------------------------------------------------------------
309     // Private methods
310     // ----------------------------------------------------------------------
311 
312     /**
313      * @param inputFile a not null existing file.
314      * @param inputEncoding a not null supported encoding or {@link InputFileWrapper#AUTO_ENCODING}
315      * @param inputFormat  a not null supported format or {@link InputFileWrapper#AUTO_FORMAT}
316      * @param output not null OutputFileWrapper object
317      * @throws ConverterException if any
318      * @throws UnsupportedFormatException if any
319      */
320     private void parse( File inputFile, String inputEncoding, String inputFormat, OutputFileWrapper output )
321         throws ConverterException, UnsupportedFormatException
322     {
323         if ( getLog().isDebugEnabled() )
324         {
325             getLog().debug(
326                             "Parsing file from '" + inputFile.getAbsolutePath() + "' with the encoding '"
327                                 + inputEncoding + "' to '" + output.getFile().getAbsolutePath()
328                                 + "' with the encoding '" + output.getEncoding() + "'" );
329         }
330 
331         if ( inputEncoding.equals( InputFileWrapper.AUTO_ENCODING ) )
332         {
333             inputEncoding = autoDetectEncoding( inputFile );
334             if ( getLog().isDebugEnabled() )
335             {
336                 getLog().debug( "Auto detect encoding: " + inputEncoding );
337             }
338         }
339 
340         if ( inputFormat.equals( InputFileWrapper.AUTO_FORMAT ) )
341         {
342             inputFormat = autoDetectFormat( inputFile, inputEncoding );
343             if ( getLog().isDebugEnabled() )
344             {
345                 getLog().debug( "Auto detect input format: " + inputFormat );
346             }
347         }
348 
349         Parser parser;
350         try
351         {
352             parser = ConverterUtil.getParser( plexus, inputFormat, SUPPORTED_FROM_FORMAT );
353             parser.enableLogging( log );
354         }
355         catch ( ComponentLookupException e )
356         {
357             throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
358         }
359 
360         File outputFile;
361         if ( output.getFile().exists() && output.getFile().isDirectory() )
362         {
363             outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
364         }
365         else
366         {
367             if ( !SelectorUtils.match( "**.*", output.getFile().getName() ) )
368             {
369                 // assume it is a directory
370                 output.getFile().mkdirs();
371                 outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
372             }
373             else
374             {
375                 output.getFile().getParentFile().mkdirs();
376                 outputFile = output.getFile();
377             }
378         }
379 
380         Reader reader;
381         try
382         {
383             if ( inputEncoding != null )
384             {
385                 if ( parser.getType() == Parser.XML_TYPE )
386                 {
387                     reader = ReaderFactory.newXmlReader( inputFile );
388                 }
389                 else
390                 {
391                     reader = ReaderFactory.newReader( inputFile, inputEncoding );
392                 }
393             }
394             else
395             {
396                 reader = ReaderFactory.newPlatformReader( inputFile );
397             }
398         }
399         catch ( IOException e )
400         {
401             throw new ConverterException( "IOException: " + e.getMessage(), e );
402         }
403 
404         SinkFactory sinkFactory;
405         try
406         {
407             sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
408         }
409         catch ( ComponentLookupException e )
410         {
411             throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
412         }
413 
414         Sink sink;
415         try
416         {
417             String outputEncoding;
418             if ( StringUtils.isEmpty( output.getEncoding() )
419                 || output.getEncoding().equals( OutputFileWrapper.AUTO_ENCODING ) )
420             {
421                 outputEncoding = inputEncoding;
422             }
423             else
424             {
425                 outputEncoding = output.getEncoding();
426             }
427 
428             OutputStream out = new FileOutputStream( outputFile );
429             sink = sinkFactory.createSink( out, outputEncoding );
430         }
431         catch ( IOException e )
432         {
433             throw new ConverterException( "IOException: " + e.getMessage(), e );
434         }
435 
436         sink.enableLogging( log );
437 
438         if ( getLog().isDebugEnabled() )
439         {
440             getLog().debug( "Sink used: " + sink.getClass().getName() );
441         }
442 
443         parse( parser, reader, sink );
444 
445         if ( formatOutput && ( output.getFormat().equals( DOCBOOK_SINK ) || output.getFormat().equals( FO_SINK )
446             || output.getFormat().equals( ITEXT_SINK ) || output.getFormat().equals( XDOC_SINK )
447             || output.getFormat().equals( XHTML_SINK ) ) )
448         {
449             // format all xml files excluding docbook which is buggy
450             // TODO Add doc book format
451             if ( output.getFormat().equals( DOCBOOK_SINK ) || inputFormat.equals( DOCBOOK_PARSER ) )
452             {
453                 return;
454             }
455             Reader r = null;
456             Writer w = null;
457             try
458             {
459                 r = ReaderFactory.newXmlReader( outputFile );
460                 CharArrayWriter caw = new CharArrayWriter();
461                 XmlUtil.prettyFormat( r, caw );
462                 w = WriterFactory.newXmlWriter( outputFile );
463                 w.write( caw.toString() );
464             }
465             catch ( IOException e )
466             {
467                 throw new ConverterException( "IOException: " + e.getMessage(), e );
468             }
469             finally
470             {
471                 IOUtil.close( r );
472                 IOUtil.close( w );
473             }
474         }
475     }
476 
477     /**
478      * @param parser not null
479      * @param reader not null
480      * @param sink not null
481      * @throws ConverterException if any
482      */
483     private void parse( Parser parser, Reader reader, Sink sink )
484         throws ConverterException
485     {
486         try
487         {
488             parser.parse( reader, sink );
489         }
490         catch ( ParseException e )
491         {
492             throw new ConverterException( "ParseException: " + e.getMessage(), e );
493         }
494         finally
495         {
496             IOUtil.close( reader );
497             sink.flush();
498             sink.close();
499         }
500     }
501 
502     /**
503      * Start the Plexus container.
504      *
505      * @throws PlexusContainerException if any
506      */
507     private void startPlexusContainer()
508         throws PlexusContainerException
509     {
510         if ( plexus != null )
511         {
512             return;
513         }
514 
515         Map context = new HashMap();
516         context.put( "basedir", new File( "" ).getAbsolutePath() );
517 
518         ContainerConfiguration containerConfiguration = new DefaultContainerConfiguration();
519         containerConfiguration.setName( "Doxia" );
520         containerConfiguration.setContext( context );
521 
522         plexus = new DefaultPlexusContainer( containerConfiguration );
523     }
524 
525     /**
526      * Stop the Plexus container.
527      */
528     private void stopPlexusContainer()
529     {
530         if ( plexus == null )
531         {
532             return;
533         }
534 
535         plexus.dispose();
536         plexus = null;
537     }
538 
539     /**
540      * @param f not null file
541      * @return the detected encoding for f or <code>null</code> if not able to detect it.
542      * @throws IllegalArgumentException if f is not a file.
543      * @throws UnsupportedOperationException if could not detect the file encoding.
544      * @see {@link XmlStreamReader#getEncoding()} for xml files
545      * @see {@link CharsetDetector#detect()} for text files
546      */
547     private static String autoDetectEncoding( File f )
548     {
549         if ( !f.isFile() )
550         {
551             throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
552                 + "' is not a file, could not detect encoding." );
553         }
554 
555         Reader reader = null;
556         InputStream is = null;
557         try
558         {
559             if ( XmlUtil.isXml( f ) )
560             {
561                 reader = ReaderFactory.newXmlReader( f );
562                 return ( (XmlStreamReader) reader ).getEncoding();
563             }
564 
565             is = new BufferedInputStream( new FileInputStream( f ) );
566             CharsetDetector detector = new CharsetDetector();
567             detector.setText( is );
568             CharsetMatch match = detector.detect();
569 
570             return match.getName().toUpperCase( Locale.ENGLISH );
571         }
572         catch ( IOException e )
573         {
574             // nop
575         }
576         finally
577         {
578             IOUtil.close( reader );
579             IOUtil.close( is );
580         }
581 
582         StringBuffer msg = new StringBuffer();
583         msg.append( "Could not detect the encoding for file: " );
584         msg.append( f.getAbsolutePath() );
585         msg.append( "\n Specify explicitly the encoding." );
586         throw new UnsupportedOperationException( msg.toString() );
587     }
588 
589     /**
590      * Auto detect Doxia format for the given file depending:
591      * <ul>
592      * <li>the file name for TextMarkup based Doxia files</li>
593      * <li>the file content for XMLMarkup based Doxia files</li>
594      * </ul>
595      *
596      * @param f not null file
597      * @param encoding a not null encoding.
598      * @return the detected encoding from f.
599      * @throws IllegalArgumentException if f is not a file.
600      * @throws UnsupportedOperationException if could not detect the Doxia format.
601      */
602     private static String autoDetectFormat( File f, String encoding )
603     {
604         if ( !f.isFile() )
605         {
606             throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
607                 + "' is not a file, could not detect format." );
608         }
609 
610         for ( int i = 0; i < SUPPORTED_FROM_FORMAT.length; i++ )
611         {
612             String supportedFromFormat = SUPPORTED_FROM_FORMAT[i];
613 
614             // Handle Doxia text files
615             if ( supportedFromFormat.equalsIgnoreCase( APT_PARSER )
616                 && isDoxiaFileName( f, supportedFromFormat ) )
617             {
618                 return supportedFromFormat;
619             }
620             else if ( supportedFromFormat.equalsIgnoreCase( CONFLUENCE_PARSER )
621                 && isDoxiaFileName( f, supportedFromFormat ) )
622             {
623                 return supportedFromFormat;
624             }
625             else if ( supportedFromFormat.equalsIgnoreCase( TWIKI_PARSER )
626                 && isDoxiaFileName( f, supportedFromFormat ) )
627             {
628                 return supportedFromFormat;
629             }
630 
631             // Handle Doxia xml files
632             String firstTag = getFirstTag( f );
633             if ( firstTag == null )
634             {
635                 continue;
636             }
637             else if ( firstTag.equals( "article" )
638                 && supportedFromFormat.equalsIgnoreCase( DOCBOOK_PARSER ) )
639             {
640                 return supportedFromFormat;
641             }
642             else if ( firstTag.equals( "faqs" )
643                 && supportedFromFormat.equalsIgnoreCase( FML_PARSER ) )
644             {
645                 return supportedFromFormat;
646             }
647             else if ( firstTag.equals( "document" )
648                 && supportedFromFormat.equalsIgnoreCase( XDOC_PARSER ) )
649             {
650                 return supportedFromFormat;
651             }
652             else if ( firstTag.equals( "html" )
653                 && supportedFromFormat.equalsIgnoreCase( XHTML_PARSER ) )
654             {
655                 return supportedFromFormat;
656             }
657         }
658 
659         StringBuffer msg = new StringBuffer();
660         msg.append( "Could not detect the Doxia format for file: " );
661         msg.append( f.getAbsolutePath() );
662         msg.append( "\n Specify explicitly the Doxia format." );
663         throw new UnsupportedOperationException( msg.toString() );
664     }
665 
666     /**
667      * @param f not null
668      * @param format could be null
669      * @return <code>true</code> if the file name computes the format.
670      */
671     private static boolean isDoxiaFileName( File f, String format )
672     {
673         if ( f == null )
674         {
675             throw new IllegalArgumentException( "f is required." );
676         }
677 
678         Pattern pattern = Pattern.compile( "(.*?)\\." + format.toLowerCase( Locale.ENGLISH ) + "$" );
679         Matcher matcher = pattern.matcher( f.getName().toLowerCase( Locale.ENGLISH ) );
680 
681         return matcher.matches();
682     }
683 
684     /**
685      * @param xmlFile not null and should be a file.
686      * @return the first tag name if found, <code>null</code> in other case.
687      */
688     private static String getFirstTag( File xmlFile )
689     {
690         if ( xmlFile == null )
691         {
692             throw new IllegalArgumentException( "xmlFile is required." );
693         }
694         if ( !xmlFile.isFile() )
695         {
696             throw new IllegalArgumentException( "The file '" + xmlFile.getAbsolutePath() + "' is not a file." );
697         }
698 
699         Reader reader = null;
700         try
701         {
702             reader = ReaderFactory.newXmlReader( xmlFile );
703             XmlPullParser parser = new MXParser();
704             parser.setInput( reader );
705             int eventType = parser.getEventType();
706             while ( eventType != XmlPullParser.END_DOCUMENT )
707             {
708                 if ( eventType == XmlPullParser.START_TAG )
709                 {
710                     return parser.getName();
711                 }
712                 eventType = parser.nextToken();
713             }
714         }
715         catch ( FileNotFoundException e )
716         {
717             return null;
718         }
719         catch ( XmlPullParserException e )
720         {
721             return null;
722         }
723         catch ( IOException e )
724         {
725             return null;
726         }
727         finally
728         {
729             IOUtil.close( reader );
730         }
731 
732         return null;
733     }
734 }