1 package org.apache.maven.doxia;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedInputStream;
23 import java.io.CharArrayWriter;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.io.OutputStream;
31 import java.io.Reader;
32 import java.io.Writer;
33 import java.util.HashMap;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.Locale;
37 import java.util.Map;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40
41 import org.apache.maven.doxia.logging.Log;
42 import org.apache.maven.doxia.logging.SystemStreamLog;
43 import org.apache.maven.doxia.parser.ParseException;
44 import org.apache.maven.doxia.parser.Parser;
45 import org.apache.maven.doxia.sink.Sink;
46 import org.apache.maven.doxia.sink.SinkFactory;
47 import org.apache.maven.doxia.util.ConverterUtil;
48 import org.apache.maven.doxia.wrapper.InputFileWrapper;
49 import org.apache.maven.doxia.wrapper.InputReaderWrapper;
50 import org.apache.maven.doxia.wrapper.OutputFileWrapper;
51 import org.apache.maven.doxia.wrapper.OutputStreamWrapper;
52 import org.codehaus.plexus.ContainerConfiguration;
53 import org.codehaus.plexus.DefaultContainerConfiguration;
54 import org.codehaus.plexus.DefaultPlexusContainer;
55 import org.codehaus.plexus.PlexusContainer;
56 import org.codehaus.plexus.PlexusContainerException;
57 import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
58 import org.codehaus.plexus.util.FileUtils;
59 import org.codehaus.plexus.util.IOUtil;
60 import org.codehaus.plexus.util.ReaderFactory;
61 import org.codehaus.plexus.util.SelectorUtils;
62 import org.codehaus.plexus.util.StringUtils;
63 import org.codehaus.plexus.util.WriterFactory;
64 import org.codehaus.plexus.util.xml.XmlStreamReader;
65 import org.codehaus.plexus.util.xml.XmlUtil;
66 import org.codehaus.plexus.util.xml.pull.MXParser;
67 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
68 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
69
70 import com.ibm.icu.text.CharsetDetector;
71 import com.ibm.icu.text.CharsetMatch;
72
73
74
75
76
77
78
79 public class DefaultConverter
80 implements Converter
81 {
82 private static final String APT_PARSER = "apt";
83
84 private static final String CONFLUENCE_PARSER = "confluence";
85
86 private static final String DOCBOOK_PARSER = "docbook";
87
88 private static final String FML_PARSER = "fml";
89
90 private static final String TWIKI_PARSER = "twiki";
91
92 private static final String XDOC_PARSER = "xdoc";
93
94 private static final String XHTML_PARSER = "xhtml";
95
96
97 public static final String[] SUPPORTED_FROM_FORMAT =
98 { APT_PARSER, CONFLUENCE_PARSER, DOCBOOK_PARSER, FML_PARSER, TWIKI_PARSER, XDOC_PARSER, XHTML_PARSER };
99
100 private static final String APT_SINK = "apt";
101
102 private static final String CONFLUENCE_SINK = "confluence";
103
104 private static final String DOCBOOK_SINK = "docbook";
105
106 private static final String FO_SINK = "fo";
107
108 private static final String ITEXT_SINK = "itext";
109
110 private static final String LATEX_SINK = "latex";
111
112 private static final String RTF_SINK = "rtf";
113
114 private static final String TWIKI_SINK = "twiki";
115
116 private static final String XDOC_SINK = "xdoc";
117
118 private static final String XHTML_SINK = "xhtml";
119
120
121 public static final String[] SUPPORTED_TO_FORMAT =
122 { APT_SINK, CONFLUENCE_SINK, DOCBOOK_SINK, FO_SINK, ITEXT_SINK, LATEX_SINK, RTF_SINK, TWIKI_SINK, XDOC_SINK,
123 XHTML_SINK };
124
125
126 private boolean formatOutput;
127
128
129 private PlexusContainer plexus;
130
131
132 private Log log;
133
134
135 public void enableLogging( Log log )
136 {
137 this.log = log;
138 }
139
140
141
142
143
144
145
146 protected Log getLog()
147 {
148 if ( log == null )
149 {
150 log = new SystemStreamLog();
151 }
152
153 return log;
154 }
155
156
157 public String[] getInputFormats()
158 {
159 return SUPPORTED_FROM_FORMAT;
160 }
161
162
163 public String[] getOutputFormats()
164 {
165 return SUPPORTED_TO_FORMAT;
166 }
167
168
169 public void convert( InputFileWrapper input, OutputFileWrapper output )
170 throws UnsupportedFormatException, ConverterException
171 {
172 if ( input == null )
173 {
174 throw new IllegalArgumentException( "input is required" );
175 }
176 if ( output == null )
177 {
178 throw new IllegalArgumentException( "output is required" );
179 }
180
181 try
182 {
183 startPlexusContainer();
184 }
185 catch ( PlexusContainerException e )
186 {
187 throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
188 }
189
190 try
191 {
192 if ( input.getFile().isFile() )
193 {
194 parse( input.getFile(), input.getEncoding(), input.getFormat(), output );
195 }
196 else
197 {
198 List files;
199 try
200 {
201 files =
202 FileUtils.getFiles( input.getFile(), "**/*." + input.getFormat(),
203 StringUtils.join( FileUtils.getDefaultExcludes(), ", " ) );
204 }
205 catch ( IOException e )
206 {
207 throw new ConverterException( "IOException: " + e.getMessage(), e );
208 }
209 catch ( IllegalStateException e )
210 {
211 throw new ConverterException( "IllegalStateException: " + e.getMessage(), e );
212 }
213
214 for ( Iterator it = files.iterator(); it.hasNext(); )
215 {
216 File f = (File) it.next();
217
218 parse( f, input.getEncoding(), input.getFormat(), output );
219 }
220 }
221 }
222 finally
223 {
224 stopPlexusContainer();
225 }
226 }
227
228
229 public void convert( InputReaderWrapper input, OutputStreamWrapper output )
230 throws UnsupportedFormatException, ConverterException
231 {
232 if ( input == null )
233 {
234 throw new IllegalArgumentException( "input is required" );
235 }
236 if ( output == null )
237 {
238 throw new IllegalArgumentException( "output is required" );
239 }
240
241 try
242 {
243 startPlexusContainer();
244 }
245 catch ( PlexusContainerException e )
246 {
247 throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
248 }
249
250 try
251 {
252 Parser parser;
253 try
254 {
255 parser = ConverterUtil.getParser( plexus, input.getFormat(), SUPPORTED_FROM_FORMAT );
256 parser.enableLogging( log );
257 }
258 catch ( ComponentLookupException e )
259 {
260 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
261 }
262
263 if ( getLog().isDebugEnabled() )
264 {
265 getLog().debug( "Parser used: " + parser.getClass().getName() );
266 }
267
268 SinkFactory sinkFactory;
269 try
270 {
271 sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
272 }
273 catch ( ComponentLookupException e )
274 {
275 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
276 }
277
278 Sink sink;
279 try
280 {
281 sink = sinkFactory.createSink( output.getOutputStream(), output.getEncoding() );
282 }
283 catch ( IOException e )
284 {
285 throw new ConverterException( "IOException: " + e.getMessage(), e );
286 }
287 sink.enableLogging( log );
288
289 if ( getLog().isDebugEnabled() )
290 {
291 getLog().debug( "Sink used: " + sink.getClass().getName() );
292 }
293
294 parse( parser, input.getReader(), sink );
295 }
296 finally
297 {
298 stopPlexusContainer();
299 }
300 }
301
302
303 public void setFormatOutput( boolean formatOutput )
304 {
305 this.formatOutput = formatOutput;
306 }
307
308
309
310
311
312
313
314
315
316
317
318
319
320 private void parse( File inputFile, String inputEncoding, String inputFormat, OutputFileWrapper output )
321 throws ConverterException, UnsupportedFormatException
322 {
323 if ( getLog().isDebugEnabled() )
324 {
325 getLog().debug(
326 "Parsing file from '" + inputFile.getAbsolutePath() + "' with the encoding '"
327 + inputEncoding + "' to '" + output.getFile().getAbsolutePath()
328 + "' with the encoding '" + output.getEncoding() + "'" );
329 }
330
331 if ( inputEncoding.equals( InputFileWrapper.AUTO_ENCODING ) )
332 {
333 inputEncoding = autoDetectEncoding( inputFile );
334 if ( getLog().isDebugEnabled() )
335 {
336 getLog().debug( "Auto detect encoding: " + inputEncoding );
337 }
338 }
339
340 if ( inputFormat.equals( InputFileWrapper.AUTO_FORMAT ) )
341 {
342 inputFormat = autoDetectFormat( inputFile, inputEncoding );
343 if ( getLog().isDebugEnabled() )
344 {
345 getLog().debug( "Auto detect input format: " + inputFormat );
346 }
347 }
348
349 Parser parser;
350 try
351 {
352 parser = ConverterUtil.getParser( plexus, inputFormat, SUPPORTED_FROM_FORMAT );
353 parser.enableLogging( log );
354 }
355 catch ( ComponentLookupException e )
356 {
357 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
358 }
359
360 File outputFile;
361 if ( output.getFile().exists() && output.getFile().isDirectory() )
362 {
363 outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
364 }
365 else
366 {
367 if ( !SelectorUtils.match( "**.*", output.getFile().getName() ) )
368 {
369
370 output.getFile().mkdirs();
371 outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
372 }
373 else
374 {
375 output.getFile().getParentFile().mkdirs();
376 outputFile = output.getFile();
377 }
378 }
379
380 Reader reader;
381 try
382 {
383 if ( inputEncoding != null )
384 {
385 if ( parser.getType() == Parser.XML_TYPE )
386 {
387 reader = ReaderFactory.newXmlReader( inputFile );
388 }
389 else
390 {
391 reader = ReaderFactory.newReader( inputFile, inputEncoding );
392 }
393 }
394 else
395 {
396 reader = ReaderFactory.newPlatformReader( inputFile );
397 }
398 }
399 catch ( IOException e )
400 {
401 throw new ConverterException( "IOException: " + e.getMessage(), e );
402 }
403
404 SinkFactory sinkFactory;
405 try
406 {
407 sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
408 }
409 catch ( ComponentLookupException e )
410 {
411 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
412 }
413
414 Sink sink;
415 try
416 {
417 String outputEncoding;
418 if ( StringUtils.isEmpty( output.getEncoding() )
419 || output.getEncoding().equals( OutputFileWrapper.AUTO_ENCODING ) )
420 {
421 outputEncoding = inputEncoding;
422 }
423 else
424 {
425 outputEncoding = output.getEncoding();
426 }
427
428 OutputStream out = new FileOutputStream( outputFile );
429 sink = sinkFactory.createSink( out, outputEncoding );
430 }
431 catch ( IOException e )
432 {
433 throw new ConverterException( "IOException: " + e.getMessage(), e );
434 }
435
436 sink.enableLogging( log );
437
438 if ( getLog().isDebugEnabled() )
439 {
440 getLog().debug( "Sink used: " + sink.getClass().getName() );
441 }
442
443 parse( parser, reader, sink );
444
445 if ( formatOutput && ( output.getFormat().equals( DOCBOOK_SINK ) || output.getFormat().equals( FO_SINK )
446 || output.getFormat().equals( ITEXT_SINK ) || output.getFormat().equals( XDOC_SINK )
447 || output.getFormat().equals( XHTML_SINK ) ) )
448 {
449
450
451 if ( output.getFormat().equals( DOCBOOK_SINK ) || inputFormat.equals( DOCBOOK_PARSER ) )
452 {
453 return;
454 }
455 Reader r = null;
456 Writer w = null;
457 try
458 {
459 r = ReaderFactory.newXmlReader( outputFile );
460 CharArrayWriter caw = new CharArrayWriter();
461 XmlUtil.prettyFormat( r, caw );
462 w = WriterFactory.newXmlWriter( outputFile );
463 w.write( caw.toString() );
464 }
465 catch ( IOException e )
466 {
467 throw new ConverterException( "IOException: " + e.getMessage(), e );
468 }
469 finally
470 {
471 IOUtil.close( r );
472 IOUtil.close( w );
473 }
474 }
475 }
476
477
478
479
480
481
482
483 private void parse( Parser parser, Reader reader, Sink sink )
484 throws ConverterException
485 {
486 try
487 {
488 parser.parse( reader, sink );
489 }
490 catch ( ParseException e )
491 {
492 throw new ConverterException( "ParseException: " + e.getMessage(), e );
493 }
494 finally
495 {
496 IOUtil.close( reader );
497 sink.flush();
498 sink.close();
499 }
500 }
501
502
503
504
505
506
507 private void startPlexusContainer()
508 throws PlexusContainerException
509 {
510 if ( plexus != null )
511 {
512 return;
513 }
514
515 Map context = new HashMap();
516 context.put( "basedir", new File( "" ).getAbsolutePath() );
517
518 ContainerConfiguration containerConfiguration = new DefaultContainerConfiguration();
519 containerConfiguration.setName( "Doxia" );
520 containerConfiguration.setContext( context );
521
522 plexus = new DefaultPlexusContainer( containerConfiguration );
523 }
524
525
526
527
528 private void stopPlexusContainer()
529 {
530 if ( plexus == null )
531 {
532 return;
533 }
534
535 plexus.dispose();
536 plexus = null;
537 }
538
539
540
541
542
543
544
545
546
547 private static String autoDetectEncoding( File f )
548 {
549 if ( !f.isFile() )
550 {
551 throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
552 + "' is not a file, could not detect encoding." );
553 }
554
555 Reader reader = null;
556 InputStream is = null;
557 try
558 {
559 if ( XmlUtil.isXml( f ) )
560 {
561 reader = ReaderFactory.newXmlReader( f );
562 return ( (XmlStreamReader) reader ).getEncoding();
563 }
564
565 is = new BufferedInputStream( new FileInputStream( f ) );
566 CharsetDetector detector = new CharsetDetector();
567 detector.setText( is );
568 CharsetMatch match = detector.detect();
569
570 return match.getName().toUpperCase( Locale.ENGLISH );
571 }
572 catch ( IOException e )
573 {
574
575 }
576 finally
577 {
578 IOUtil.close( reader );
579 IOUtil.close( is );
580 }
581
582 StringBuffer msg = new StringBuffer();
583 msg.append( "Could not detect the encoding for file: " );
584 msg.append( f.getAbsolutePath() );
585 msg.append( "\n Specify explicitly the encoding." );
586 throw new UnsupportedOperationException( msg.toString() );
587 }
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602 private static String autoDetectFormat( File f, String encoding )
603 {
604 if ( !f.isFile() )
605 {
606 throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
607 + "' is not a file, could not detect format." );
608 }
609
610 for ( int i = 0; i < SUPPORTED_FROM_FORMAT.length; i++ )
611 {
612 String supportedFromFormat = SUPPORTED_FROM_FORMAT[i];
613
614
615 if ( supportedFromFormat.equalsIgnoreCase( APT_PARSER )
616 && isDoxiaFileName( f, supportedFromFormat ) )
617 {
618 return supportedFromFormat;
619 }
620 else if ( supportedFromFormat.equalsIgnoreCase( CONFLUENCE_PARSER )
621 && isDoxiaFileName( f, supportedFromFormat ) )
622 {
623 return supportedFromFormat;
624 }
625 else if ( supportedFromFormat.equalsIgnoreCase( TWIKI_PARSER )
626 && isDoxiaFileName( f, supportedFromFormat ) )
627 {
628 return supportedFromFormat;
629 }
630
631
632 String firstTag = getFirstTag( f );
633 if ( firstTag == null )
634 {
635 continue;
636 }
637 else if ( firstTag.equals( "article" )
638 && supportedFromFormat.equalsIgnoreCase( DOCBOOK_PARSER ) )
639 {
640 return supportedFromFormat;
641 }
642 else if ( firstTag.equals( "faqs" )
643 && supportedFromFormat.equalsIgnoreCase( FML_PARSER ) )
644 {
645 return supportedFromFormat;
646 }
647 else if ( firstTag.equals( "document" )
648 && supportedFromFormat.equalsIgnoreCase( XDOC_PARSER ) )
649 {
650 return supportedFromFormat;
651 }
652 else if ( firstTag.equals( "html" )
653 && supportedFromFormat.equalsIgnoreCase( XHTML_PARSER ) )
654 {
655 return supportedFromFormat;
656 }
657 }
658
659 StringBuffer msg = new StringBuffer();
660 msg.append( "Could not detect the Doxia format for file: " );
661 msg.append( f.getAbsolutePath() );
662 msg.append( "\n Specify explicitly the Doxia format." );
663 throw new UnsupportedOperationException( msg.toString() );
664 }
665
666
667
668
669
670
671 private static boolean isDoxiaFileName( File f, String format )
672 {
673 if ( f == null )
674 {
675 throw new IllegalArgumentException( "f is required." );
676 }
677
678 Pattern pattern = Pattern.compile( "(.*?)\\." + format.toLowerCase( Locale.ENGLISH ) + "$" );
679 Matcher matcher = pattern.matcher( f.getName().toLowerCase( Locale.ENGLISH ) );
680
681 return matcher.matches();
682 }
683
684
685
686
687
688 private static String getFirstTag( File xmlFile )
689 {
690 if ( xmlFile == null )
691 {
692 throw new IllegalArgumentException( "xmlFile is required." );
693 }
694 if ( !xmlFile.isFile() )
695 {
696 throw new IllegalArgumentException( "The file '" + xmlFile.getAbsolutePath() + "' is not a file." );
697 }
698
699 Reader reader = null;
700 try
701 {
702 reader = ReaderFactory.newXmlReader( xmlFile );
703 XmlPullParser parser = new MXParser();
704 parser.setInput( reader );
705 int eventType = parser.getEventType();
706 while ( eventType != XmlPullParser.END_DOCUMENT )
707 {
708 if ( eventType == XmlPullParser.START_TAG )
709 {
710 return parser.getName();
711 }
712 eventType = parser.nextToken();
713 }
714 }
715 catch ( FileNotFoundException e )
716 {
717 return null;
718 }
719 catch ( XmlPullParserException e )
720 {
721 return null;
722 }
723 catch ( IOException e )
724 {
725 return null;
726 }
727 finally
728 {
729 IOUtil.close( reader );
730 }
731
732 return null;
733 }
734 }