View Javadoc
1   package org.apache.maven.doxia.module.docbook;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.util.Collection;
24  import java.util.HashSet;
25  import java.util.Stack;
26  
27  import org.apache.maven.doxia.macro.MacroExecutionException;
28  import org.apache.maven.doxia.markup.HtmlMarkup;
29  import org.apache.maven.doxia.parser.AbstractXmlParser;
30  import org.apache.maven.doxia.parser.Parser;
31  import org.apache.maven.doxia.sink.Sink;
32  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
33  import org.codehaus.plexus.component.annotations.Component;
34  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
35  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
36  
37  /**
38   * Parse a <a href="http://www.docbook.org/schemas/simplified"><code>Simplified DocBook</code></a> document
39   * and emit events into the specified doxia Sink.
40   *
41   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
42   * @since 1.0
43   */
44  @Component( role = Parser.class, hint = "docbook" )
45  public class DocBookParser
46      extends AbstractXmlParser
47      implements DocbookMarkup, SimplifiedDocbookMarkup
48  {
49      /**
50       * Level counter for calculating the section level.
51       */
52      private int level;
53  
54      /**
55       * Used to distinguish italic from bold.
56       */
57      private boolean isBold;
58  
59      private boolean inHead;
60  
61      private boolean ignore;
62  
63      private boolean simpleTag;
64  
65      private char trademark;
66  
67      /**
68       * A selective stack of parent elements
69       */
70      private final Stack<String> parent = new Stack<>();
71  
72      /**
73       * The list of DocBook elements that introduce a new level of hierarchy.
74       */
75      private static final Collection<String> HIER_ELEMENTS = new HashSet<>();
76  
77      /**
78       * Simplified DocBook elements that are direct children of &lt;article&gt;
79       * and that should be emitted into the Sink's head.
80       */
81      private static final Collection<String> META_ELEMENTS = new HashSet<>();
82  
83      /**
84       * Simplified DocBook elements that occur within &lt;articleinfo&gt;
85       * and that are currently recognized by the parser.
86       */
87      private static final Collection<String> ARTICLEINFO_ELEMENTS = new HashSet<>();
88  
89      /**
90       * The list of DocBook elements that will be rendered verbatim
91       */
92      private static final Collection<String> VERBATIM_ELEMENTS = new HashSet<>();
93  
94      /**
95       * The list of DocBook elements that will be rendered inline and bold
96       */
97      private static final Collection<String> BOLD_ELEMENTS = new HashSet<>();
98  
99      /**
100      * The list of DocBook elements that will be rendered inline and italic
101      */
102     private static final Collection<String> ITALIC_ELEMENTS = new HashSet<>();
103 
104     /**
105      * The list of DocBook elements that will be rendered inline and monospace
106      */
107     private static final Collection<String> MONOSPACE_ELEMENTS = new HashSet<>();
108 
109     /**
110      * The list of DocBook elements that may be ignored, either because they don't
111      * require any special processing or because they are not yet implemented.
112      */
113     private static final Collection<String> IGNORABLE_ELEMENTS = new HashSet<>();
114     static
115     {
116         META_ELEMENTS.add( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() );
117         META_ELEMENTS.add( SimplifiedDocbookMarkup.AUTHORBLURB_TAG.toString() );
118         META_ELEMENTS.add( SimplifiedDocbookMarkup.SUBTITLE_TAG.toString() );
119         META_ELEMENTS.add( SimplifiedDocbookMarkup.TITLE_TAG.toString() );
120         META_ELEMENTS.add( SimplifiedDocbookMarkup.TITLEABBREV_TAG.toString() );
121 
122         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.TITLE_TAG.toString() );
123         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() );
124         ARTICLEINFO_ELEMENTS.add( SimplifiedDocbookMarkup.DATE_TAG.toString() );
125 
126         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.SECTION_TAG.toString() );
127         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.APPENDIX_TAG.toString() );
128         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOGRAPHY_TAG.toString() );
129         HIER_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIODIV_TAG.toString() );
130 
131         VERBATIM_ELEMENTS.add( SimplifiedDocbookMarkup.PROGRAMLISTING_TAG.toString() );
132         VERBATIM_ELEMENTS.add( SimplifiedDocbookMarkup.LITERALLAYOUT_TAG.toString() );
133 
134         BOLD_ELEMENTS.add( SimplifiedDocbookMarkup.COMMAND_TAG.toString() );
135         BOLD_ELEMENTS.add( SimplifiedDocbookMarkup.USERINPUT_TAG.toString() );
136 
137         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.REPLACEABLE_TAG.toString() );
138         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.SYSTEMITEM_TAG.toString() );
139         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.CITETITLE_TAG.toString() );
140         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.EMPHASIS_TAG.toString() );
141         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.ATTRIBUTION_TAG.toString() );
142         ITALIC_ELEMENTS.add( SimplifiedDocbookMarkup.LINEANNOTATION_TAG.toString() );
143 
144         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.COMPUTEROUTPUT_TAG.toString() );
145         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.REPLACEABLE_TAG.toString() );
146         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.LITERAL_TAG.toString() );
147         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.OPTION_TAG.toString() );
148         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.SYSTEMITEM_TAG.toString() );
149         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.USERINPUT_TAG.toString() );
150         MONOSPACE_ELEMENTS.add( SimplifiedDocbookMarkup.FILENAME_TAG.toString() );
151 
152         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ABBREV_TAG.toString() );
153         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ABSTRACT_TAG.toString() );
154         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOMIXED_TAG.toString() );
155         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.BIBLIOMSET_TAG.toString() );
156         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.COLSPEC_TAG.toString() );
157         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.EPIGRAPH_TAG.toString() );
158         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.EXAMPLE_TAG.toString() );
159         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.FOOTNOTEREF_TAG.toString() );
160         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() );
161         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.INLINEMEDIAOBJECT_TAG.toString() );
162         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.ISSUENUM_TAG.toString() );
163         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PHRASE_TAG.toString() );
164         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PUBDATE_TAG.toString() );
165         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.PUBLISHERNAME_TAG.toString() );
166         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.SPANSPEC_TAG.toString() );
167         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.TEXTOBJECT_TAG.toString() );
168         IGNORABLE_ELEMENTS.add( SimplifiedDocbookMarkup.VOLUMENUM_TAG.toString() );
169     }
170 
171     /**
172      * {@inheritDoc}
173      */
174     protected void init()
175     {
176         super.init();
177 
178         this.parent.clear();
179         this.trademark = 0;
180         this.level = 0;
181         this.isBold = false;
182         this.inHead = false;
183         this.ignore = false;
184         this.simpleTag = false;
185     }
186 
187     // ----------------------------------------------------------------------
188     //
189     // ----------------------------------------------------------------------
190 
191     /** {@inheritDoc} */
192     protected void handleStartTag( XmlPullParser parser, Sink sink )
193         throws XmlPullParserException, MacroExecutionException
194     {
195         if ( inHead && !META_ELEMENTS.contains( parser.getName() )
196                 && isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
197         {
198             sink.head_();
199             inHead = false;
200 
201             // assume any element that is not meta starts the body
202             sink.body();
203         }
204 
205         final SinkEventAttributeSet attribs = getAttributesFromParser( parser );
206         simpleTag = parser.isEmptyElementTag();
207 
208         if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
209         {
210             handleArticleStart( sink, attribs );
211         }
212         else if ( isParent( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
213         {
214             handleArticleInfoStartTags( parser.getName(), sink, attribs );
215         }
216         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
217         {
218             parent.push( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() );
219         }
220         else if ( parser.getName().equals( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() )
221                 || parser.getName().equals( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
222                 || parser.getName().equals( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
223                 || parser.getName().equals( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() ) )
224         {
225             parent.push( parser.getName() );
226             ignore = true;
227         }
228         else if ( isParent( ( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() ) )
229                 || isParent( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
230                 || isParent( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
231                 || isParent( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
232                 || isParent( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
233         {
234             return; // TODO: implement footnotes, entrytbl
235         }
236         else if ( HIER_ELEMENTS.contains( parser.getName() ) )
237         {
238             handleSectionElements( sink, parser.getName(), attribs );
239         }
240         else if ( listStartTags ( parser.getName(), sink, attribs ) )
241         {
242             return;
243         }
244         else if ( mediaStartTag( parser.getName(), sink, attribs ) )
245         {
246             return;
247         }
248         else if ( tableStartTags( parser.getName(), sink, attribs ) )
249         {
250             return;
251         }
252         else if ( parser.getName().equals( SimplifiedDocbookMarkup.PARA_TAG.toString() ) )
253         {
254             handleParaStart( sink, attribs );
255         }
256         else if ( styleStartTags( parser.getName(), sink, attribs ) )
257         {
258             return;
259         }
260         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
261         {
262             handleTitleStart( sink, attribs );
263         }
264         else if ( parser.getName().equals( SimplifiedDocbookMarkup.EMAIL_TAG.toString() ) )
265         {
266             handleEmailStart( parser, sink, attribs );
267         }
268         else if ( linkStartTag( parser.getName(), sink, attribs ) )
269         {
270             return;
271         }
272         else if ( parser.getName().equals( SimplifiedDocbookMarkup.QUOTE_TAG.toString() ) )
273         {
274             sink.text( "\"", null );
275         }
276         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TRADEMARK_TAG.toString() ) )
277         {
278             trademark = '\u2122';
279             final Object trade = attribs.getAttribute( "class" );
280 
281             if ( trade != null )
282             {
283                 trademark = DocbookUtils.trademarkFromClass( trade.toString() );
284             }
285         }
286         else
287         {
288             if ( !ignorable( parser.getName() ) )
289             {
290                 if ( simpleTag )
291                 {
292                     handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_SIMPLE );
293                 }
294                 else
295                 {
296                     handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_START );
297                 }
298             }
299         }
300     }
301 
302     /** {@inheritDoc} */
303     protected void handleEndTag( XmlPullParser parser, Sink sink )
304         throws XmlPullParserException, MacroExecutionException
305     {
306         if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
307         {
308             sink.body_();
309         }
310         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
311         {
312             parent.pop();
313         }
314         else if ( isParent( SimplifiedDocbookMarkup.ARTICLEINFO_TAG.toString() ) )
315         {
316              handleArticleInfoEndTags( parser.getName(), sink );
317         }
318         else if ( HIER_ELEMENTS.contains( parser.getName() ) )
319         {
320             sink.section_( level );
321 
322             //decrease the nesting level
323             level--;
324             parent.pop();
325         }
326         else if ( parser.getName().equals( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() )
327                 || parser.getName().equals( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
328                 || parser.getName().equals( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
329                 || parser.getName().equals( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
330                 || parser.getName().equals( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
331         {
332             parent.pop();
333             ignore = false;
334         }
335         else if ( isParent( ( SimplifiedDocbookMarkup.FOOTNOTE_TAG.toString() ) )
336                 || isParent( SimplifiedDocbookMarkup.AUDIOOBJECT_TAG.toString() )
337                 || isParent( SimplifiedDocbookMarkup.VIDEOOBJECT_TAG.toString() )
338                 || isParent( SimplifiedDocbookMarkup.SECTIONINFO_TAG.toString() )
339                 || isParent( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
340         {
341             return;
342         }
343         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() ) )
344         {
345             sink.list_();
346             parent.pop();
347         }
348         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
349         {
350             sink.numberedList_();
351             parent.pop();
352         }
353         else if ( parser.getName().equals( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
354         {
355             parent.pop();
356 
357             if ( isParent( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
358             {
359                 sink.definition_();
360             }
361             else if ( isParent( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
362             {
363                 sink.numberedListItem_();
364             }
365             else
366             {
367                 sink.listItem_();
368             }
369         }
370         else if ( parser.getName().equals( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
371         {
372             sink.definitionList_();
373         }
374         else if ( parser.getName().equals( SimplifiedDocbookMarkup.VARLISTENTRY_TAG.toString() ) )
375         {
376             sink.definitionListItem_();
377         }
378         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TERM_TAG.toString() ) )
379         {
380             sink.definedTerm_();
381         }
382         else if ( parser.getName().equals( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
383         {
384             sink.figure_();
385             parent.pop();
386         }
387         else if ( parser.getName().equals( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() )
388                 || parser.getName().equals( SimplifiedDocbookMarkup.FIGURE_TAG.toString() )
389                 || parser.getName().equals( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
390                 || parser.getName().equals( SimplifiedDocbookMarkup.TFOOT_TAG.toString() )
391                 || parser.getName().equals( SimplifiedDocbookMarkup.TBODY_TAG.toString() ) )
392         {
393             parent.pop();
394         }
395         else if ( parser.getName().equals( SimplifiedDocbookMarkup.CAPTION_TAG.toString() ) )
396         {
397             handleCaptionEnd( sink );
398         }
399         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
400             || parser.getName().equals( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
401         {
402             sink.table_();
403 
404             parent.pop();
405         }
406         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TR_TAG.toString() )
407                 || parser.getName().equals( SimplifiedDocbookMarkup.ROW_TAG.toString() ) )
408         {
409             sink.tableRow_();
410         }
411         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TGROUP_TAG.toString() ) )
412         {
413             sink.tableRows_();
414         }
415         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() )
416                 && isParent( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
417             || parser.getName().equals( TH_TAG.toString() ) )
418         {
419             sink.tableHeaderCell_();
420         }
421         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() ) )
422         {
423             sink.tableCell_();
424         }
425         else if ( parser.getName().equals( SimplifiedDocbookMarkup.PARA_TAG.toString() ) )
426         {
427             handleParaEnd( sink );
428         }
429         else if ( VERBATIM_ELEMENTS.contains( parser.getName() ) )
430         {
431             sink.verbatim_();
432         }
433         else if ( BOLD_ELEMENTS.contains( parser.getName() )
434             && MONOSPACE_ELEMENTS.contains( parser.getName() ) )
435         {
436             sink.monospaced_();
437             sink.bold_();
438         }
439         else if ( ITALIC_ELEMENTS.contains( parser.getName() )
440             && MONOSPACE_ELEMENTS.contains( parser.getName() ) )
441         {
442             sink.monospaced_();
443             sink.italic_();
444         }
445         else if ( BOLD_ELEMENTS.contains( parser.getName() ) )
446         {
447             sink.bold_();
448         }
449         else if ( ITALIC_ELEMENTS.contains( parser.getName() ) )
450         {
451             if ( isBold )
452             {
453                 sink.bold_();
454 
455                 isBold = false;
456             }
457             else
458             {
459                 sink.italic_();
460             }
461         }
462         else if ( MONOSPACE_ELEMENTS.contains( parser.getName() ) )
463         {
464             sink.monospaced_();
465         }
466         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
467         {
468             handleTitleEnd( sink );
469         }
470         else if ( parser.getName().equals( SimplifiedDocbookMarkup.ULINK_TAG.toString() )
471                 || parser.getName().equals( SimplifiedDocbookMarkup.LINK_TAG.toString() ) )
472         {
473             if ( isParent( parser.getName() ) )
474             {
475                 parent.pop();
476                 sink.link_();
477             }
478         }
479         else if ( parser.getName().equals( SimplifiedDocbookMarkup.QUOTE_TAG.toString() ) )
480         {
481             sink.text( "\"", null );
482         }
483         else if ( parser.getName().equals( SimplifiedDocbookMarkup.TRADEMARK_TAG.toString() ) )
484         {
485             sink.text( Character.toString( trademark ), null );
486         }
487         else if ( !simpleTag && !ignorable( parser.getName() ) )
488         {
489             handleUnknown( parser, sink, HtmlMarkup.TAG_TYPE_END );
490         }
491     }
492 
493     /** {@inheritDoc} */
494     protected void handleComment( XmlPullParser parser, Sink sink )
495         throws XmlPullParserException
496     {
497         final String text = parser.getText();
498 
499         switch ( text.trim() )
500         {
501             case "PB":
502                 sink.pageBreak();
503                 break;
504             case "HR":
505                 sink.horizontalRule();
506                 break;
507             case "LB":
508                 sink.lineBreak();
509                 break;
510             case "anchor_end":
511                 sink.anchor_();
512                 break;
513             default:
514                 if ( isEmitComments() )
515                 {
516                     sink.comment( text );
517                 }
518                 break;
519         }
520     }
521 
522     /** {@inheritDoc} */
523     protected void handleCdsect( XmlPullParser parser, Sink sink )
524             throws XmlPullParserException
525     {
526         if ( !ignore )
527         {
528             super.handleCdsect( parser, sink );
529         }
530     }
531 
532     /** {@inheritDoc} */
533     protected void handleEntity( XmlPullParser parser, Sink sink )
534             throws XmlPullParserException
535     {
536         if ( !ignore )
537         {
538             super.handleEntity( parser, sink );
539         }
540     }
541 
542     /** {@inheritDoc} */
543     protected void handleText( XmlPullParser parser, Sink sink )
544             throws XmlPullParserException
545     {
546         if ( !ignore )
547         {
548             super.handleText( parser, sink );
549         }
550     }
551 
552     // ----------------------------------------------------------------------
553     //
554     // ----------------------------------------------------------------------
555 
556     private void handleArticleInfoStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
557     {
558         if ( !ARTICLEINFO_ELEMENTS.contains( name ) )
559         {
560             ignore = true;
561             return; // TODO: other meta data are ignored, implement!
562         }
563 
564         if ( name.equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
565         {
566             sink.title( attribs );
567         }
568         else if ( name.equals( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() ) )
569         {
570             sink.author( attribs );
571         }
572         else if ( name.equals( SimplifiedDocbookMarkup.DATE_TAG.toString() ) )
573         {
574             sink.date( attribs );
575         }
576     }
577 
578     private void handleArticleInfoEndTags( String name, Sink sink )
579     {
580         if ( !ARTICLEINFO_ELEMENTS.contains( name ) )
581         {
582             ignore = false;
583             return; // TODO: other meta data are ignored, implement!
584         }
585 
586         if ( name.equals( SimplifiedDocbookMarkup.TITLE_TAG.toString() ) )
587         {
588             sink.title_();
589         }
590         else if ( name.equals( SimplifiedDocbookMarkup.CORPAUTHOR_TAG.toString() ) )
591         {
592             sink.author_();
593         }
594         else if ( name.equals( SimplifiedDocbookMarkup.DATE_TAG.toString() ) )
595         {
596             sink.date_();
597         }
598     }
599 
600     private void handleCaptionStart( Sink sink, SinkEventAttributeSet attribs )
601     {
602         if ( isParent( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
603         {
604             sink.figureCaption( attribs );
605         }
606         else if ( isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() )
607             || isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() ) )
608         {
609             sink.tableCaption( attribs );
610         }
611 
612         parent.push( SimplifiedDocbookMarkup.CAPTION_TAG.toString() );
613     }
614 
615     private void handleCaptionEnd( Sink sink )
616     {
617         parent.pop();
618 
619         if ( isParent( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
620         {
621             sink.figureCaption_();
622         }
623         else if ( isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() )
624             || isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() ) )
625         {
626             sink.tableCaption_();
627         }
628     }
629 
630     private void handleEmailStart( XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs )
631             throws XmlPullParserException
632     {
633         try
634         {
635             final String mailto = parser.nextText();
636             sink.link( "mailto:" + mailto, attribs );
637             sink.monospaced();
638             sink.text( mailto, null );
639             sink.monospaced_();
640             sink.link_();
641         }
642         catch ( IOException e )
643         {
644             throw new XmlPullParserException( "IOException: " + e.getMessage(), parser, e );
645         }
646     }
647 
648     private void handleFigureStart( Sink sink, SinkEventAttributeSet attribs )
649     {
650         sink.figure( attribs );
651         parent.push( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() );
652     }
653 
654     private void handleArticleStart( Sink sink, SinkEventAttributeSet attribs )
655     {
656         sink.head( attribs );
657         inHead = true;
658 
659         parent.push( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() );
660     }
661 
662     //If the element introduces a new level of hierarchy, raise the stack
663     private void handleSectionElements( Sink sink, String name, SinkEventAttributeSet attribs )
664     {
665         //increase the nesting level
666         level++;
667 
668         sink.section( level, attribs );
669 
670         parent.push( name );
671     }
672 
673     private void handleAnchorStart( Sink sink, SinkEventAttributeSet attribs  )
674     {
675         final Object id = attribs.getAttribute( SimplifiedDocbookMarkup.ID_ATTRIBUTE );
676 
677         if ( id != null )
678         {
679             sink.anchor( id.toString(), attribs );
680         }
681     }
682 
683     private void handleImageDataStart( Sink sink, SinkEventAttributeSet attribs )
684             throws XmlPullParserException
685     {
686         final Object fileref = attribs.getAttribute( SimplifiedDocbookMarkup.FILEREF_ATTRIBUTE );
687 
688         if ( fileref == null )
689         {
690             throw new XmlPullParserException( "Missing fileref attribute in imagedata!" );
691         }
692 
693         sink.figureGraphics( fileref.toString(), attribs );
694     }
695 
696     private void handleItemizedListStart( Sink sink, SinkEventAttributeSet attribs )
697     {
698         sink.list( attribs );
699         //for itemizedlists in variablelists
700         parent.push( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() );
701     }
702 
703     private void handleLinkStart( Sink sink, SinkEventAttributeSet attribs )
704             throws XmlPullParserException
705     {
706         final Object linkend = attribs.getAttribute( SimplifiedDocbookMarkup.LINKEND_ATTRIBUTE );
707 
708         if ( linkend == null )
709         {
710             throw new XmlPullParserException( "Missing linkend attribute in link!" );
711         }
712 
713         parent.push( SimplifiedDocbookMarkup.LINK_TAG.toString() );
714         sink.link( "#" + linkend.toString(), attribs );
715     }
716 
717     private void handleListItemStart( Sink sink, SinkEventAttributeSet attribs )
718     {
719         if ( isParent( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
720         {
721             sink.definition( attribs );
722         }
723         else if ( isParent( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
724         {
725             sink.numberedListItem( attribs );
726         }
727         else
728         {
729             sink.listItem( attribs );
730         }
731 
732         parent.push( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() );
733     }
734 
735     private void handleOrderedListStart( Sink sink, SinkEventAttributeSet attribs )
736     {
737         //default enumeration style is decimal
738         int numeration = Sink.NUMBERING_DECIMAL;
739 
740         final Object num = attribs.getAttribute( SimplifiedDocbookMarkup.NUMERATION_ATTRIBUTE );
741 
742         if ( num != null )
743         {
744             numeration = DocbookUtils.doxiaListNumbering( num.toString() );
745         }
746 
747         sink.numberedList( numeration, attribs );
748         parent.push( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() );
749     }
750 
751     private void handleParaEnd( Sink sink )
752     {
753         if ( !isParent( SimplifiedDocbookMarkup.CAPTION_TAG.toString() )
754                 && ! isParent( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
755         {
756             sink.paragraph_();
757         }
758     }
759 
760     private void handleParaStart( Sink sink, SinkEventAttributeSet attribs )
761     {
762         if ( !isParent( SimplifiedDocbookMarkup.CAPTION_TAG.toString() )
763                 && ! isParent( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
764         {
765             sink.paragraph( attribs );
766         }
767     }
768 
769     private void handleTableStart( Sink sink, SinkEventAttributeSet attribs )
770     {
771         final Object frame = attribs.getAttribute( SimplifiedDocbookMarkup.FRAME_ATTRIBUTE );
772         if ( frame != null )
773         {
774             attribs.addAttribute( SimplifiedDocbookMarkup.FRAME_ATTRIBUTE,
775                     DocbookUtils.doxiaTableFrameAttribute( frame.toString() ) );
776         }
777 
778         sink.table( attribs );
779 
780         parent.push( SimplifiedDocbookMarkup.TABLE_TAG.toString() );
781     }
782 
783     private void handleTitleStart( Sink sink, SinkEventAttributeSet attribs )
784     {
785         if ( isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
786                 || isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
787         {
788             sink.tableCaption( attribs );
789         }
790         else if ( isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
791         {
792             sink.title( attribs );
793         }
794         else if ( isParent( SimplifiedDocbookMarkup.SECTION_TAG.toString() ) )
795         {
796             sink.sectionTitle( level, attribs );
797         }
798         else
799         {
800             sink.bold();
801         }
802     }
803 
804     private void handleTitleEnd( Sink sink )
805     {
806         if ( isParent( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
807                 || isParent( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
808         {
809             sink.tableCaption_();
810         }
811         else if ( isParent( SimplifiedDocbookMarkup.SECTION_TAG.toString() ) )
812         {
813             sink.sectionTitle_( level );
814         }
815         else if ( isParent( SimplifiedDocbookMarkup.ARTICLE_TAG.toString() ) )
816         {
817             sink.title_();
818         }
819         else
820         {
821             sink.bold_();
822         }
823     }
824 
825     private void handleUlinkStart( Sink sink, SinkEventAttributeSet attribs )
826             throws XmlPullParserException
827     {
828         final Object url = attribs.getAttribute( SimplifiedDocbookMarkup.URL_ATTRIBUTE );
829 
830         if ( url == null )
831         {
832             throw new XmlPullParserException( "Missing url attribute in ulink!" );
833         }
834 
835         parent.push( SimplifiedDocbookMarkup.ULINK_TAG.toString() );
836         sink.link( url.toString(), attribs );
837     }
838 
839     private void handleVariableListStart( Sink sink, SinkEventAttributeSet attribs )
840     {
841         sink.definitionList( attribs );
842         parent.push( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() );
843     }
844 
845     private void handleXrefStart( Sink sink, SinkEventAttributeSet attribs )
846             throws XmlPullParserException
847     {
848         final Object linkend = attribs.getAttribute( SimplifiedDocbookMarkup.LINKEND_ATTRIBUTE );
849 
850         if ( linkend == null )
851         {
852             throw new XmlPullParserException( "Missing linkend attribute in xref!" );
853         }
854 
855         sink.link( "#" + linkend.toString(), attribs );
856         sink.text( "Link" ); //TODO: determine text of link target
857         sink.link_();
858     }
859 
860     private boolean ignorable( String name )
861     {
862         return IGNORABLE_ELEMENTS.contains( name );
863     }
864 
865     /**
866      * Determines if the given element is a parent element.
867      *
868      * @param element the element to determine.
869      * @return true if the given element is a parent element.
870      */
871     private boolean isParent( String element )
872     {
873         if ( parent.size() > 0 )
874         {
875             return parent.peek().equals( element );
876         }
877 
878         return false;
879     }
880 
881     private boolean linkStartTag( String name, Sink sink, SinkEventAttributeSet attribs )
882             throws XmlPullParserException
883     {
884         if ( name.equals( SimplifiedDocbookMarkup.ULINK_TAG.toString() ) )
885         {
886             handleUlinkStart( sink, attribs );
887         }
888         else if ( name.equals( SimplifiedDocbookMarkup.LINK_TAG.toString() ) )
889         {
890             handleLinkStart( sink, attribs );
891         }
892         else if ( name.equals( SimplifiedDocbookMarkup.XREF_TAG.toString() ) )
893         {
894             handleXrefStart( sink, attribs );
895         }
896         else if ( name.equals( SimplifiedDocbookMarkup.ANCHOR_TAG.toString() ) )
897         {
898             handleAnchorStart( sink, attribs );
899         }
900         else
901         {
902             return false;
903         }
904 
905         return true;
906     }
907 
908     private boolean listStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
909     {
910         if ( name.equals( SimplifiedDocbookMarkup.ITEMIZEDLIST_TAG.toString() ) )
911         {
912             handleItemizedListStart( sink, attribs );
913         }
914         else if ( name.equals( SimplifiedDocbookMarkup.ORDEREDLIST_TAG.toString() ) )
915         {
916             handleOrderedListStart( sink, attribs );
917         }
918         else if ( name.equals( SimplifiedDocbookMarkup.LISTITEM_TAG.toString() ) )
919         {
920             handleListItemStart( sink, attribs );
921         }
922         else if ( name.equals( SimplifiedDocbookMarkup.VARIABLELIST_TAG.toString() ) )
923         {
924             handleVariableListStart( sink, attribs );
925         }
926         else if ( name.equals( SimplifiedDocbookMarkup.VARLISTENTRY_TAG.toString() ) )
927         {
928             sink.definitionListItem( attribs );
929         }
930         else if ( name.equals( SimplifiedDocbookMarkup.TERM_TAG.toString() ) )
931         {
932             sink.definedTerm( attribs );
933         }
934         else
935         {
936             return false;
937         }
938 
939         return true;
940     }
941 
942     private boolean mediaStartTag( String name, Sink sink, SinkEventAttributeSet attribs )
943             throws XmlPullParserException
944     {
945         if ( name.equals( SimplifiedDocbookMarkup.MEDIAOBJECT_TAG.toString() ) )
946         {
947             handleFigureStart( sink, attribs );
948         }
949         else if ( name.equals( SimplifiedDocbookMarkup.IMAGEOBJECT_TAG.toString() )
950                 || name.equals( SimplifiedDocbookMarkup.FIGURE_TAG.toString() ) )
951         {
952             parent.push( name );
953         }
954         else if ( name.equals( SimplifiedDocbookMarkup.IMAGEDATA_TAG.toString() ) )
955         {
956             handleImageDataStart( sink, attribs );
957         }
958         else if ( name.equals( SimplifiedDocbookMarkup.CAPTION_TAG.toString() ) )
959         {
960             handleCaptionStart( sink, attribs );
961         }
962         else
963         {
964             return false;
965         }
966 
967         return true;
968     }
969 
970     private boolean styleStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
971     {
972         if ( VERBATIM_ELEMENTS.contains( name ) )
973         {
974             sink.verbatim( SinkEventAttributeSet.BOXED );
975         }
976         else if ( BOLD_ELEMENTS.contains( name ) && MONOSPACE_ELEMENTS.contains( name ) )
977         {
978             sink.bold();
979             sink.monospaced();
980         }
981         else if ( ITALIC_ELEMENTS.contains( name ) && MONOSPACE_ELEMENTS.contains( name ) )
982         {
983             sink.italic();
984             sink.monospaced();
985         }
986         else if ( BOLD_ELEMENTS.contains( name ) )
987         {
988             sink.bold();
989         }
990         else if ( ITALIC_ELEMENTS.contains( name ) && "bold".equals( attribs.getAttribute( "role" ) ) )
991         {
992             sink.bold();
993             isBold = true;
994         }
995         else if ( ITALIC_ELEMENTS.contains( name ) )
996         {
997             sink.italic();
998         }
999         else if ( MONOSPACE_ELEMENTS.contains( name ) )
1000         {
1001             sink.monospaced();
1002         }
1003         else
1004         {
1005             return false;
1006         }
1007 
1008         return true;
1009     }
1010 
1011     private boolean tableStartTags( String name, Sink sink, SinkEventAttributeSet attribs )
1012     {
1013         if ( name.equals( SimplifiedDocbookMarkup.ENTRYTBL_TAG.toString() ) )
1014         {
1015             parent.push( name );
1016             ignore = true;
1017             // insert empty table cell instead
1018             sink.tableCell( (SinkEventAttributeSet) null );
1019             sink.tableCell_();
1020         }
1021         else if ( name.equals( SimplifiedDocbookMarkup.TABLE_TAG.toString() )
1022             || name.equals( SimplifiedDocbookMarkup.INFORMALTABLE_TAG.toString() ) )
1023         {
1024             handleTableStart( sink, attribs );
1025         }
1026         else if ( name.equals( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
1027                 || name.equals( SimplifiedDocbookMarkup.TFOOT_TAG.toString() )
1028                 || name.equals( SimplifiedDocbookMarkup.TBODY_TAG.toString() ) )
1029         {
1030             parent.push( name );
1031         }
1032         else if ( name.equals( SimplifiedDocbookMarkup.TGROUP_TAG.toString() ) )
1033         {
1034             // this is required by the DTD
1035             final int cols = Integer.parseInt( (String) attribs.getAttribute( "cols" ) );
1036             int[] justification = new int[cols];
1037             int justif = Sink.JUSTIFY_LEFT;
1038 
1039             final Object align = attribs.getAttribute( SinkEventAttributeSet.ALIGN );
1040 
1041             if ( align != null )
1042             {
1043                 final String al = align.toString();
1044 
1045                 if ( "right".equals( al ) )
1046                 {
1047                     justif = Sink.JUSTIFY_RIGHT;
1048                 }
1049                 else if ( "center".equals( al ) )
1050                 {
1051                     justif = Sink.JUSTIFY_CENTER;
1052                 }
1053             }
1054 
1055             for ( int i = 0; i < justification.length; i++ )
1056             {
1057                 justification[i] = justif;
1058             }
1059 
1060             boolean grid = false;
1061             final Object rowsep = attribs.getAttribute( "rowsep" );
1062 
1063             if ( rowsep != null && Integer.parseInt( (String) rowsep ) == 1 )
1064             {
1065                 grid = true;
1066             }
1067 
1068             final Object colsep = attribs.getAttribute( "colsep" );
1069 
1070             if ( colsep != null && Integer.parseInt( (String) colsep ) == 1 )
1071             {
1072                 grid = true;
1073             }
1074 
1075             sink.tableRows( justification, grid );
1076         }
1077         else if ( name.equals( SimplifiedDocbookMarkup.TR_TAG.toString() )
1078                 || name.equals( SimplifiedDocbookMarkup.ROW_TAG.toString() ) )
1079         {
1080             sink.tableRow( attribs );
1081         }
1082         else if ( name.equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() )
1083                 && isParent( SimplifiedDocbookMarkup.THEAD_TAG.toString() )
1084                 || name.equals( SimplifiedDocbookMarkup.TH_TAG.toString() ) )
1085         {
1086             sink.tableHeaderCell( attribs );
1087         }
1088         else if ( name.equals( SimplifiedDocbookMarkup.ENTRY_TAG.toString() ) )
1089         {
1090             sink.tableCell( attribs );
1091         }
1092         else
1093         {
1094             return false;
1095         }
1096 
1097         return true;
1098     }
1099 }