001package org.apache.maven.doxia.parser;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 *   http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.util.Iterator;
023
024import org.apache.maven.doxia.logging.Log;
025import org.apache.maven.doxia.sink.SinkEventAttributeSet;
026import org.apache.maven.doxia.sink.SinkEventElement;
027import org.apache.maven.doxia.sink.SinkEventTestingSink;
028
029/**
030 * Test for XhtmlBaseParser.
031 *
032 * @author ltheussl
033 * @version $Id$
034 * @since 1.1
035 */
036public class XhtmlBaseParserTest
037    extends AbstractParserTest
038{
    /** Parser under test; assigned in {@link #createParser()} and re-created in {@link #setUp()}. */
    private XhtmlBaseParser parser;
    /** Sink handed to the parser by the tests, which then read its event list; reset in {@link #setUp()}. */
    private final SinkEventTestingSink sink = new SinkEventTestingSink();
041
042
043    @Override
044    protected Parser createParser()
045    {
046        parser = new XhtmlBaseParser();
047        parser.getLog().setLogLevel( Log.LEVEL_ERROR );
048        return parser;
049    }
050
051    @Override
052    protected String outputExtension()
053    {
054        return "xhtml";
055    }
056
057    @Override
058    protected void setUp() throws Exception
059    {
060        super.setUp();
061
062        parser = new XhtmlBaseParser();
063        parser.getLog().setLogLevel( Log.LEVEL_ERROR );
064        sink.reset();
065    }
066
067    /** Test Doxia version. */
068    public void testDoxiaVersion()
069    {
070        assertNotNull( XhtmlBaseParser.doxiaVersion() );
071        assertFalse( "unknown".equals( XhtmlBaseParser.doxiaVersion() ) );
072    }
073
074    /** @throws Exception  */
075    public void testHeadingEventsList()
076        throws Exception
077    {
078        String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
079
080        parser.parse( text, sink );
081
082        Iterator<SinkEventElement> it = sink.getEventList().iterator();
083
084        assertEquals( "paragraph", it.next().getName() );
085        assertEquals( "section1", it.next().getName() );
086        assertEquals( "sectionTitle1", it.next().getName() );
087        assertEquals( "sectionTitle1_", it.next().getName() );
088        assertEquals( "section2", it.next().getName() );
089        assertEquals( "sectionTitle2", it.next().getName() );
090        assertEquals( "sectionTitle2_", it.next().getName() );
091        assertEquals( "section3", it.next().getName() );
092        assertEquals( "sectionTitle3", it.next().getName() );
093        assertEquals( "sectionTitle3_", it.next().getName() );
094        assertEquals( "section4", it.next().getName() );
095        assertEquals( "sectionTitle4", it.next().getName() );
096        assertEquals( "sectionTitle4_", it.next().getName() );
097        assertEquals( "section5", it.next().getName() );
098        assertEquals( "sectionTitle5", it.next().getName() );
099        assertEquals( "sectionTitle5_", it.next().getName() );
100        assertEquals( "section5_", it.next().getName() );
101        assertEquals( "section4_", it.next().getName() );
102        assertEquals( "section3_", it.next().getName() );
103        assertEquals( "section2_", it.next().getName() );
104        assertEquals( "section1_", it.next().getName() );
105        assertEquals( "section1", it.next().getName() );
106        assertEquals( "sectionTitle1", it.next().getName() );
107        assertEquals( "sectionTitle1_", it.next().getName() );
108        // this one is missing because we enclose everything in <p> which is not valid xhtml,
109        // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
110        //assertEquals( "section1_", it.next().getName() );
111        assertEquals( "paragraph_", it.next().getName() );
112        assertFalse( it.hasNext() );
113    }
114
115    /** @throws Exception  */
116    public void testNestedHeadingEventsList()
117        throws Exception
118    {
119        // DOXIA-241
120        String text = "<p><h2></h2><h6></h6><h3></h3></p>";
121
122        parser.parse( text, sink );
123
124        Iterator<SinkEventElement> it = sink.getEventList().iterator();
125
126        assertEquals( "paragraph", it.next().getName() );
127        assertEquals( "section1", it.next().getName() );
128        assertEquals( "sectionTitle1", it.next().getName() );
129        assertEquals( "sectionTitle1_", it.next().getName() );
130
131        assertEquals( "section2", it.next().getName() );
132        assertEquals( "section3", it.next().getName() );
133        assertEquals( "section4", it.next().getName() );
134
135        assertEquals( "section5", it.next().getName() );
136        assertEquals( "sectionTitle5", it.next().getName() );
137        assertEquals( "sectionTitle5_", it.next().getName() );
138        assertEquals( "section5_", it.next().getName() );
139
140        assertEquals( "section4_", it.next().getName() );
141        assertEquals( "section3_", it.next().getName() );
142        assertEquals( "section2_", it.next().getName() );
143
144        assertEquals( "section2", it.next().getName() );
145        assertEquals( "sectionTitle2", it.next().getName() );
146        assertEquals( "sectionTitle2_", it.next().getName() );
147        // these two are missing because we enclose everything in <p> which is not valid xhtml,
148        // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
149        //assertEquals( "section2_", it.next().getName() );
150        //assertEquals( "section1_", it.next().getName() );
151        assertEquals( "paragraph_", it.next().getName() );
152        assertFalse( it.hasNext() );
153    }
154
155    /** @throws Exception  */
156    public void testFigureEventsList()
157        throws Exception
158    {
159        String text = "<img src=\"source\" title=\"caption\" />";
160
161        parser.parse( text, sink );
162
163        Iterator<SinkEventElement> it = sink.getEventList().iterator();
164
165        assertEquals( "figureGraphics", it.next().getName() );
166        assertFalse( it.hasNext() );
167    }
168
169    /** @throws Exception  */
170    public void testTableEventsList()
171        throws Exception
172    {
173        // TODO: table caption, see DOXIA-177
174
175        String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
176
177        parser.parse( text, sink );
178
179        Iterator<SinkEventElement> it = sink.getEventList().iterator();
180
181        assertEquals( "table", it.next().getName() );
182        assertEquals( "tableRows", it.next().getName() );
183        assertEquals( "tableRow", it.next().getName() );
184        assertEquals( "tableHeaderCell", it.next().getName() );
185        assertEquals( "text", it.next().getName() );
186        assertEquals( "tableHeaderCell_", it.next().getName() );
187        assertEquals( "tableRow_", it.next().getName() );
188        assertEquals( "tableRow", it.next().getName() );
189        assertEquals( "tableCell", it.next().getName() );
190        assertEquals( "text", it.next().getName() );
191        assertEquals( "tableCell_", it.next().getName() );
192        assertEquals( "tableRow_", it.next().getName() );
193        assertEquals( "tableRows_", it.next().getName() );
194        assertEquals( "table_", it.next().getName() );
195
196        assertFalse( it.hasNext() );
197    }
198
199    /** @throws Exception  */
200    public void testSignificantWhiteSpace()
201        throws Exception
202    {
203        // NOTE significant white space
204        String text = "<p><b>word</b> <i>word</i></p>";
205
206        parser.parse( text, sink );
207
208        Iterator<SinkEventElement> it = sink.getEventList().iterator();
209
210        assertEquals( "paragraph", it.next().getName() );
211        assertEquals( "bold", it.next().getName() );
212        assertEquals( "text", it.next().getName() );
213        assertEquals( "bold_", it.next().getName() );
214
215        SinkEventElement el = it.next();
216        assertEquals( "text", el.getName() );
217        assertEquals( " ",  (String) el.getArgs()[0] );
218
219        assertEquals( "italic", it.next().getName() );
220        assertEquals( "text", it.next().getName() );
221        assertEquals( "italic_", it.next().getName() );
222        assertEquals( "paragraph_", it.next().getName() );
223        assertFalse( it.hasNext() );
224
225
226        // same test with EOL
227        String eol = System.getProperty( "line.separator" );
228        text = "<p><b>word</b>" + eol + "<i>word</i></p>";
229
230        sink.reset();
231        parser.parse( text, sink );
232        it = sink.getEventList().iterator();
233
234        assertEquals( "paragraph", it.next().getName() );
235        assertEquals( "bold", it.next().getName() );
236        assertEquals( "text", it.next().getName() );
237        assertEquals( "bold_", it.next().getName() );
238
239        el = it.next();
240        assertEquals( "text", el.getName() );
241        // according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n"
242        assertEquals( "\n",  (String) el.getArgs()[0] );
243
244        assertEquals( "italic", it.next().getName() );
245        assertEquals( "text", it.next().getName() );
246        assertEquals( "italic_", it.next().getName() );
247        assertEquals( "paragraph_", it.next().getName() );
248        assertFalse( it.hasNext() );
249
250
251        // DOXIA-189: there should be no EOL after closing tag
252        text = "<p>There should be no space after the last <i>word</i>.</p>";
253
254        sink.reset();
255        parser.parse( text, sink );
256        it = sink.getEventList().iterator();
257
258        assertEquals( "paragraph", it.next().getName() );
259        assertEquals( "text", it.next().getName() );
260        assertEquals( "italic", it.next().getName() );
261        assertEquals( "text", it.next().getName() );
262        assertEquals( "italic_", it.next().getName() );
263
264        el = it.next();
265        assertEquals( "text", el.getName() );
266        assertEquals( ".",  (String) el.getArgs()[0] );
267
268        assertEquals( "paragraph_", it.next().getName() );
269        assertFalse( it.hasNext() );
270    }
271
272    /** @throws Exception  */
273    public void testPreFormattedText()
274        throws Exception
275    {
276        String text = "<pre><a href=\"what.html\">what</a></pre>";
277
278        parser.parse( text, sink );
279
280        Iterator<SinkEventElement> it = sink.getEventList().iterator();
281        assertEquals( "verbatim", it.next().getName() );
282        assertEquals( "link", it.next().getName() );
283        assertEquals( "text", it.next().getName() );
284        assertEquals( "link_", it.next().getName() );
285        assertEquals( "verbatim_", it.next().getName() );
286        assertFalse( it.hasNext() );
287
288        text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
289        sink.reset();
290        parser.parse( text, sink );
291
292        it = sink.getEventList().iterator();
293        assertEquals( "verbatim", it.next().getName() );
294        assertEquals( "text", it.next().getName() );
295        assertEquals( "verbatim_", it.next().getName() );
296        assertFalse( it.hasNext() );
297
298        text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
299        sink.reset();
300        parser.parse( text, sink );
301
302        it = sink.getEventList().iterator();
303        assertEquals( "verbatim", it.next().getName() );
304        assertEquals( "text", it.next().getName() );
305        assertEquals( "verbatim_", it.next().getName() );
306        assertFalse( it.hasNext() );
307    }
308
309    /** @throws Exception  */
310    public void testPreEOL()
311        throws Exception
312    {
313        // test EOLs within <pre>: the sink MUST receive a text event for the EOL
314        String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
315                + "<a href=\"what.html\">what</a></pre>";
316
317        parser.parse( text, sink );
318
319        Iterator<SinkEventElement> it = sink.getEventList().iterator();
320
321        assertEquals( "verbatim", it.next().getName() );
322        assertEquals( "link", it.next().getName() );
323        assertEquals( "text", it.next().getName() );
324        assertEquals( "link_", it.next().getName() );
325        assertEquals( "text", it.next().getName() );
326        assertEquals( "link", it.next().getName() );
327        assertEquals( "text", it.next().getName() );
328        assertEquals( "link_", it.next().getName() );
329        assertEquals( "verbatim_", it.next().getName() );
330    }
331
332    /** @throws Exception  */
333    public void testDoxia250()
334        throws Exception
335    {
336        StringBuilder sb = new StringBuilder();
337        sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
338        sb.append( "<!ENTITY foo \"&#x159;\">" ).append( XhtmlBaseParser.EOL );
339        sb.append( "<!ENTITY foo1 \"&nbsp;\">" ).append( XhtmlBaseParser.EOL );
340        sb.append( "<!ENTITY foo2 \"&#x161;\">" ).append( XhtmlBaseParser.EOL );
341        sb.append( "<!ENTITY tritPos \"&#x1d7ed;\">" ).append( XhtmlBaseParser.EOL );
342        sb.append( "]>" ).append( XhtmlBaseParser.EOL );
343        sb.append( "<b>&foo;&foo1;&foo2;&tritPos;</b>" );
344
345        parser.setValidate( false );
346        parser.parse( sb.toString(), sink );
347
348        Iterator<SinkEventElement> it = sink.getEventList().iterator();
349
350        SinkEventElement event = it.next();
351        assertEquals( "bold", event.getName() );
352
353        event = it.next();
354        assertEquals( "text", event.getName() );
355        assertEquals( "\u0159",  (String) event.getArgs()[0] );
356
357        event = it.next();
358        assertEquals( "text", event.getName() );
359        assertEquals( "\u00A0",  (String) event.getArgs()[0] );
360
361        event = it.next();
362        assertEquals( "text", event.getName() );
363        assertEquals( "\u0161",  (String) event.getArgs()[0] );
364
365        event = it.next();
366        assertEquals( "text", event.getName() );
367        assertEquals( "\uD835\uDFED",  (String) event.getArgs()[0] );
368
369        event = it.next();
370        assertEquals( "bold_", event.getName() );
371    }
372
373    /** @throws Exception  */
374    public void testEntities()
375        throws Exception
376    {
377        final String text = "<!DOCTYPE test [<!ENTITY flo \"&#x159;\"><!ENTITY tritPos \"&#x1d7ed;\"><!ENTITY fo \"&#65;\"><!ENTITY myCustom \"&fo;\">]>"
378                + "<body><h2>&amp;&flo;&#x159;&tritPos;&#x1d7ed;</h2><p>&amp;&flo;&#x159;&tritPos;&#x1d7ed;&myCustom;</p></body>";
379
380        parser.setValidate( false );
381        parser.parse( text, sink );
382
383        Iterator<SinkEventElement> it = sink.getEventList().iterator();
384
385        assertEquals( "section1", it.next().getName() );
386        assertEquals( "sectionTitle1", it.next().getName() );
387
388        SinkEventElement textEvt = it.next();
389        assertEquals( "text", textEvt.getName() );
390        assertEquals( "&", textEvt.getArgs()[0] );
391
392        textEvt = it.next();
393        assertEquals( "text", textEvt.getName() );
394        assertEquals( "\u0159", textEvt.getArgs()[0] );
395
396        textEvt = it.next();
397        assertEquals( "text", textEvt.getName() );
398        assertEquals( "\u0159", textEvt.getArgs()[0] );
399
400        textEvt = it.next();
401        assertEquals( "text", textEvt.getName() );
402        assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
403
404        textEvt = it.next();
405        assertEquals( "text", textEvt.getName() );
406        assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
407
408        assertEquals( "sectionTitle1_", it.next().getName() );
409        assertEquals( "paragraph", it.next().getName() );
410
411        textEvt = it.next();
412        assertEquals( "text", textEvt.getName() );
413        assertEquals( "&", textEvt.getArgs()[0] );
414
415        textEvt = it.next();
416        assertEquals( "text", textEvt.getName() );
417        assertEquals( "\u0159", textEvt.getArgs()[0] );
418
419        textEvt = it.next();
420        assertEquals( "text", textEvt.getName() );
421        assertEquals( "\u0159", textEvt.getArgs()[0] );
422
423        textEvt = it.next();
424        assertEquals( "text", textEvt.getName() );
425        assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
426
427        textEvt = it.next();
428        assertEquals( "text", textEvt.getName() );
429        assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
430
431        textEvt = it.next();
432        assertEquals( "text", textEvt.getName() );
433        assertEquals( "A", textEvt.getArgs()[0] );
434
435        assertEquals( "paragraph_", it.next().getName() );
436
437        assertFalse( it.hasNext() );
438    }
439
440    /** @throws Exception  */
441    public void testXhtmlEntities()
442        throws Exception
443    {
444        final String text = "<body><h2>&laquo;&reg;</h2><p>&ldquo;&rsquo;&Phi;&larr;</p></body>";
445
446        parser.parse( text, sink );
447
448        Iterator<SinkEventElement> it = sink.getEventList().iterator();
449
450        assertEquals( "section1", it.next().getName() );
451        assertEquals( "sectionTitle1", it.next().getName() );
452
453        // Couple symbols from Latin-1:
454        // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Latin-1_characters
455
456        SinkEventElement textEvt = it.next();
457        assertEquals( "text", textEvt.getName() );
458        assertEquals( "\u00AB", textEvt.getArgs()[0] );
459
460        textEvt = it.next();
461        assertEquals( "text", textEvt.getName() );
462        assertEquals( "\u00AE", textEvt.getArgs()[0] );
463
464        assertEquals( "sectionTitle1_", it.next().getName() );
465        assertEquals( "paragraph", it.next().getName() );
466
467        // Couple symbols from Special characters:
468        // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
469
470        textEvt = it.next();
471        assertEquals( "text", textEvt.getName() );
472        assertEquals( "\u201C", textEvt.getArgs()[0] );
473
474        textEvt = it.next();
475        assertEquals( "text", textEvt.getName() );
476        assertEquals( "\u2019", textEvt.getArgs()[0] );
477
478        // Couple symbols from Symbols:
479        // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Symbols
480
481        textEvt = it.next();
482        assertEquals( "text", textEvt.getName() );
483        assertEquals( "\u03A6", textEvt.getArgs()[0] );
484
485        textEvt = it.next();
486        assertEquals( "text", textEvt.getName() );
487        assertEquals( "\u2190", textEvt.getArgs()[0] );
488
489        assertEquals( "paragraph_", it.next().getName() );
490
491        assertFalse( it.hasNext() );
492    }
493
494    /** @throws Exception  */
495    public void testDecoration()
496        throws Exception
497    {
498        String text = "<div><u>u</u><s>s</s><del>del</del><strike>strike</strike><sub>sub</sub><sup>sup</sup></div>";
499        parser.parse( text, sink );
500        Iterator<SinkEventElement> it = sink.getEventList().iterator();
501
502        SinkEventElement event = it.next();
503        assertEquals( "text", event.getName() );
504        assertEquals( "u",  (String) event.getArgs()[0] );
505
506        event = it.next();
507        assertEquals( "text", event.getName() );
508        assertEquals( "s",  (String) event.getArgs()[0] );
509
510        event = it.next();
511        assertEquals( "text", event.getName() );
512        assertEquals( "del",  (String) event.getArgs()[0] );
513
514        event = it.next();
515        assertEquals( "text", event.getName() );
516        assertEquals( "strike",  (String) event.getArgs()[0] );
517
518        event = it.next();
519        assertEquals( "text", event.getName() );
520        assertEquals( "sub",  (String) event.getArgs()[0] );
521
522        event = it.next();
523        assertEquals( "text", event.getName() );
524        assertEquals( "sup",  (String) event.getArgs()[0] );
525//        assertTrue( ( (SinkEventAttributeSet) event.getArgs()[1] )
526//                .containsAttribute( SinkEventAttributeSet.VALIGN, "sup" ) ); // TODO
527    }
528
529    /** @throws Exception  */
530    public void testLists()
531        throws Exception
532    {
533        String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
534        parser.parse( text, sink );
535        Iterator<SinkEventElement> it = sink.getEventList().iterator();
536
537        assertEquals( "list", it.next().getName() );
538        assertEquals( "listItem", it.next().getName() );
539        assertEquals( "listItem_", it.next().getName() );
540        assertEquals( "list_", it.next().getName() );
541
542        assertEquals( "numberedList", it.next().getName() );
543        assertEquals( "numberedListItem", it.next().getName() );
544        assertEquals( "numberedListItem_", it.next().getName() );
545        assertEquals( "numberedList_", it.next().getName() );
546
547        assertEquals( "definitionList", it.next().getName() );
548        assertEquals( "definitionListItem", it.next().getName() );
549        assertEquals( "definedTerm", it.next().getName() );
550        assertEquals( "definedTerm_", it.next().getName() );
551        assertEquals( "definition", it.next().getName() );
552        assertEquals( "definition_", it.next().getName() );
553        assertEquals( "definitionListItem_", it.next().getName() );
554        assertEquals( "definitionList_", it.next().getName() );
555    }
556
557    /** @throws Exception  */
558    public void testStyles()
559        throws Exception
560    {
561        String text = "<div><b></b><strong></strong><i></i><em></em><code></code><samp></samp><tt></tt></div>";
562        parser.parse( text, sink );
563        Iterator<SinkEventElement> it = sink.getEventList().iterator();
564
565        assertEquals( "bold", it.next().getName() );
566        assertEquals( "bold_", it.next().getName() );
567        assertEquals( "bold", it.next().getName() );
568        assertEquals( "bold_", it.next().getName() );
569
570        assertEquals( "italic", it.next().getName() );
571        assertEquals( "italic_", it.next().getName() );
572        assertEquals( "italic", it.next().getName() );
573        assertEquals( "italic_", it.next().getName() );
574
575        assertEquals( "monospaced", it.next().getName() );
576        assertEquals( "monospaced_", it.next().getName() );
577        assertEquals( "monospaced", it.next().getName() );
578        assertEquals( "monospaced_", it.next().getName() );
579        assertEquals( "monospaced", it.next().getName() );
580        assertEquals( "monospaced_", it.next().getName() );
581    }
582
583    /** @throws Exception  */
584    public void testSimpleTags()
585        throws Exception
586    {
587        String text = "<div><br/><hr/><img src=\"img.src\"/></div>";
588        parser.parse( text, sink );
589        Iterator<SinkEventElement> it = sink.getEventList().iterator();
590
591        assertEquals( "lineBreak", it.next().getName() );
592        assertEquals( "horizontalRule", it.next().getName() );
593        assertEquals( "figureGraphics", it.next().getName() );
594    }
595
596    /** @throws Exception  */
597    public void testSpecial()
598        throws Exception
599    {
600        String text = "<p><!-- a pagebreak: --><!-- PB -->&nbsp;&#160;<unknown /></p>";
601        parser.parse( text, sink );
602        Iterator<SinkEventElement> it = sink.getEventList().iterator();
603
604        assertEquals( "paragraph", it.next().getName() );
605        assertEquals( "comment", it.next().getName() );
606        assertEquals( "pageBreak", it.next().getName() );
607        assertEquals( "nonBreakingSpace", it.next().getName() );
608        assertEquals( "nonBreakingSpace", it.next().getName() );
609        // unknown events are not reported by the base parser
610        assertEquals( "paragraph_", it.next().getName() );
611    }
612
613    /** @throws Exception  */
614    public void testTable()
615        throws Exception
616    {
617        String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
618        parser.parse( text, sink );
619        Iterator<SinkEventElement> it = sink.getEventList().iterator();
620
621        assertEquals( "table", it.next().getName() );
622
623        // DOXIA-374
624        SinkEventElement el = it.next();
625        assertEquals( "tableRows", el.getName() );
626        assertFalse( ( (Boolean) el.getArgs()[1] ).booleanValue() );
627
628        assertEquals( "tableCaption", it.next().getName() );
629        assertEquals( "tableCaption_", it.next().getName() );
630        assertEquals( "tableRow", it.next().getName() );
631        assertEquals( "tableHeaderCell", it.next().getName() );
632        assertEquals( "tableHeaderCell_", it.next().getName() );
633        assertEquals( "tableRow_", it.next().getName() );
634        assertEquals( "tableRow", it.next().getName() );
635        assertEquals( "tableCell", it.next().getName() );
636        assertEquals( "tableCell_", it.next().getName() );
637        assertEquals( "tableRow_", it.next().getName() );
638        assertEquals( "tableRows_", it.next().getName() );
639        assertEquals( "table_", it.next().getName() );
640    }
641
642    /** @throws Exception  */
643    public void testFigure()
644        throws Exception
645    {
646        String text = "<div class=\"figure\"><p><img src=\"src.jpg\"/></p><p><i></i></p></div>";
647        parser.parse( text, sink );
648        Iterator<SinkEventElement> it = sink.getEventList().iterator();
649
650        assertEquals( "figure", it.next().getName() );
651        assertEquals( "figureGraphics", it.next().getName() );
652        assertEquals( "figureCaption", it.next().getName() );
653        assertEquals( "figureCaption_", it.next().getName() );
654        assertEquals( "figure_", it.next().getName() );
655    }
656
657    /** @throws Exception  */
658    public void testAnchorLink()
659        throws Exception
660    {
661        String text = "<div><a href=\"\"></a>" +
662                "<a href=\"valid\"></a>" +
663                "<a href=\"#1invalid\"></a>" +
664                "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
665                "<a name=\"valid\"></a>" +
666                "<a name=\"1invalid\"></a>" +
667                "<a id=\"1invalid\"></a></div>";
668
669        parser.parse( text, sink );
670        Iterator<SinkEventElement> it = sink.getEventList().iterator();
671
672        SinkEventElement element = it.next();
673        assertEquals( "link", element.getName() );
674        assertEquals( "", element.getArgs()[0] );
675        assertEquals( "link_", it.next().getName() );
676
677        element = it.next();
678        assertEquals( "link", element.getName() );
679        assertEquals( "valid", element.getArgs()[0] );
680        assertEquals( "link_", it.next().getName() );
681
682        element = it.next();
683        assertEquals( "link", element.getName() );
684        assertEquals( "#a1invalid", element.getArgs()[0] );
685        assertEquals( "link_", it.next().getName() );
686
687        element = it.next();
688        assertEquals( "link", element.getName() );
689        assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
690        assertEquals( "link_", it.next().getName() );
691
692        element = it.next();
693        assertEquals( "anchor", element.getName() );
694        assertEquals( "valid", element.getArgs()[0] );
695        assertEquals( "anchor_", it.next().getName() );
696
697        element = it.next();
698        assertEquals( "anchor", element.getName() );
699        assertEquals( "a1invalid", element.getArgs()[0] );
700        assertEquals( "anchor_", it.next().getName() );
701
702        element = it.next();
703        assertEquals( "anchor", element.getName() );
704        assertEquals( "a1invalid", element.getArgs()[0] );
705        assertEquals( "anchor_", it.next().getName() );
706    }
707
708    /**
709     * Test entities in attributes.
710     *
711     * @throws java.lang.Exception if any.
712     */
713    public void testAttributeEntities()
714        throws Exception
715    {
716        String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&amp;l=e\"></script>";
717
718        parser.parse( text, sink );
719
720        Iterator<SinkEventElement> it = sink.getEventList().iterator();
721
722        SinkEventElement event = it.next();
723
724        assertEquals( "unknown", event.getName() );
725        assertEquals( "script", event.getArgs()[0] );
726        SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
727        // ampersand should be un-escaped
728        assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
729        assertEquals( "unknown", it.next().getName() );
730        assertFalse( it.hasNext() );
731
732        sink.reset();
733        text = "<img src=\"http://ex.com/ex.jpg?v=l&amp;l=e\" alt=\"image\"/>";
734        parser.parse( text, sink );
735
736        it = sink.getEventList().iterator();
737        event = it.next();
738        assertEquals( "figureGraphics", event.getName() );
739        attribs = (SinkEventAttributeSet) event.getArgs()[1];
740        // ampersand should be un-escaped
741        assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
742    }
743    
744    public void testUnbalancedDefinitionListItem() throws Exception
745    {
746        String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
747                        "<dl><dd>value</dd></dl>" +
748                        "<dl><dt>key</dt></dl>" +
749                        "<dl></dl>" +
750                        "<dl><dd>value</dd><dt>key</dt></dl></body>";
751
752        parser.parse( text, sink );
753
754        Iterator<SinkEventElement> it = sink.getEventList().iterator();
755        assertEquals( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_", "definition",
756                      "text", "definition_", "definitionListItem_", "definitionList_" );
757        assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
758                      "definitionListItem_", "definitionList_" );
759        assertEquals( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
760                      "definitionListItem_", "definitionList_" );
761        assertEquals( it, "definitionList", "definitionList_" );
762        assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
763                      "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
764                      "definitionListItem_", "definitionList_" );
765        assertFalse( it.hasNext() );
766    }
767}