View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Iterator;
23  
24  import org.apache.maven.doxia.logging.Log;
25  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
26  import org.apache.maven.doxia.sink.impl.SinkEventElement;
27  import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
28  
29  import static org.junit.Assert.assertNotEquals;
30  
31  /**
32   * Test for XhtmlBaseParser.
33   *
34   * @author ltheussl
35   * @since 1.1
36   */
37  public class XhtmlBaseParserTest
38      extends AbstractParserTest
39  {
40      private XhtmlBaseParser parser;
41      private final SinkEventTestingSink sink = new SinkEventTestingSink();
42  
43  
44      @Override
45      protected Parser createParser()
46      {
47          parser = new XhtmlBaseParser();
48          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
49          return parser;
50      }
51  
52      @Override
53      protected String outputExtension()
54      {
55          return "xhtml";
56      }
57  
58      @Override
59      protected void setUp() throws Exception
60      {
61          super.setUp();
62  
63          parser = new XhtmlBaseParser();
64          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
65          sink.reset();
66      }
67  
68      /** Test Doxia version. */
69      public void testDoxiaVersion()
70      {
71          assertNotNull( XhtmlBaseParser.doxiaVersion() );
72          assertNotEquals( "unknown", XhtmlBaseParser.doxiaVersion() );
73      }
74  
75      /** @throws Exception  */
76      public void testHeadingEventsList()
77          throws Exception
78      {
79          String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
80  
81          parser.parse( text, sink );
82  
83          Iterator<SinkEventElement> it = sink.getEventList().iterator();
84  
85          assertEquals( "paragraph", it.next().getName() );
86          assertEquals( "section1", it.next().getName() );
87          assertEquals( "sectionTitle1", it.next().getName() );
88          assertEquals( "sectionTitle1_", it.next().getName() );
89          assertEquals( "section2", it.next().getName() );
90          assertEquals( "sectionTitle2", it.next().getName() );
91          assertEquals( "sectionTitle2_", it.next().getName() );
92          assertEquals( "section3", it.next().getName() );
93          assertEquals( "sectionTitle3", it.next().getName() );
94          assertEquals( "sectionTitle3_", it.next().getName() );
95          assertEquals( "section4", it.next().getName() );
96          assertEquals( "sectionTitle4", it.next().getName() );
97          assertEquals( "sectionTitle4_", it.next().getName() );
98          assertEquals( "section5", it.next().getName() );
99          assertEquals( "sectionTitle5", it.next().getName() );
100         assertEquals( "sectionTitle5_", it.next().getName() );
101         assertEquals( "section5_", it.next().getName() );
102         assertEquals( "section4_", it.next().getName() );
103         assertEquals( "section3_", it.next().getName() );
104         assertEquals( "section2_", it.next().getName() );
105         assertEquals( "section1_", it.next().getName() );
106         assertEquals( "section1", it.next().getName() );
107         assertEquals( "sectionTitle1", it.next().getName() );
108         assertEquals( "sectionTitle1_", it.next().getName() );
109         // this one is missing because we enclose everything in <p> which is not valid xhtml,
110         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
111         //assertEquals( "section1_", it.next().getName() );
112         assertEquals( "paragraph_", it.next().getName() );
113         assertFalse( it.hasNext() );
114     }
115 
116     /** @throws Exception  */
117     public void testNestedHeadingEventsList()
118         throws Exception
119     {
120         // DOXIA-241
121         String text = "<p><h2></h2><h6></h6><h3></h3></p>";
122 
123         parser.parse( text, sink );
124 
125         Iterator<SinkEventElement> it = sink.getEventList().iterator();
126 
127         assertEquals( "paragraph", it.next().getName() );
128         assertEquals( "section1", it.next().getName() );
129         assertEquals( "sectionTitle1", it.next().getName() );
130         assertEquals( "sectionTitle1_", it.next().getName() );
131 
132         assertEquals( "section2", it.next().getName() );
133         assertEquals( "section3", it.next().getName() );
134         assertEquals( "section4", it.next().getName() );
135 
136         assertEquals( "section5", it.next().getName() );
137         assertEquals( "sectionTitle5", it.next().getName() );
138         assertEquals( "sectionTitle5_", it.next().getName() );
139         assertEquals( "section5_", it.next().getName() );
140 
141         assertEquals( "section4_", it.next().getName() );
142         assertEquals( "section3_", it.next().getName() );
143         assertEquals( "section2_", it.next().getName() );
144 
145         assertEquals( "section2", it.next().getName() );
146         assertEquals( "sectionTitle2", it.next().getName() );
147         assertEquals( "sectionTitle2_", it.next().getName() );
148         // these two are missing because we enclose everything in <p> which is not valid xhtml,
149         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
150         //assertEquals( "section2_", it.next().getName() );
151         //assertEquals( "section1_", it.next().getName() );
152         assertEquals( "paragraph_", it.next().getName() );
153         assertFalse( it.hasNext() );
154     }
155 
156     /** @throws Exception  */
157     public void testFigureEventsList()
158         throws Exception
159     {
160         String text = "<img src=\"source\" title=\"caption\" />";
161 
162         parser.parse( text, sink );
163 
164         Iterator<SinkEventElement> it = sink.getEventList().iterator();
165 
166         assertEquals( "figureGraphics", it.next().getName() );
167         assertFalse( it.hasNext() );
168     }
169 
170     /** @throws Exception  */
171     public void testTableEventsList()
172         throws Exception
173     {
174         // TODO: table caption, see DOXIA-177
175 
176         String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
177 
178         parser.parse( text, sink );
179 
180         Iterator<SinkEventElement> it = sink.getEventList().iterator();
181 
182         assertEquals( "table", it.next().getName() );
183         assertEquals( "tableRows", it.next().getName() );
184         assertEquals( "tableRow", it.next().getName() );
185         assertEquals( "tableHeaderCell", it.next().getName() );
186         assertEquals( "text", it.next().getName() );
187         assertEquals( "tableHeaderCell_", it.next().getName() );
188         assertEquals( "tableRow_", it.next().getName() );
189         assertEquals( "tableRow", it.next().getName() );
190         assertEquals( "tableCell", it.next().getName() );
191         assertEquals( "text", it.next().getName() );
192         assertEquals( "tableCell_", it.next().getName() );
193         assertEquals( "tableRow_", it.next().getName() );
194         assertEquals( "tableRows_", it.next().getName() );
195         assertEquals( "table_", it.next().getName() );
196 
197         assertFalse( it.hasNext() );
198     }
199 
200     /** @throws Exception  */
201     public void testSignificantWhiteSpace()
202         throws Exception
203     {
204         // NOTE significant white space
205         String text = "<p><b>word</b> <i>word</i></p>";
206 
207         parser.parse( text, sink );
208 
209         Iterator<SinkEventElement> it = sink.getEventList().iterator();
210 
211         assertEquals( "paragraph", it.next().getName() );
212         assertEquals( "inline", it.next().getName() );
213         assertEquals( "text", it.next().getName() );
214         assertEquals( "inline_", it.next().getName() );
215 
216         SinkEventElement el = it.next();
217         assertEquals( "text", el.getName() );
218         assertEquals( " ",  (String) el.getArgs()[0] );
219 
220         assertEquals( "inline", it.next().getName() );
221         assertEquals( "text", it.next().getName() );
222         assertEquals( "inline_", it.next().getName() );
223         assertEquals( "paragraph_", it.next().getName() );
224         assertFalse( it.hasNext() );
225 
226 
227         // same test with EOL
228         String eol = System.getProperty( "line.separator" );
229         text = "<p><b>word</b>" + eol + "<i>word</i></p>";
230 
231         sink.reset();
232         parser.parse( text, sink );
233         it = sink.getEventList().iterator();
234 
235         assertEquals( "paragraph", it.next().getName() );
236         assertEquals( "inline", it.next().getName() );
237         assertEquals( "text", it.next().getName() );
238         assertEquals( "inline_", it.next().getName() );
239 
240         el = it.next();
241         assertEquals( "text", el.getName() );
242         // according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n"
243         assertEquals( "\n",  (String) el.getArgs()[0] );
244 
245         assertEquals( "inline", it.next().getName() );
246         assertEquals( "text", it.next().getName() );
247         assertEquals( "inline_", it.next().getName() );
248         assertEquals( "paragraph_", it.next().getName() );
249         assertFalse( it.hasNext() );
250 
251 
252         // DOXIA-189: there should be no EOL after closing tag
253         text = "<p>There should be no space after the last <i>word</i>.</p>";
254 
255         sink.reset();
256         parser.parse( text, sink );
257         it = sink.getEventList().iterator();
258 
259         assertEquals( "paragraph", it.next().getName() );
260         assertEquals( "text", it.next().getName() );
261         assertEquals( "inline", it.next().getName() );
262         assertEquals( "text", it.next().getName() );
263         assertEquals( "inline_", it.next().getName() );
264 
265         el = it.next();
266         assertEquals( "text", el.getName() );
267         assertEquals( ".",  (String) el.getArgs()[0] );
268 
269         assertEquals( "paragraph_", it.next().getName() );
270         assertFalse( it.hasNext() );
271     }
272 
273     /** @throws Exception  */
274     public void testPreFormattedText()
275         throws Exception
276     {
277         String text = "<pre><a href=\"what.html\">what</a></pre>";
278 
279         parser.parse( text, sink );
280 
281         Iterator<SinkEventElement> it = sink.getEventList().iterator();
282         assertEquals( "verbatim", it.next().getName() );
283         assertEquals( "link", it.next().getName() );
284         assertEquals( "text", it.next().getName() );
285         assertEquals( "link_", it.next().getName() );
286         assertEquals( "verbatim_", it.next().getName() );
287         assertFalse( it.hasNext() );
288 
289         text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
290         sink.reset();
291         parser.parse( text, sink );
292 
293         it = sink.getEventList().iterator();
294         assertEquals( "verbatim", it.next().getName() );
295         assertEquals( "text", it.next().getName() );
296         assertEquals( "verbatim_", it.next().getName() );
297         assertFalse( it.hasNext() );
298 
299         text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
300         sink.reset();
301         parser.parse( text, sink );
302 
303         it = sink.getEventList().iterator();
304         assertEquals( "verbatim", it.next().getName() );
305         assertEquals( "text", it.next().getName() );
306         assertEquals( "verbatim_", it.next().getName() );
307         assertFalse( it.hasNext() );
308     }
309 
310     /** @throws Exception  */
311     public void testPreEOL()
312         throws Exception
313     {
314         // test EOLs within <pre>: the sink MUST receive a text event for the EOL
315         String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
316                 + "<a href=\"what.html\">what</a></pre>";
317 
318         parser.parse( text, sink );
319 
320         Iterator<SinkEventElement> it = sink.getEventList().iterator();
321 
322         assertEquals( "verbatim", it.next().getName() );
323         assertEquals( "link", it.next().getName() );
324         assertEquals( "text", it.next().getName() );
325         assertEquals( "link_", it.next().getName() );
326         assertEquals( "text", it.next().getName() );
327         assertEquals( "link", it.next().getName() );
328         assertEquals( "text", it.next().getName() );
329         assertEquals( "link_", it.next().getName() );
330         assertEquals( "verbatim_", it.next().getName() );
331     }
332 
333     /** @throws Exception  */
334     public void testDoxia250()
335         throws Exception
336     {
337         StringBuilder sb = new StringBuilder();
338         sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
339         sb.append( "<!ENTITY foo \"&#x159;\">" ).append( XhtmlBaseParser.EOL );
340         sb.append( "<!ENTITY foo1 \"&nbsp;\">" ).append( XhtmlBaseParser.EOL );
341         sb.append( "<!ENTITY foo2 \"&#x161;\">" ).append( XhtmlBaseParser.EOL );
342         sb.append( "<!ENTITY tritPos \"&#x1d7ed;\">" ).append( XhtmlBaseParser.EOL );
343         sb.append( "]>" ).append( XhtmlBaseParser.EOL );
344         sb.append( "<b>&foo;&foo1;&foo2;&tritPos;</b>" );
345 
346         parser.setValidate( false );
347         parser.parse( sb.toString(), sink );
348 
349         Iterator<SinkEventElement> it = sink.getEventList().iterator();
350 
351         SinkEventElement event = it.next();
352         assertEquals( "inline", event.getName() );
353 
354         event = it.next();
355         assertEquals( "text", event.getName() );
356         assertEquals( "\u0159",  (String) event.getArgs()[0] );
357 
358         event = it.next();
359         assertEquals( "text", event.getName() );
360         assertEquals( "\u00A0",  (String) event.getArgs()[0] );
361 
362         event = it.next();
363         assertEquals( "text", event.getName() );
364         assertEquals( "\u0161",  (String) event.getArgs()[0] );
365 
366         event = it.next();
367         assertEquals( "text", event.getName() );
368         assertEquals( "\uD835\uDFED",  (String) event.getArgs()[0] );
369 
370         event = it.next();
371         assertEquals( "inline_", event.getName() );
372     }
373 
374     /** @throws Exception  */
375     public void testEntities()
376         throws Exception
377     {
378         final String text = "<!DOCTYPE test [<!ENTITY flo \"&#x159;\"><!ENTITY tritPos \"&#x1d7ed;\"><!ENTITY fo \"&#65;\"><!ENTITY myCustom \"&fo;\">]>"
379                 + "<body><h2>&amp;&flo;&#x159;&tritPos;&#x1d7ed;</h2><p>&amp;&flo;&#x159;&tritPos;&#x1d7ed;&myCustom;</p></body>";
380 
381         parser.setValidate( false );
382         parser.parse( text, sink );
383 
384         Iterator<SinkEventElement> it = sink.getEventList().iterator();
385 
386         assertEquals( "section1", it.next().getName() );
387         assertEquals( "sectionTitle1", it.next().getName() );
388 
389         SinkEventElement textEvt = it.next();
390         assertEquals( "text", textEvt.getName() );
391         assertEquals( "&", textEvt.getArgs()[0] );
392 
393         textEvt = it.next();
394         assertEquals( "text", textEvt.getName() );
395         assertEquals( "\u0159", textEvt.getArgs()[0] );
396 
397         textEvt = it.next();
398         assertEquals( "text", textEvt.getName() );
399         assertEquals( "\u0159", textEvt.getArgs()[0] );
400 
401         textEvt = it.next();
402         assertEquals( "text", textEvt.getName() );
403         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
404 
405         textEvt = it.next();
406         assertEquals( "text", textEvt.getName() );
407         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
408 
409         assertEquals( "sectionTitle1_", it.next().getName() );
410         assertEquals( "paragraph", it.next().getName() );
411 
412         textEvt = it.next();
413         assertEquals( "text", textEvt.getName() );
414         assertEquals( "&", textEvt.getArgs()[0] );
415 
416         textEvt = it.next();
417         assertEquals( "text", textEvt.getName() );
418         assertEquals( "\u0159", textEvt.getArgs()[0] );
419 
420         textEvt = it.next();
421         assertEquals( "text", textEvt.getName() );
422         assertEquals( "\u0159", textEvt.getArgs()[0] );
423 
424         textEvt = it.next();
425         assertEquals( "text", textEvt.getName() );
426         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
427 
428         textEvt = it.next();
429         assertEquals( "text", textEvt.getName() );
430         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
431 
432         textEvt = it.next();
433         assertEquals( "text", textEvt.getName() );
434         assertEquals( "A", textEvt.getArgs()[0] );
435 
436         assertEquals( "paragraph_", it.next().getName() );
437 
438         assertFalse( it.hasNext() );
439     }
440 
441     /** @throws Exception  */
442     public void testXhtmlEntities()
443         throws Exception
444     {
445         final String text = "<body><h2>&laquo;&reg;</h2><p>&ldquo;&rsquo;&Phi;&larr;</p></body>";
446 
447         parser.parse( text, sink );
448 
449         Iterator<SinkEventElement> it = sink.getEventList().iterator();
450 
451         assertEquals( "section1", it.next().getName() );
452         assertEquals( "sectionTitle1", it.next().getName() );
453 
454         // Couple symbols from Latin-1:
455         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Latin-1_characters
456 
457         SinkEventElement textEvt = it.next();
458         assertEquals( "text", textEvt.getName() );
459         assertEquals( "\u00AB", textEvt.getArgs()[0] );
460 
461         textEvt = it.next();
462         assertEquals( "text", textEvt.getName() );
463         assertEquals( "\u00AE", textEvt.getArgs()[0] );
464 
465         assertEquals( "sectionTitle1_", it.next().getName() );
466         assertEquals( "paragraph", it.next().getName() );
467 
468         // Couple symbols from Special characters:
469         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
470 
471         textEvt = it.next();
472         assertEquals( "text", textEvt.getName() );
473         assertEquals( "\u201C", textEvt.getArgs()[0] );
474 
475         textEvt = it.next();
476         assertEquals( "text", textEvt.getName() );
477         assertEquals( "\u2019", textEvt.getArgs()[0] );
478 
479         // Couple symbols from Symbols:
480         // http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Symbols
481 
482         textEvt = it.next();
483         assertEquals( "text", textEvt.getName() );
484         assertEquals( "\u03A6", textEvt.getArgs()[0] );
485 
486         textEvt = it.next();
487         assertEquals( "text", textEvt.getName() );
488         assertEquals( "\u2190", textEvt.getArgs()[0] );
489 
490         assertEquals( "paragraph_", it.next().getName() );
491 
492         assertFalse( it.hasNext() );
493     }
494 
495     /** @throws Exception  */
496     public void testLists()
497         throws Exception
498     {
499         String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
500         parser.parse( text, sink );
501         Iterator<SinkEventElement> it = sink.getEventList().iterator();
502 
503         assertEquals( "list", it.next().getName() );
504         assertEquals( "listItem", it.next().getName() );
505         assertEquals( "listItem_", it.next().getName() );
506         assertEquals( "list_", it.next().getName() );
507 
508         assertEquals( "numberedList", it.next().getName() );
509         assertEquals( "numberedListItem", it.next().getName() );
510         assertEquals( "numberedListItem_", it.next().getName() );
511         assertEquals( "numberedList_", it.next().getName() );
512 
513         assertEquals( "definitionList", it.next().getName() );
514         assertEquals( "definitionListItem", it.next().getName() );
515         assertEquals( "definedTerm", it.next().getName() );
516         assertEquals( "definedTerm_", it.next().getName() );
517         assertEquals( "definition", it.next().getName() );
518         assertEquals( "definition_", it.next().getName() );
519         assertEquals( "definitionListItem_", it.next().getName() );
520         assertEquals( "definitionList_", it.next().getName() );
521     }
522 
523     /** @throws Exception  */
524     public void testSimpleTags()
525         throws Exception
526     {
527         String text = "<div><br/><hr/><img src=\"img.src\"/></div>";
528         parser.parse( text, sink );
529         Iterator<SinkEventElement> it = sink.getEventList().iterator();
530 
531         assertEquals( "lineBreak", it.next().getName() );
532         assertEquals( "horizontalRule", it.next().getName() );
533         assertEquals( "figureGraphics", it.next().getName() );
534     }
535 
536     /** @throws Exception  */
537     public void testSemanticTags()
538         throws Exception
539     {
540         String text = "<s><i><b><code><samp><sup><sub><u>a text &amp; &#xc6;</u></sub></sup></samp></code></b></i></s>";
541         parser.parse( text, sink );
542         Iterator<SinkEventElement> it = sink.getEventList().iterator();
543 
544         SinkEventElement event = it.next();
545         assertEquals( "inline", event.getName() );
546         assertEquals( "semantics=line-through",  event.getArgs()[0].toString().trim() );
547 
548         event = it.next();
549         assertEquals( "inline", event.getName() );
550         assertEquals( "semantics=italic",  event.getArgs()[0].toString().trim() );
551 
552         event = it.next();
553         assertEquals( "inline", event.getName() );
554         assertEquals( "semantics=bold",  event.getArgs()[0].toString().trim() );
555 
556         event = it.next();
557         assertEquals( "inline", event.getName() );
558         assertEquals( "semantics=code",  event.getArgs()[0].toString().trim() );
559 
560         event = it.next();
561         assertEquals( "inline", event.getName() );
562         assertEquals( "semantics=code",  event.getArgs()[0].toString().trim() );
563 
564         event = it.next();
565         assertEquals( "inline", event.getName() );
566         assertEquals( "semantics=superscript",  event.getArgs()[0].toString().trim() );
567 
568         event = it.next();
569         assertEquals( "inline", event.getName() );
570         assertEquals( "semantics=subscript",  event.getArgs()[0].toString().trim() );
571 
572         event = it.next();
573         assertEquals( "inline", event.getName() );
574         assertEquals( "semantics=annotation",  event.getArgs()[0].toString().trim() );
575 
576         assertEquals( "text", it.next().getName() );
577         assertEquals( "text", it.next().getName() );
578         assertEquals( "text", it.next().getName() );
579         assertEquals( "text", it.next().getName() );
580 
581         assertEquals( "inline_", it.next().getName() );
582         assertEquals( "inline_", it.next().getName() );
583         assertEquals( "inline_", it.next().getName() );
584         assertEquals( "inline_", it.next().getName() );
585         assertEquals( "inline_", it.next().getName() );
586         assertEquals( "inline_", it.next().getName() );
587         assertEquals( "inline_", it.next().getName() );
588         assertEquals( "inline_", it.next().getName() );
589 
590     }
591 
592     /** @throws Exception  */
593     public void testSpecial()
594         throws Exception
595     {
596         String text = "<p><!-- a pagebreak: --><!-- PB -->&nbsp;&#160;<unknown /></p>";
597         parser.parse( text, sink );
598         Iterator<SinkEventElement> it = sink.getEventList().iterator();
599 
600         assertEquals( "paragraph", it.next().getName() );
601         assertEquals( "comment", it.next().getName() );
602         assertEquals( "pageBreak", it.next().getName() );
603         assertEquals( "nonBreakingSpace", it.next().getName() );
604         assertEquals( "nonBreakingSpace", it.next().getName() );
605         // unknown events are not reported by the base parser
606         assertEquals( "paragraph_", it.next().getName() );
607     }
608 
609     /** @throws Exception  */
610     public void testTable()
611         throws Exception
612     {
613         String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
614         parser.parse( text, sink );
615         Iterator<SinkEventElement> it = sink.getEventList().iterator();
616 
617         assertEquals( "table", it.next().getName() );
618 
619         // DOXIA-374
620         SinkEventElement el = it.next();
621         assertEquals( "tableRows", el.getName() );
622         assertFalse( (Boolean) el.getArgs()[1] );
623 
624         assertEquals( "tableCaption", it.next().getName() );
625         assertEquals( "tableCaption_", it.next().getName() );
626         assertEquals( "tableRow", it.next().getName() );
627         assertEquals( "tableHeaderCell", it.next().getName() );
628         assertEquals( "tableHeaderCell_", it.next().getName() );
629         assertEquals( "tableRow_", it.next().getName() );
630         assertEquals( "tableRow", it.next().getName() );
631         assertEquals( "tableCell", it.next().getName() );
632         assertEquals( "tableCell_", it.next().getName() );
633         assertEquals( "tableRow_", it.next().getName() );
634         assertEquals( "tableRows_", it.next().getName() );
635         assertEquals( "table_", it.next().getName() );
636     }
637 
638     /** @throws Exception  */
639     public void testFigure()
640         throws Exception
641     {
642         String text = "<div class=\"figure\"><p><img src=\"src.jpg\"/></p><p><i></i></p></div>";
643         parser.parse( text, sink );
644         Iterator<SinkEventElement> it = sink.getEventList().iterator();
645 
646         assertEquals( "figure", it.next().getName() );
647         assertEquals( "figureGraphics", it.next().getName() );
648         assertEquals( "figureCaption", it.next().getName() );
649         assertEquals( "figureCaption_", it.next().getName() );
650         assertEquals( "figure_", it.next().getName() );
651     }
652 
653     /** @throws Exception  */
654     public void testAnchorLink()
655         throws Exception
656     {
657         String text = "<div><a href=\"\"></a>" +
658                 "<a href=\"valid\"></a>" +
659                 "<a href=\"#1invalid\"></a>" +
660                 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
661                 "<a name=\"valid\"></a>" +
662                 "<a name=\"1invalid\"></a>" +
663                 "<a id=\"1invalid\"></a></div>";
664 
665         parser.parse( text, sink );
666         Iterator<SinkEventElement> it = sink.getEventList().iterator();
667 
668         SinkEventElement element = it.next();
669         assertEquals( "link", element.getName() );
670         assertEquals( "", element.getArgs()[0] );
671         assertEquals( "link_", it.next().getName() );
672 
673         element = it.next();
674         assertEquals( "link", element.getName() );
675         assertEquals( "valid", element.getArgs()[0] );
676         assertEquals( "link_", it.next().getName() );
677 
678         element = it.next();
679         assertEquals( "link", element.getName() );
680         assertEquals( "#a1invalid", element.getArgs()[0] );
681         assertEquals( "link_", it.next().getName() );
682 
683         element = it.next();
684         assertEquals( "link", element.getName() );
685         assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
686         assertEquals( "link_", it.next().getName() );
687 
688         element = it.next();
689         assertEquals( "anchor", element.getName() );
690         assertEquals( "valid", element.getArgs()[0] );
691         assertEquals( "anchor_", it.next().getName() );
692 
693         element = it.next();
694         assertEquals( "anchor", element.getName() );
695         assertEquals( "a1invalid", element.getArgs()[0] );
696         assertEquals( "anchor_", it.next().getName() );
697 
698         element = it.next();
699         assertEquals( "anchor", element.getName() );
700         assertEquals( "a1invalid", element.getArgs()[0] );
701         assertEquals( "anchor_", it.next().getName() );
702     }
703 
704     /**
705      * Test entities in attributes.
706      *
707      * @throws java.lang.Exception if any.
708      */
709     public void testAttributeEntities()
710         throws Exception
711     {
712         String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&amp;l=e\"></script>";
713 
714         parser.parse( text, sink );
715 
716         Iterator<SinkEventElement> it = sink.getEventList().iterator();
717 
718         SinkEventElement event = it.next();
719 
720         assertEquals( "unknown", event.getName() );
721         assertEquals( "script", event.getArgs()[0] );
722         SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
723         // ampersand should be un-escaped
724         assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
725         assertEquals( "unknown", it.next().getName() );
726         assertFalse( it.hasNext() );
727 
728         sink.reset();
729         text = "<img src=\"http://ex.com/ex.jpg?v=l&amp;l=e\" alt=\"image\"/>";
730         parser.parse( text, sink );
731 
732         it = sink.getEventList().iterator();
733         event = it.next();
734         assertEquals( "figureGraphics", event.getName() );
735         attribs = (SinkEventAttributeSet) event.getArgs()[1];
736         // ampersand should be un-escaped
737         assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
738     }
739     
740     public void testUnbalancedDefinitionListItem() throws Exception
741     {
742         String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
743                         "<dl><dd>value</dd></dl>" +
744                         "<dl><dt>key</dt></dl>" +
745                         "<dl></dl>" +
746                         "<dl><dd>value</dd><dt>key</dt></dl></body>";
747 
748         parser.parse( text, sink );
749 
750         Iterator<SinkEventElement> it = sink.getEventList().iterator();
751         assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
752                           "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
753         assertStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
754                           "definitionListItem_", "definitionList_" );
755         assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
756                           "definitionListItem_", "definitionList_" );
757         assertStartsWith( it, "definitionList", "definitionList_" );
758         assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
759                           "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
760                           "definitionListItem_", "definitionList_" );
761     }
762 }