View Javadoc
1   package org.apache.maven.doxia.parser;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.Iterator;
23  
24  import org.apache.maven.doxia.logging.Log;
25  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
26  import org.apache.maven.doxia.sink.impl.SinkEventElement;
27  import org.apache.maven.doxia.sink.impl.SinkEventTestingSink;
28  
29  import static org.junit.Assert.assertNotEquals;
30  
31  /**
32   * Test for Xhtml5BaseParser.
33   */
34  public class Xhtml5BaseParserTest
35      extends AbstractParserTest
36  {
37      private Xhtml5BaseParser parser;
38      private final SinkEventTestingSink sink = new SinkEventTestingSink();
39  
40  
41      @Override
42      protected Parser createParser()
43      {
44          parser = new Xhtml5BaseParser();
45          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
46          return parser;
47      }
48  
49      @Override
50      protected String outputExtension()
51      {
52          return "xhtml";
53      }
54  
55      @Override
56      protected void setUp() throws Exception
57      {
58          super.setUp();
59  
60          parser = new Xhtml5BaseParser();
61          parser.getLog().setLogLevel( Log.LEVEL_ERROR );
62          sink.reset();
63      }
64  
65      /** Test Doxia version. */
66      public void testDoxiaVersion()
67      {
68          assertNotNull( XhtmlBaseParser.doxiaVersion() );
69          assertNotEquals( "unknown", XhtmlBaseParser.doxiaVersion() );
70      }
71  
72      public void testHeadingEventsList()
73          throws Exception
74      {
75          String text = "<p><h2></h2><h3></h3><h4></h4><h5></h5><h6></h6><h2></h2></p>";
76  
77          parser.parse( text, sink );
78  
79          Iterator<SinkEventElement> it = sink.getEventList().iterator();
80  
81          assertEquals( "paragraph", it.next().getName() );
82          assertEquals( "section1", it.next().getName() );
83          assertEquals( "sectionTitle1", it.next().getName() );
84          assertEquals( "sectionTitle1_", it.next().getName() );
85          assertEquals( "section2", it.next().getName() );
86          assertEquals( "sectionTitle2", it.next().getName() );
87          assertEquals( "sectionTitle2_", it.next().getName() );
88          assertEquals( "section3", it.next().getName() );
89          assertEquals( "sectionTitle3", it.next().getName() );
90          assertEquals( "sectionTitle3_", it.next().getName() );
91          assertEquals( "section4", it.next().getName() );
92          assertEquals( "sectionTitle4", it.next().getName() );
93          assertEquals( "sectionTitle4_", it.next().getName() );
94          assertEquals( "section5", it.next().getName() );
95          assertEquals( "sectionTitle5", it.next().getName() );
96          assertEquals( "sectionTitle5_", it.next().getName() );
97          assertEquals( "section5_", it.next().getName() );
98          assertEquals( "section4_", it.next().getName() );
99          assertEquals( "section3_", it.next().getName() );
100         assertEquals( "section2_", it.next().getName() );
101         assertEquals( "section1_", it.next().getName() );
102         assertEquals( "section1", it.next().getName() );
103         assertEquals( "sectionTitle1", it.next().getName() );
104         assertEquals( "sectionTitle1_", it.next().getName() );
105         // this one is missing because we enclose everything in <p> which is not valid xhtml,
106         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
107         //assertEquals( "section1_", it.next().getName() );
108         assertEquals( "paragraph_", it.next().getName() );
109         assertFalse( it.hasNext() );
110     }
111 
112     public void testNestedHeadingEventsList()
113         throws Exception
114     {
115         // DOXIA-241
116         String text = "<p><h2></h2><h6></h6><h3></h3></p>";
117 
118         parser.parse( text, sink );
119 
120         Iterator<SinkEventElement> it = sink.getEventList().iterator();
121 
122         assertEquals( "paragraph", it.next().getName() );
123         assertEquals( "section1", it.next().getName() );
124         assertEquals( "sectionTitle1", it.next().getName() );
125         assertEquals( "sectionTitle1_", it.next().getName() );
126 
127         assertEquals( "section2", it.next().getName() );
128         assertEquals( "section3", it.next().getName() );
129         assertEquals( "section4", it.next().getName() );
130 
131         assertEquals( "section5", it.next().getName() );
132         assertEquals( "sectionTitle5", it.next().getName() );
133         assertEquals( "sectionTitle5_", it.next().getName() );
134         assertEquals( "section5_", it.next().getName() );
135 
136         assertEquals( "section4_", it.next().getName() );
137         assertEquals( "section3_", it.next().getName() );
138         assertEquals( "section2_", it.next().getName() );
139 
140         assertEquals( "section2", it.next().getName() );
141         assertEquals( "sectionTitle2", it.next().getName() );
142         assertEquals( "sectionTitle2_", it.next().getName() );
143         // these two are missing because we enclose everything in <p> which is not valid xhtml,
144         // needs to be tested in overriding parser, eg XhtmlParser, XdocParser.
145         //assertEquals( "section2_", it.next().getName() );
146         //assertEquals( "section1_", it.next().getName() );
147         assertEquals( "paragraph_", it.next().getName() );
148         assertFalse( it.hasNext() );
149     }
150 
151     public void testFigureEventsList()
152         throws Exception
153     {
154         String text = "<img src=\"source\" title=\"caption\" />";
155 
156         parser.parse( text, sink );
157 
158         Iterator<SinkEventElement> it = sink.getEventList().iterator();
159 
160         assertEquals( "figureGraphics", it.next().getName() );
161         assertFalse( it.hasNext() );
162     }
163 
164     public void testTableEventsList()
165         throws Exception
166     {
167         // TODO: table caption, see DOXIA-177
168 
169         String text = "<table align=\"center\"><tr><th>Header</th></tr><tr><td>cell</td></tr></table>";
170 
171         parser.parse( text, sink );
172 
173         Iterator<SinkEventElement> it = sink.getEventList().iterator();
174 
175         assertEquals( "table", it.next().getName() );
176         assertEquals( "tableRows", it.next().getName() );
177         assertEquals( "tableRow", it.next().getName() );
178         assertEquals( "tableHeaderCell", it.next().getName() );
179         assertEquals( "text", it.next().getName() );
180         assertEquals( "tableHeaderCell_", it.next().getName() );
181         assertEquals( "tableRow_", it.next().getName() );
182         assertEquals( "tableRow", it.next().getName() );
183         assertEquals( "tableCell", it.next().getName() );
184         assertEquals( "text", it.next().getName() );
185         assertEquals( "tableCell_", it.next().getName() );
186         assertEquals( "tableRow_", it.next().getName() );
187         assertEquals( "tableRows_", it.next().getName() );
188         assertEquals( "table_", it.next().getName() );
189 
190         assertFalse( it.hasNext() );
191     }
192 
193     public void testSignificantWhiteSpace()
194         throws Exception
195     {
196         // NOTE significant white space
197         String text = "<p><b>word</b> <i>word</i></p>";
198 
199         parser.parse( text, sink );
200 
201         Iterator<SinkEventElement> it = sink.getEventList().iterator();
202 
203         assertEquals( "paragraph", it.next().getName() );
204         assertEquals( "inline", it.next().getName() );
205         assertEquals( "text", it.next().getName() );
206         assertEquals( "inline_", it.next().getName() );
207 
208         SinkEventElement el = it.next();
209         assertEquals( "text", el.getName() );
210         assertEquals( " ",  (String) el.getArgs()[0] );
211 
212         assertEquals( "inline", it.next().getName() );
213         assertEquals( "text", it.next().getName() );
214         assertEquals( "inline_", it.next().getName() );
215         assertEquals( "paragraph_", it.next().getName() );
216         assertFalse( it.hasNext() );
217 
218 
219         // same test with EOL
220         String eol = System.getProperty( "line.separator" );
221         text = "<p><b>word</b>" + eol + "<i>word</i></p>";
222 
223         sink.reset();
224         parser.parse( text, sink );
225         it = sink.getEventList().iterator();
226 
227         assertEquals( "paragraph", it.next().getName() );
228         assertEquals( "inline", it.next().getName() );
229         assertEquals( "text", it.next().getName() );
230         assertEquals( "inline_", it.next().getName() );
231 
232         el = it.next();
233         assertEquals( "text", el.getName() );
234         // according to section 2.11 of the XML spec, parsers must normalize line breaks to "\n"
235         assertEquals( "\n",  (String) el.getArgs()[0] );
236 
237         assertEquals( "inline", it.next().getName() );
238         assertEquals( "text", it.next().getName() );
239         assertEquals( "inline_", it.next().getName() );
240         assertEquals( "paragraph_", it.next().getName() );
241         assertFalse( it.hasNext() );
242 
243 
244         // DOXIA-189: there should be no EOL after closing tag
245         text = "<p>There should be no space after the last <i>word</i>.</p>";
246 
247         sink.reset();
248         parser.parse( text, sink );
249         it = sink.getEventList().iterator();
250 
251         assertEquals( "paragraph", it.next().getName() );
252         assertEquals( "text", it.next().getName() );
253         assertEquals( "inline", it.next().getName() );
254         assertEquals( "text", it.next().getName() );
255         assertEquals( "inline_", it.next().getName() );
256 
257         el = it.next();
258         assertEquals( "text", el.getName() );
259         assertEquals( ".",  (String) el.getArgs()[0] );
260 
261         assertEquals( "paragraph_", it.next().getName() );
262         assertFalse( it.hasNext() );
263     }
264 
265     public void testPreFormattedText()
266         throws Exception
267     {
268         String text = "<pre><a href=\"what.html\">what</a></pre>";
269 
270         parser.parse( text, sink );
271 
272         Iterator<SinkEventElement> it = sink.getEventList().iterator();
273         assertEquals( "verbatim", it.next().getName() );
274         assertEquals( "link", it.next().getName() );
275         assertEquals( "text", it.next().getName() );
276         assertEquals( "link_", it.next().getName() );
277         assertEquals( "verbatim_", it.next().getName() );
278         assertFalse( it.hasNext() );
279 
280         text = "<pre><![CDATA[<a href=\"what.html\">what</a>]]></pre>";
281         sink.reset();
282         parser.parse( text, sink );
283 
284         it = sink.getEventList().iterator();
285         assertEquals( "verbatim", it.next().getName() );
286         assertEquals( "text", it.next().getName() );
287         assertEquals( "verbatim_", it.next().getName() );
288         assertFalse( it.hasNext() );
289 
290         text = "<pre><![CDATA[<pre>what</pre>]]></pre>";
291         sink.reset();
292         parser.parse( text, sink );
293 
294         it = sink.getEventList().iterator();
295         assertEquals( "verbatim", it.next().getName() );
296         assertEquals( "text", it.next().getName() );
297         assertEquals( "verbatim_", it.next().getName() );
298         assertFalse( it.hasNext() );
299     }
300 
301     public void testPreEOL()
302         throws Exception
303     {
304         // test EOLs within <pre>: the sink MUST receive a text event for the EOL
305         String text = "<pre><a href=\"what.html\">what</a>" + XhtmlBaseParser.EOL
306                 + "<a href=\"what.html\">what</a></pre>";
307 
308         parser.parse( text, sink );
309 
310         Iterator<SinkEventElement> it = sink.getEventList().iterator();
311 
312         assertEquals( "verbatim", it.next().getName() );
313         assertEquals( "link", it.next().getName() );
314         assertEquals( "text", it.next().getName() );
315         assertEquals( "link_", it.next().getName() );
316         assertEquals( "text", it.next().getName() );
317         assertEquals( "link", it.next().getName() );
318         assertEquals( "text", it.next().getName() );
319         assertEquals( "link_", it.next().getName() );
320         assertEquals( "verbatim_", it.next().getName() );
321     }
322 
323     public void testDoxia250()
324         throws Exception
325     {
326         StringBuilder sb = new StringBuilder();
327         sb.append( "<!DOCTYPE test [" ).append( XhtmlBaseParser.EOL );
328         sb.append( "<!ENTITY foo \"&#x159;\">" ).append( XhtmlBaseParser.EOL );
329         sb.append( "<!ENTITY foo1 \"&nbsp;\">" ).append( XhtmlBaseParser.EOL );
330         sb.append( "<!ENTITY foo2 \"&#x161;\">" ).append( XhtmlBaseParser.EOL );
331         sb.append( "<!ENTITY tritPos \"&#x1d7ed;\">" ).append( XhtmlBaseParser.EOL );
332         sb.append( "]>" ).append( XhtmlBaseParser.EOL );
333         sb.append( "<p>&foo;&foo1;&foo2;&tritPos;</p>" );
334 
335         parser.setValidate( false );
336         parser.parse( sb.toString(), sink );
337 
338         Iterator<SinkEventElement> it = sink.getEventList().iterator();
339 
340         SinkEventElement event = it.next();
341         assertEquals( "paragraph", event.getName() );
342 
343         event = it.next();
344         assertEquals( "text", event.getName() );
345         assertEquals( "\u0159",  (String) event.getArgs()[0] );
346 
347         event = it.next();
348         assertEquals( "text", event.getName() );
349         assertEquals( "\u00A0",  (String) event.getArgs()[0] );
350 
351         event = it.next();
352         assertEquals( "text", event.getName() );
353         assertEquals( "\u0161",  (String) event.getArgs()[0] );
354 
355         event = it.next();
356         assertEquals( "text", event.getName() );
357         assertEquals( "\uD835\uDFED",  (String) event.getArgs()[0] );
358 
359         event = it.next();
360         assertEquals( "paragraph_", event.getName() );
361     }
362 
363     public void testEntities()
364         throws Exception
365     {
366         final String text = "<!DOCTYPE test [<!ENTITY flo \"&#x159;\"><!ENTITY tritPos \"&#x1d7ed;\"><!ENTITY fo \"&#65;\"><!ENTITY myCustom \"&fo;\">]>"
367                 + "<body><h2>&amp;&flo;&#x159;&tritPos;&#x1d7ed;</h2><p>&amp;&flo;&#x159;&tritPos;&#x1d7ed;&myCustom;</p></body>";
368 
369         parser.setValidate( false );
370         parser.parse( text, sink );
371 
372         Iterator<SinkEventElement> it = sink.getEventList().iterator();
373 
374         assertEquals( "section1", it.next().getName() );
375         assertEquals( "sectionTitle1", it.next().getName() );
376 
377         SinkEventElement textEvt = it.next();
378         assertEquals( "text", textEvt.getName() );
379         assertEquals( "&", textEvt.getArgs()[0] );
380 
381         textEvt = it.next();
382         assertEquals( "text", textEvt.getName() );
383         assertEquals( "\u0159", textEvt.getArgs()[0] );
384 
385         textEvt = it.next();
386         assertEquals( "text", textEvt.getName() );
387         assertEquals( "\u0159", textEvt.getArgs()[0] );
388 
389         textEvt = it.next();
390         assertEquals( "text", textEvt.getName() );
391         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
392 
393         textEvt = it.next();
394         assertEquals( "text", textEvt.getName() );
395         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
396 
397         assertEquals( "sectionTitle1_", it.next().getName() );
398         assertEquals( "paragraph", it.next().getName() );
399 
400         textEvt = it.next();
401         assertEquals( "text", textEvt.getName() );
402         assertEquals( "&", textEvt.getArgs()[0] );
403 
404         textEvt = it.next();
405         assertEquals( "text", textEvt.getName() );
406         assertEquals( "\u0159", textEvt.getArgs()[0] );
407 
408         textEvt = it.next();
409         assertEquals( "text", textEvt.getName() );
410         assertEquals( "\u0159", textEvt.getArgs()[0] );
411 
412         textEvt = it.next();
413         assertEquals( "text", textEvt.getName() );
414         assertEquals( "\uD835\uDFED",  (String) textEvt.getArgs()[0] );
415 
416         textEvt = it.next();
417         assertEquals( "text", textEvt.getName() );
418         assertEquals( "\uD835\uDFED", textEvt.getArgs()[0] );
419 
420         textEvt = it.next();
421         assertEquals( "text", textEvt.getName() );
422         assertEquals( "A", textEvt.getArgs()[0] );
423 
424         assertEquals( "paragraph_", it.next().getName() );
425 // FIXME
426 //        assertEquals( "section1_", it.next().getName() );
427 
428         assertFalse( it.hasNext() );
429     }
430 
431     public void testXhtmlEntities()
432         throws Exception
433     {
434         final String text = "<body><h2>&quot;&amp;</h2><p>&apos;&lt;&gt;</p></body>";
435 
436         parser.parse( text, sink );
437 
438         Iterator<SinkEventElement> it = sink.getEventList().iterator();
439 
440         assertEquals( "section1", it.next().getName() );
441         assertEquals( "sectionTitle1", it.next().getName() );
442 
443         SinkEventElement textEvt = it.next();
444         assertEquals( "text", textEvt.getName() );
445         assertEquals( "\"", textEvt.getArgs()[0] );
446 
447         textEvt = it.next();
448         assertEquals( "text", textEvt.getName() );
449         assertEquals( "&", textEvt.getArgs()[0] );
450 
451         assertEquals( "sectionTitle1_", it.next().getName() );
452         assertEquals( "paragraph", it.next().getName() );
453 
454         textEvt = it.next();
455         assertEquals( "text", textEvt.getName() );
456         assertEquals( "\'", textEvt.getArgs()[0] );
457 
458         textEvt = it.next();
459         assertEquals( "text", textEvt.getName() );
460         assertEquals( "<", textEvt.getArgs()[0] );
461 
462         textEvt = it.next();
463         assertEquals( "text", textEvt.getName() );
464         assertEquals( ">", textEvt.getArgs()[0] );
465 
466         assertEquals( "paragraph_", it.next().getName() );
467 
468         assertFalse( it.hasNext() );
469     }
470 
471     public void testLists()
472         throws Exception
473     {
474         String text = "<div><ul><li></li></ul><ol><li></li></ol><dl><dt></dt><dd></dd></dl></div>";
475         parser.parse( text, sink );
476         Iterator<SinkEventElement> it = sink.getEventList().iterator();
477 
478         assertEquals( "division", it.next().getName() );
479         assertEquals( "list", it.next().getName() );
480         assertEquals( "listItem", it.next().getName() );
481         assertEquals( "listItem_", it.next().getName() );
482         assertEquals( "list_", it.next().getName() );
483 
484         assertEquals( "numberedList", it.next().getName() );
485         assertEquals( "numberedListItem", it.next().getName() );
486         assertEquals( "numberedListItem_", it.next().getName() );
487         assertEquals( "numberedList_", it.next().getName() );
488 
489         assertEquals( "definitionList", it.next().getName() );
490         assertEquals( "definitionListItem", it.next().getName() );
491         assertEquals( "definedTerm", it.next().getName() );
492         assertEquals( "definedTerm_", it.next().getName() );
493         assertEquals( "definition", it.next().getName() );
494         assertEquals( "definition_", it.next().getName() );
495         assertEquals( "definitionListItem_", it.next().getName() );
496         assertEquals( "definitionList_", it.next().getName() );
497         assertEquals( "division_", it.next().getName() );
498     }
499 
500     public void testSimpleTags()
501         throws Exception
502     {
503         String text = "<div><br /><wbr /><hr /><img src=\"img.src\"/></div>";
504         parser.parse( text, sink );
505         Iterator<SinkEventElement> it = sink.getEventList().iterator();
506 
507         assertEquals( "division", it.next().getName() );
508         assertEquals( "lineBreak", it.next().getName() );
509         assertEquals( "lineBreakOpportunity", it.next().getName() );
510         assertEquals( "horizontalRule", it.next().getName() );
511         assertEquals( "figureGraphics", it.next().getName() );
512         assertEquals( "division_", it.next().getName() );
513     }
514 
515     public void testSemanticTags()
516         throws Exception
517     {
518         String text = "<em><strong><small><s><cite><q><dfn><abbr><i><b><code><var><samp><kbd><sup><sub><u><mark><ruby><rb><rt><rtc><rp><bdi><bdo><span><ins><del>a text &amp; &#xc6;</del></ins></span></bdo></bdi></rp></rtc></rt></rb></ruby></mark></u></sub></sup></kbd></samp></var></code></b></i></abbr></dfn></q></cite></s></small></strong></em>";
519         parser.parse( text, sink );
520         Iterator<SinkEventElement> it = sink.getEventList().iterator();
521 
522         SinkEventElement event = it.next();
523         assertEquals( "inline", event.getName() );
524         assertEquals( "semantics=emphasis",  event.getArgs()[0].toString().trim() );
525 
526         event = it.next();
527         assertEquals( "inline", event.getName() );
528         assertEquals( "semantics=strong",  event.getArgs()[0].toString().trim() );
529 
530         event = it.next();
531         assertEquals( "inline", event.getName() );
532         assertEquals( "semantics=small",  event.getArgs()[0].toString().trim() );
533 
534         event = it.next();
535         assertEquals( "inline", event.getName() );
536         assertEquals( "semantics=line-through",  event.getArgs()[0].toString().trim() );
537 
538         event = it.next();
539         assertEquals( "inline", event.getName() );
540         assertEquals( "semantics=citation",  event.getArgs()[0].toString().trim() );
541 
542         event = it.next();
543         assertEquals( "inline", event.getName() );
544         assertEquals( "semantics=quote",  event.getArgs()[0].toString().trim() );
545 
546         event = it.next();
547         assertEquals( "inline", event.getName() );
548         assertEquals( "semantics=definition",  event.getArgs()[0].toString().trim() );
549 
550         event = it.next();
551         assertEquals( "inline", event.getName() );
552         assertEquals( "semantics=abbreviation",  event.getArgs()[0].toString().trim() );
553 
554         event = it.next();
555         assertEquals( "inline", event.getName() );
556         assertEquals( "semantics=italic",  event.getArgs()[0].toString().trim() );
557 
558         event = it.next();
559         assertEquals( "inline", event.getName() );
560         assertEquals( "semantics=bold",  event.getArgs()[0].toString().trim() );
561 
562         event = it.next();
563         assertEquals( "inline", event.getName() );
564         assertEquals( "semantics=code",  event.getArgs()[0].toString().trim() );
565 
566         event = it.next();
567         assertEquals( "inline", event.getName() );
568         assertEquals( "semantics=variable",  event.getArgs()[0].toString().trim() );
569 
570         event = it.next();
571         assertEquals( "inline", event.getName() );
572         assertEquals( "semantics=sample",  event.getArgs()[0].toString().trim() );
573 
574         event = it.next();
575         assertEquals( "inline", event.getName() );
576         assertEquals( "semantics=keyboard",  event.getArgs()[0].toString().trim() );
577 
578         event = it.next();
579         assertEquals( "inline", event.getName() );
580         assertEquals( "semantics=superscript",  event.getArgs()[0].toString().trim() );
581 
582         event = it.next();
583         assertEquals( "inline", event.getName() );
584         assertEquals( "semantics=subscript",  event.getArgs()[0].toString().trim() );
585 
586         event = it.next();
587         assertEquals( "inline", event.getName() );
588         assertEquals( "semantics=annotation",  event.getArgs()[0].toString().trim() );
589 
590         event = it.next();
591         assertEquals( "inline", event.getName() );
592         assertEquals( "semantics=highlight",  event.getArgs()[0].toString().trim() );
593 
594         event = it.next();
595         assertEquals( "inline", event.getName() );
596         assertEquals( "semantics=ruby",  event.getArgs()[0].toString().trim() );
597 
598         event = it.next();
599         assertEquals( "inline", event.getName() );
600         assertEquals( "semantics=rubyBase",  event.getArgs()[0].toString().trim() );
601 
602         event = it.next();
603         assertEquals( "inline", event.getName() );
604         assertEquals( "semantics=rubyText",  event.getArgs()[0].toString().trim() );
605 
606         event = it.next();
607         assertEquals( "inline", event.getName() );
608         assertEquals( "semantics=rubyTextContainer",  event.getArgs()[0].toString().trim() );
609 
610         event = it.next();
611         assertEquals( "inline", event.getName() );
612         assertEquals( "semantics=rubyParentheses",  event.getArgs()[0].toString().trim() );
613 
614         event = it.next();
615         assertEquals( "inline", event.getName() );
616         assertEquals( "semantics=bidirectionalIsolation",  event.getArgs()[0].toString().trim() );
617 
618         event = it.next();
619         assertEquals( "inline", event.getName() );
620         assertEquals( "semantics=bidirectionalOverride",  event.getArgs()[0].toString().trim() );
621 
622         event = it.next();
623         assertEquals( "inline", event.getName() );
624         assertEquals( "semantics=phrase",  event.getArgs()[0].toString().trim() );
625 
626         event = it.next();
627         assertEquals( "inline", event.getName() );
628         assertEquals( "semantics=insert",  event.getArgs()[0].toString().trim() );
629 
630         event = it.next();
631         assertEquals( "inline", event.getName() );
632         assertEquals( "semantics=delete",  event.getArgs()[0].toString().trim() );
633 
634         assertEquals( "text", it.next().getName() );
635         assertEquals( "text", it.next().getName() );
636         assertEquals( "text", it.next().getName() );
637         assertEquals( "text", it.next().getName() );
638 
639         assertEquals( "inline_", it.next().getName() );
640         assertEquals( "inline_", it.next().getName() );
641         assertEquals( "inline_", it.next().getName() );
642         assertEquals( "inline_", it.next().getName() );
643         assertEquals( "inline_", it.next().getName() );
644         assertEquals( "inline_", it.next().getName() );
645         assertEquals( "inline_", it.next().getName() );
646         assertEquals( "inline_", it.next().getName() );
647         assertEquals( "inline_", it.next().getName() );
648         assertEquals( "inline_", it.next().getName() );
649         assertEquals( "inline_", it.next().getName() );
650         assertEquals( "inline_", it.next().getName() );
651         assertEquals( "inline_", it.next().getName() );
652         assertEquals( "inline_", it.next().getName() );
653         assertEquals( "inline_", it.next().getName() );
654         assertEquals( "inline_", it.next().getName() );
655         assertEquals( "inline_", it.next().getName() );
656         assertEquals( "inline_", it.next().getName() );
657         assertEquals( "inline_", it.next().getName() );
658         assertEquals( "inline_", it.next().getName() );
659         assertEquals( "inline_", it.next().getName() );
660         assertEquals( "inline_", it.next().getName() );
661         assertEquals( "inline_", it.next().getName() );
662         assertEquals( "inline_", it.next().getName() );
663         assertEquals( "inline_", it.next().getName() );
664         assertEquals( "inline_", it.next().getName() );
665         assertEquals( "inline_", it.next().getName() );
666         assertEquals( "inline_", it.next().getName() );
667 
668     }
669 
670     public void testSpecial()
671         throws Exception
672     {
673         String text = "<p><!-- a pagebreak: --><!-- PB -->&nbsp;&#160;<unknown /></p>";
674         parser.parse( text, sink );
675         Iterator<SinkEventElement> it = sink.getEventList().iterator();
676 
677         assertEquals( "paragraph", it.next().getName() );
678         assertEquals( "comment", it.next().getName() );
679         assertEquals( "pageBreak", it.next().getName() );
680         assertEquals( "nonBreakingSpace", it.next().getName() );
681         assertEquals( "nonBreakingSpace", it.next().getName() );
682         // unknown events are not reported by the base parser
683         assertEquals( "paragraph_", it.next().getName() );
684     }
685 
686     public void testTable()
687         throws Exception
688     {
689         String text = "<table><caption></caption><tr><th></th></tr><tr><td></td></tr></table>";
690         parser.parse( text, sink );
691         Iterator<SinkEventElement> it = sink.getEventList().iterator();
692 
693         assertEquals( "table", it.next().getName() );
694 
695         // DOXIA-374
696         SinkEventElement el = it.next();
697         assertEquals( "tableRows", el.getName() );
698         assertFalse( (Boolean) el.getArgs()[1] );
699 
700         assertEquals( "tableCaption", it.next().getName() );
701         assertEquals( "tableCaption_", it.next().getName() );
702         assertEquals( "tableRow", it.next().getName() );
703         assertEquals( "tableHeaderCell", it.next().getName() );
704         assertEquals( "tableHeaderCell_", it.next().getName() );
705         assertEquals( "tableRow_", it.next().getName() );
706         assertEquals( "tableRow", it.next().getName() );
707         assertEquals( "tableCell", it.next().getName() );
708         assertEquals( "tableCell_", it.next().getName() );
709         assertEquals( "tableRow_", it.next().getName() );
710         assertEquals( "tableRows_", it.next().getName() );
711         assertEquals( "table_", it.next().getName() );
712     }
713 
714     public void testFigure()
715         throws Exception
716     {
717         String text = "<figure><img src=\"src.jpg\"/><figcaption></figcaption></figure>";
718         parser.parse( text, sink );
719         Iterator<SinkEventElement> it = sink.getEventList().iterator();
720 
721         assertEquals( "figure", it.next().getName() );
722         assertEquals( "figureGraphics", it.next().getName() );
723         assertEquals( "figureCaption", it.next().getName() );
724         assertEquals( "figureCaption_", it.next().getName() );
725         assertEquals( "figure_", it.next().getName() );
726     }
727 
728     public void testAnchorLink()
729         throws Exception
730     {
731         String text = "<div><a href=\"\"></a>" +
732                 "<a href=\"valid\"></a>" +
733                 "<a href=\"#1invalid\"></a>" +
734                 "<a href=\"http://www.fo.com/index.html#1invalid\"></a>" +
735                 "<a name=\"valid\"></a>" +
736                 "<a name=\"1invalid\"></a>" +
737                 "<a id=\"1invalid\"></a></div>";
738 
739         parser.parse( text, sink );
740         Iterator<SinkEventElement> it = sink.getEventList().iterator();
741 
742         SinkEventElement element = it.next();
743         assertEquals( "division", element.getName() );
744 
745         element = it.next();
746         assertEquals( "link", element.getName() );
747         assertEquals( "", element.getArgs()[0] );
748         assertEquals( "link_", it.next().getName() );
749 
750         element = it.next();
751         assertEquals( "link", element.getName() );
752         assertEquals( "valid", element.getArgs()[0] );
753         assertEquals( "link_", it.next().getName() );
754 
755         element = it.next();
756         assertEquals( "link", element.getName() );
757         assertEquals( "#a1invalid", element.getArgs()[0] );
758         assertEquals( "link_", it.next().getName() );
759 
760         element = it.next();
761         assertEquals( "link", element.getName() );
762         assertEquals( "http://www.fo.com/index.html#1invalid", element.getArgs()[0] );
763         assertEquals( "link_", it.next().getName() );
764 
765         element = it.next();
766         assertEquals( "anchor", element.getName() );
767         assertEquals( "valid", element.getArgs()[0] );
768         assertEquals( "anchor_", it.next().getName() );
769 
770         element = it.next();
771         assertEquals( "anchor", element.getName() );
772         assertEquals( "a1invalid", element.getArgs()[0] );
773         assertEquals( "anchor_", it.next().getName() );
774 
775         element = it.next();
776         assertEquals( "anchor", element.getName() );
777         assertEquals( "a1invalid", element.getArgs()[0] );
778         assertEquals( "anchor_", it.next().getName() );
779 
780         element = it.next();
781         assertEquals( "division_", element.getName() );
782     }
783 
784     /**
785      * Test entities in attributes.
786      *
787      * @throws java.lang.Exception if any.
788      */
789     public void testAttributeEntities()
790         throws Exception
791     {
792         String text = "<script type=\"text/javascript\" src=\"http://ex.com/ex.js?v=l&amp;l=e\"></script>";
793 
794         parser.parse( text, sink );
795 
796         Iterator<SinkEventElement> it = sink.getEventList().iterator();
797 
798         SinkEventElement event = it.next();
799 
800         assertEquals( "unknown", event.getName() );
801         assertEquals( "script", event.getArgs()[0] );
802         SinkEventAttributeSet attribs = (SinkEventAttributeSet) event.getArgs()[2];
803         // ampersand should be un-escaped
804         assertEquals( "http://ex.com/ex.js?v=l&l=e", attribs.getAttribute( "src" ) );
805         assertEquals( "unknown", it.next().getName() );
806         assertFalse( it.hasNext() );
807 
808         sink.reset();
809         text = "<img src=\"http://ex.com/ex.jpg?v=l&amp;l=e\" alt=\"image\"/>";
810         parser.parse( text, sink );
811 
812         it = sink.getEventList().iterator();
813         event = it.next();
814         assertEquals( "figureGraphics", event.getName() );
815         attribs = (SinkEventAttributeSet) event.getArgs()[1];
816         // ampersand should be un-escaped
817         assertEquals( "http://ex.com/ex.jpg?v=l&l=e", attribs.getAttribute( "src" ) );
818     }
819 
820     public void testUnbalancedDefinitionListItem() throws Exception
821     {
822         String text = "<body><dl><dt>key</dt><dd>value</dd></dl>" +
823                         "<dl><dd>value</dd></dl>" +
824                         "<dl><dt>key</dt></dl>" +
825                         "<dl></dl>" +
826                         "<dl><dd>value</dd><dt>key</dt></dl></body>";
827 
828         parser.parse( text, sink );
829 
830         Iterator<SinkEventElement> it = sink.getEventList().iterator();
831         assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
832                           "definition", "text", "definition_", "definitionListItem_", "definitionList_" );
833         assertStartsWith( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
834                           "definitionListItem_", "definitionList_" );
835         assertStartsWith( it, "definitionList", "definitionListItem", "definedTerm", "text", "definedTerm_",
836                           "definitionListItem_", "definitionList_" );
837         assertStartsWith( it, "definitionList", "definitionList_" );
838         assertEquals( it, "definitionList", "definitionListItem", "definition", "text", "definition_",
839                           "definitionListItem_", "definitionListItem", "definedTerm", "text", "definedTerm_",
840                           "definitionListItem_", "definitionList_" );
841     }
842 }